If you don't want to use the Google API and would rather parse the web page results yourself, here's my code:
use strict;
use IO::Socket::INET;
# Maximum number of results to print; google_search() reads this variable.
my $limit = 5;

# Run the search with the command-line arguments as keywords.  A plain
# call is used instead of the legacy &name(...) form.
google_search(@ARGV);
# google_search(KEYWORD, ...)
#
# Fetches the Google web search page for the given keywords over plain HTTP
# and prints numbered results (link text, then link target) to STDOUT.  At
# most $limit results are printed; $limit is declared outside this sub.
# Prints "no doc found, sorry" when no result could be extracted.
#
# Arguments: one or more keyword strings; they are joined with single
#            spaces into one query.
# Returns:   nothing useful.
# Dies:      "no keywords" when no non-blank keyword is given, and
#            "error connecting to the server" when the TCP connect fails.
#
# The result pattern below is tied to one specific Google page layout and
# has to be adapted whenever that markup changes.
sub google_search {
    # Use every argument, so that "perl file.pl several words here"
    # searches for all the words and not only the first one.
    my $keyword = join ' ', grep { defined } @_;
    if ($keyword !~ /\S/) { die("no keywords\n"); }

    my $socket = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => "www.google.com",
        PeerPort => 80,
        Timeout  => 3,
    );
    if (!$socket) { die("error connecting to the server\n"); }
    $socket->autoflush(1);

    # Percent-encode everything that is not URL-safe, so that characters
    # such as "&", "#", "+" or CR/LF in a keyword cannot corrupt the
    # request line; spaces become "+" as before.
    my $query = $keyword;
    $query =~ s/([^A-Za-z0-9_.~ -])/sprintf('%%%02X', ord($1))/ge;
    $query =~ tr/ /+/;

    # "Connection: close" makes the server end the response with EOF,
    # which is the condition the read loop below waits for.
    my $request = join "\r\n",
        "GET /search?hl=en&ie=ISO-8859-1&q=$query HTTP/1.1",
        "Host: www.google.com",
        "User-Agent: Mozilla/5.0",
        "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*",
        "Accept-Language: en-us,en;q=0.5",
        "Connection: close",
        "", "";
    print {$socket} $request;

    # One search hit on a single (already normalised) line of HTML.
    # First capture: link target.  Second capture: link text.
    my $result_re = qr{
        (?: </blockquote> | <div> | </a></font>[ ] )
        <p[ ]class=g><a[ ]href=(\S+)>
        (.*?)</a>
        (?:<br>)?<font[ ]size=-1>
        (?: [^<] | [ ]-[ ]\[[ ] | [ ]\.\.\. | <i> | <span[ ]class=f> )
    }x;

    # Entities decoded in the link text.  They are replaced in one pass so
    # that a sequence like "&amp;quot;" is not decoded twice.
    my %char_for = (quot => '"', lt => '<', gt => '>', amp => '&');

    my $shown      = 0;    # number of results printed so far
    my $saw_result = 0;    # true once any result line was recognised

    LINE:
    while (my $line = <$socket>) {
        # Normalise the line: trim it, squeeze runs of spaces and drop
        # the bold tags Google puts around the matched words.
        $line =~ s/\s+$//;
        $line =~ s/^\s+//;
        $line =~ tr/ //s;
        $line =~ s{</?b>}{}g;

        # Google's explicit "nothing found" notice; \Q..\E keeps regex
        # metacharacters in the keyword from being interpreted.
        if (!$saw_result
            && $line =~ /^<br><br>Your search - \Q$keyword\E - did not match any documents\./) {
            last LINE;
        }

        next LINE unless $line =~ $result_re;
        my ($link, $title) = ($1, $2);
        $saw_result = 1;

        last LINE if $shown >= $limit;    # also covers $limit <= 0

        $title =~ s/&(quot|lt|gt|amp);/$char_for{$1}/g;

        $shown++;
        print STDOUT "$shown) $title\n";
        print STDOUT " $link\n";

        # Stop reading as soon as enough results have been printed.
        last LINE if $shown >= $limit;
    }
    close($socket);

    # Covers both the explicit "did not match" notice and a page on which
    # no result line was recognised.
    if (!$saw_result) { print STDOUT "no doc found, sorry\n"; }

    return;
}
1;
Depending on the results, the markup of Google's output page may vary, so you may have to adjust the regular expressions.
Run it using: perl file.pl your keywords here
Hope this helps.
P.S.: Google will change its web design soon, so this code may not work with the new one; I'll have to try it.