use strict;
use warnings;
use File::Temp qw(tempfile);
use LWP::Simple;
use HTML::SimpleLinkExtor;
# Usage: googlestrip file:///C:/googlesearchresult.htm > urllist.txt
my $url = shift;
my $filetype = "pdf";
my $filetypelen = length($filetype);
my $offset = -$filetypelen;
my $fileget = getstore($url,"tempfile.html");
my $extor = HTML::SimpleLinkExtor->new();
$extor->parse_file("tempfile.html");
my @a_hrefs = $extor->a;
my @pdflist;
for my $element (@a_hrefs) {
my $suffix = substr($element,$offset,$filetypelen);
if ($suffix =~ m/$filetype/) {
push @pdflist, $element;
}
}
for my $url (@pdflist) {
next if ($url =~ m/\/s.*pdf/);
print $url;
print "\n";
}
unlink "tempfile.html" or die "can't unlink tempfile.html: $!";