use strict;
use warnings;

use File::Temp qw(tempfile);
use HTML::SimpleLinkExtor;
use LWP::Simple;

# googlestrip - print every <a href> link on a page whose URL ends in the
# wanted file type ("pdf"), one link per line on STDOUT.
#
# usage: googlestrip file:///C:/googlesearchresult.htm > urllist.txt
#
# The page named by the single URL argument is downloaded to a temporary
# file, its anchor links are extracted, and the matching ones are printed.
# Dies with a message if no URL is given or if the download fails.

# File type (URL suffix) to keep.
my $filetype = "pdf";

my $url = shift;
die "usage: $0 URL > urllist.txt\n" unless defined $url;

# Download into a uniquely named temporary file instead of a fixed
# "tempfile.html" in the current directory, so an existing file of that
# name is never clobbered. UNLINK => 1 removes the file when the script
# exits, including when it exits via die. The handle is closed right away
# because getstore() reopens the file by name.
my ($tmp_fh, $tmp_path) = tempfile(SUFFIX => '.html', UNLINK => 1);
close $tmp_fh or die "can't close $tmp_path: $!";

# getstore() returns the HTTP status code; stop instead of parsing a
# missing or empty file when the fetch did not succeed.
my $status = getstore($url, $tmp_path);
die "can't fetch $url: HTTP status $status\n" unless is_success($status);

my $extor = HTML::SimpleLinkExtor->new();
$extor->parse_file($tmp_path);

# Keep only the <a href> links that end in the wanted file type. The match
# is anchored to the end of the string and case-sensitive; \Q...\E keeps
# the file type from being interpreted as a regex.
my @pdflist = grep { m/\Q$filetype\E\z/ } $extor->a;

for my $link (@pdflist) {
    # Skip links that contain "/s" followed later by "pdf".
    # NOTE(review): presumably meant to drop search-engine-internal links
    # such as "/search?..."; as written it also drops any URL with a host
    # or path segment starting with "s" (e.g. "http://site.org/a.pdf").
    # Confirm the intent before tightening the pattern.
    next if $link =~ m{/s.*pdf};

    print $link, "\n";
}