From a quick read of the
HTML::TokeParser documentation, it appears to expect actual HTML, supplied as a file handle or
as a reference to a string, not an LWP::UserAgent (or response) object.
use strict;
use warnings;
use LWP::UserAgent;
# NOTE(review): fatalsToBrowser / warningsToBrowser are documented as
# CGI::Carp exports rather than CGI ones -- confirm they have any effect here.
use CGI qw(:standard :cgi-lib -debug fatalsToBrowser warningsToBrowser);
use HTML::TokeParser;

# Fetch the Questionable Content front page and print the src attribute of
# every <img> tag found in it, as a small HTML page.
#
# All HTML helpers are called on the one $cgi object. Mixing the OO interface
# with the function interface (start_html, br, ...) makes CGI build a second,
# default object, which under -debug prompts for offline parameters twice.
my $cgi = CGI->new;
print $cgi->header();
print $cgi->start_html(-title => 'Save QC');
print "Starting...", $cgi->br;

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent('Mozilla/5.0 Windows Gecko/30.0 Firefox/30.0');

my $response = $ua->get('http://questionablecontent.net/');
if ($response->is_success) {
    print $cgi->br, $cgi->br, "Success!";

    # Parse the decoded body (content-encoding and charset applied); fall
    # back to the raw bytes if decoding failed and returned undef.
    my $html = $response->decoded_content;
    $html = $response->content unless defined $html;

    # HTML::TokeParser wants a reference to the document string.
    my $tp = HTML::TokeParser->new(\$html)
        or die "Can't open: $!";

    while (my $token = $tp->get_tag('img')) {
        # get_tag returns [$tag, \%attr, ...]; an <img> may have no src.
        my $src = $token->[1]{'src'};
        next unless defined $src;

        # The value comes from a remote page, so escape it before echoing
        # it into our own HTML.
        print "src =", $cgi->escapeHTML($src), $cgi->br;
    }
}
else {
    # Show whatever body the server sent with the error, then abort with
    # the HTTP status line.
    my $body = $response->decoded_content;
    print $body if defined $body;
    print $cgi->br, $cgi->br, "Failure!";
    die $response->status_line;
}
print $cgi->end_html;
Nets the following output:
(offline mode: enter name=value pairs on standard input; press ^D or ^
+Z when done)
Content-Type: text/html; charset=ISO-8859-1
(offline mode: enter name=value pairs on standard input; press ^D or ^
+Z when done)
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-U
+S">
<head>
<title>Save QC</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1
+" />
</head>
<body>
Starting...<br /><br /><br />Success!src =http://www.projectwonderful.
+com/nojs.php?id=39770&type=5<br />src =http://www.questionablecontent
+.net/images/patreonad.png<br />src =../images/logo14.png<br />src =ht
+tp://www.questionablecontent.net/comics/2845.png<br />src =http://www
+.questionablecontent.net/images/1s1k.gif<br />src =http://www.questio
+nablecontent.net/images/station.gif<br />src =http://www.questionable
+content.net/images/qc-vol04.jpg<br />src =http://www.questionablecont
+ent.net/images/yelingboard.jpg<br />src =http://www.questionableconte
+nt.net/images/18x24.jpg<br />src =http://www.questionablecontent.net/
+images/mlgshirt.jpg<br />src =../images/ag.gif<br />src =http://api.f
+lattr.com/button/flattr-badge-large.png<br />src =http://www.question
+ablecontent.net/images/patreonad2.png<br />src =http://www.projectwon
+derful.com/nojs.php?id=138&type=2<br />
</body>
</html>
-- Edit: fixed the
HTML::TokeParser link.