Re: How do I parse links out of a web page

⭐ in reply to How do I parse links out of a web page

You could also try this, which uses HTML::LinkExtor to collect the links while the page is being downloaded:

#!/usr/bin/perl -w
use LWP::UserAgent;
  use HTML::LinkExtor;
  use URI::URL;

  # Page to scan for links.  Declared lexically so the script does not
  # leak package globals.
  my $url = "http://www.google.ca/";  # for instance
  my $ua  = LWP::UserAgent->new;

  # Accumulator filled by callback() below with the link values found on
  # <a> tags.  (The name is historical: these are anchor links, not images.)
  my @imgs;
  # Link callback handed to HTML::LinkExtor->new.  It receives a tag name
  # followed by attribute name/value pairs.  Tags other than <a> are
  # ignored; for <a> every attribute value received is appended to @imgs.
  sub callback {
     my ($element, %value_of) = @_;

     # Anything that is not an anchor is of no interest here.
     unless ($element eq 'a') {
        return;
     }

     my @found = values %value_of;
     return push @imgs, @found;
  }

  # Make the parser.  Unfortunately, we don't know the base yet
  # (it might be different from $url), so the links are collected exactly
  # as written and only made absolute once the response is available.
  my $p = HTML::LinkExtor->new(\&callback);

  # Request the document and hand each chunk to the parser as it arrives
  my $res = $ua->request(HTTP::Request->new(GET => $url),
                         sub { $p->parse($_[0]) });

  # Stop here on a failed request instead of silently printing nothing
  die "Failed to fetch $url: " . $res->status_line . "\n"
      unless $res->is_success;

  # Expand all collected link URLs to absolute ones, using the base
  # reported by the response.  The map block returns the new value rather
  # than assigning to the aliased $_.
  my $base = $res->base;
  @imgs = map { url($_, $base)->abs } @imgs;

  # Print them out, one per line
  print join("\n", @imgs), "\n";
[download]

Comment on Re: How do I parse links out of a web page Download Code

In Section Seekers of Perl Wisdom