#!/usr/bin/perl
#
# Fetch a single web page and report every link found in it.
#
# The page at $doc_url is downloaded, each double-quoted href="..." value
# is extracted with a regex, resolved against the page's base URL, and
# handed to check_url(), which currently just prints it.
use strict;
use warnings;

use LWP::UserAgent;    # the script calls LWP::UserAgent->new directly
use URI;

# LWP only accepts absolute URLs, so the scheme has to be spelled out.
my $doc_url = "http://www.perlmonks.org";
my $document;
my $browser;
init_browser();

# Get the page whose links we want to check:
my $response = $browser->get($doc_url);
die "Couldn't get $doc_url: ", $response->status_line
    unless $response->is_success;

# Prefer the decoded body (charset / content-encoding handled) and fall
# back to the raw bytes when decoding is not possible.
$document = $response->decoded_content;
$document = $response->content unless defined $document;

# Resolve relative links against the response's base URL, which may
# differ from the URL we requested (redirects, Content-Base headers).
$doc_url = $response->base;

while ($document =~ m/href\s*=\s*"([^"\s]+)"/gi) {
    my $link = $1;    # copy the capture before any other regex can clobber it
    my $absolute_url = absolutize($link, $doc_url);
    check_url($absolute_url);
}

# absolutize($url, $base)
#   Resolve $url (possibly relative) against $base and return the result
#   as a canonicalized URI object, which stringifies to the absolute URL.
sub absolutize {
    my ($url, $base) = @_;
    return URI->new_abs($url, $base)->canonical;
}

# init_browser()
#   Create the LWP::UserAgent used for all requests, store it in the
#   file-level $browser variable, and return it.
sub init_browser {
    $browser = LWP::UserAgent->new;
    # ...And any other initialization we might need to do...
    return $browser;
}

# check_url($url)
#   A temporary placeholder: prints the URL it was given.
sub check_url {
    my ($url) = @_;
    print "url's list $url\n";
    return;
}