#!/usr/bin/perl -w
use strict;
use HTML::TokeParser;
use LWP::UserAgent;
my $ua = LWP::UserAgent->new;
my $proxy = 'http://proxy.hbt.tassie.net.au:8080';
my $netloc = '';
my $realm = '';
my $uname = '';
my $pass = '';
$ua->credentials($netloc, $realm, $uname, $pass);
$ua->proxy( 'http', $proxy );
my $url = 'http://www.perlmonks.com/';
$url .= '/' if $url =~ m|http://[^/]+$|i; # add trailing / if forgotten
my ($root) = $url =~ m|(http://.*/)|i;
my $content = get( $url );
print "Checking links....\n";
my $parse = HTML::TokeParser->new( \$content );
while (my $token = $parse->get_tag('a')) {
my $link = $token->[1]{href} || next;
my $text=$parse->get_trimmed_text('/a');
if ($link =~ m|^\s*mailto|i) {
next;
}
elsif ($link=~ m|^\s*http://|i){
print &test( $link, $text );
}
else {
$link =~ s|^\s*/||;
print &test( $root.$link, $text );
}
}
sub get {
my $url = shift;
print "Getting $url....";
my $request = HTTP::Request->new( 'GET', $url );
my $content = $ua->request( $request );
print "$$content{_msg} $$content{_rc}\n";
return $$content{_content};
}
sub test {
my ( $url, $text ) = @_;
my $request = HTTP::Request->new( 'HEAD', $url );
my $content = $ua->request( $request );
return "$$content{_msg} $$content{_rc} ($text) $url\n";
}
__END__
Getting http://www.perlmonks.com/....OK 200
Checking links....
OK 200 (Frank) http://www.perlmonks.com/index.pl?node_id=966&lastnode_id=131
OK 200 (perlmonks) http://www.perlmonks.com/index.pl
OK 200 (login) http://www.perlmonks.com/index.pl?node=login&lastnode_id=131
[snip]