#!/usr/bin/perl -w
use LWP::Simple;
use HTML::TokeParser;
use strict;
# Simple link checker: fetch one page, walk every <a> tag on it, and report
# for each HTTP(S) link whether the target looks reachable.
my $url = "http://www.perlmonks.org";

# LWP::Simple::get returns undef on any failure; without the start page
# there are no links to check, so stop with a clear message.
my $content = get($url);
die "Could not fetch $url\n" unless defined $content;

my $parse = HTML::TokeParser->new(\$content);
while (my $token = $parse->get_tag("a")) {
    # Pull the href attribute and the visible link text out of the tag.
    my $href = $token->[1]{href};
    my $text = $parse->get_trimmed_text("/a");

    # Anchors without an href (e.g. <a name="...">) have nothing to check.
    next unless defined $href && length $href;

    # Skip links that cannot be turned into an HTTP(S) URL.
    my $target = qualify_link($url, $href);
    next unless defined $target;

    print check_link($target, $text);
}
exit;

# qualify_link($base, $href)
#
# Turn an href found on the page at $base into a fully qualified http(s)
# URL. Returns nothing for links that are not fetchable over HTTP
# (mailto:, javascript:, other schemes, same-page "#fragment" links).
#
# NOTE: this is a deliberately small approximation of relative-reference
# resolution, not a full RFC 3986 implementation; plain relative paths are
# simply appended to $base, which is only exact when $base has no file
# component (true for the site root used above).
sub qualify_link {
    my ($base, $href) = @_;

    # Already fully qualified (http or https, anchored at the start so an
    # "http://" inside a query string does not count).
    return $href if $href =~ m{\Ahttps?://}i;

    # Any other explicit scheme is not something we can check here.
    return if $href =~ m{\A[a-z][a-z0-9+.\-]*:}i;

    # Same-page fragment: points back into the page we already have.
    return if $href =~ m{\A\#};

    # Protocol-relative link ("//host/path"): reuse the base's scheme.
    if ($href =~ m{\A//}) {
        my ($scheme) = $base =~ m{\A([a-z][a-z0-9+.\-]*):}i;
        return unless defined $scheme;
        return "$scheme:$href";
    }

    # Root-relative link ("/path"): attach to scheme://host of the base,
    # avoiding the "http://host//path" the naive join would produce.
    if ($href =~ m{\A/}) {
        my ($origin) = $base =~ m{\A(https?://[^/?\#]+)}i;
        return unless defined $origin;
        return "$origin$href";
    }

    # Plain relative link: guess by appending to the base URL.
    (my $prefix = $base) =~ s{/+\z}{};
    return "$prefix/$href";
}

# check_link($target, $text)
#
# Fetch $target and return a one-line report (newline-terminated) saying
# whether the link looks good or bad. $text is the link's visible text and
# is only used in the report.
#
# A link is considered bad when the fetch fails outright, or when the page
# comes back but its <title> mentions "not found" or "404" (servers that
# answer a missing page with a normal response).
sub check_link {
    my ($target, $text) = @_;

    my $bad  = "* $target ($text) is a bad link\n";
    my $good = "$target ($text) seems to be a good link\n";

    # get() yields undef for every failed request, real 404s included, so
    # an undefined result is itself the strongest "bad link" signal.
    my $page = get($target);
    return $bad unless defined $page;

    # Inspect the title for "soft" error pages.
    my $testparse = HTML::TokeParser->new(\$page);
    if ($testparse->get_tag("title")) {
        my $title = $testparse->get_trimmed_text;
        return $bad if $title =~ m/not found/i || $title =~ m/404/;
    }

    # Fetched fine and no error-looking title (or no title at all, as with
    # non-HTML targets): treat as good.
    return $good;
}