#!/usr/bin/perl -w
use LWP::Simple;
use HTML::TokeParser;
use strict;

# Simple link checker: fetch the start page, follow every <a href="...">
# found on it and report whether each target looks alive.
#
# A link is reported as bad when it cannot be fetched at all, or when the
# <title> of the fetched page contains "not found" or "404".  Anything else
# that was fetched successfully is reported as (apparently) good.
my $url = "http://www.perlmonks.org";

# _resolve_link($base, $href)
#
# Turn the href of an anchor into an absolute http(s) URL to test.
# Returns undef (empty list in list context) for anchors that cannot be
# checked over HTTP: missing/empty href, in-page fragments, and other
# schemes such as mailto: or javascript:.
#
# Relative links are resolved against $base by plain concatenation, which is
# only exact when $base is the site root (as it is for $url above).
sub _resolve_link {
   my ($base, $href) = @_;

   return unless defined $href && length $href;
   return if $href =~ m{\A#};                     # fragment on the same page

      # already fully qualified (http or https)
   return $href if $href =~ m{\Ahttps?://}i;

      # protocol-relative link: borrow the scheme of the base URL
   if ($href =~ m{\A//}) {
      my ($scheme) = $base =~ m{\A([a-z][a-z0-9+.\-]*):}i;
      $scheme = "http" unless defined $scheme;
      return "$scheme:$href";
   }

      # any other scheme (mailto:, javascript:, ftp:, ...) is not checked
   return if $href =~ m{\A[a-z][a-z0-9+.\-]*:}i;

      # relative link: join with exactly one slash between base and path
   (my $root = $base) =~ s{/+\z}{};
   (my $path = $href) =~ s{\A/+}{};
   return "$root/$path";
}

# _page_looks_missing($html)
#
# Returns 1 when the <title> of the given HTML document looks like an error
# page ("not found", case-insensitively, or "404"), 0 otherwise.  A document
# without a <title> is not considered an error page.
sub _page_looks_missing {
   my ($html) = @_;

   my $parser = HTML::TokeParser->new(\$html);
   return 0 unless $parser && $parser->get_tag("title");

   my $title = $parser->get_trimmed_text;
   return ($title =~ m/not found/i || $title =~ m/404/) ? 1 : 0;
}

my $content = get($url);
defined $content
   or die "Could not fetch $url\n";

my $parse = HTML::TokeParser->new(\$content);
while (my $token = $parse->get_tag("a")) {
      # link text is read first so the parser always advances past </a>
   my $text = $parse->get_trimmed_text("/a");

   my $target = _resolve_link($url, $token->[1]{href});
   next unless defined $target;

      # LWP::Simple::get returns undef on any failure (including a real
      # HTTP 404), so an undefined page is itself a bad link.
   my $page = get($target);
   if (!defined $page || _page_looks_missing($page)) {
      print "* $target ($text) is a bad link\n";
   } else {
      print "$target ($text) seems to be a good link\n";
   }
}
exit;