#!/usr/bin/perl -w
use strict;
use LWP::Simple;
use HTML::TokeParser;
# Scrape a PerlMonks thread page and report, for every reply header row,
# the raw <a ...> tag of the reply link followed by the name of the monk
# who wrote it -- first in page order, then once per monk sorted by name.
my $url = "http://perlmonks.org/index.pl?node_id=110166";

# get() signals failure by returning undef. Test definedness/length rather
# than truth so that a body of "0" is not mistaken for a failed download,
# and do not report $!, which is not part of get()'s documented interface.
my $rawHTML = get($url);
die "LWP::Simple could not fetch $url"
    unless defined $rawHTML and length $rawHTML;

my %monks;    # monk name => reply link tag . monk name (last reply wins)

# Do not interpolate the parser variable here: it is undef when new() fails.
my $tp = HTML::TokeParser->new(\$rawHTML)
    or die "HTML::TokeParser could not be created: $!";

# Shape of one reply header row, one entry per token: [ type, expectation ].
# For S/E tokens the expectation is the tag name; for T tokens it is a
# pattern the text must match, or undef when any text is acceptable.
my @reply_row = (
    [ S => 'tr' ],                                #  0 highlighted row
    [ S => 'td' ],                                #  1
    [ S => 'font' ],                              #  2
    [ S => 'a' ],                                 #  3 reply link
    [ T => undef ],                               #  4 Re: Name Space
    [ E => 'a' ],                                 #  5
    [ S => 'br' ],                                #  6
    [ T => qr/by/ ],                              #  7 by
    [ S => 'a' ],                                 #  8 user link
    [ T => undef ],                               #  9 japhy
    [ E => 'a' ],                                 # 10
    [ T => qr/on \w{3} \d{2}, \d{4} at/ ],        # 11 on Sep 04, 2001 at 13:42
    [ E => 'font' ],                              # 12
    [ E => 'td' ],                                # 13
    [ E => 'tr' ],                                # 14
);

TOKEN:
while (my $token = $tp->get_token) {

    # Only a <tr bgcolor="eeeeee"> can start a reply header row.
    my $starts_row = $token->[0] eq 'S'
        && $token->[1] eq 'tr'
        && exists $token->[2]{bgcolor}
        && $token->[2]{bgcolor} eq 'eeeeee';
    next TOKEN unless $starts_row;

    # Read ahead just far enough to cover one row, stopping early at the
    # end of the document so no undef token is ever dereferenced.
    my @row = ($token);
    while (@row < @reply_row) {
        my $ahead = $tp->get_token;
        last unless defined $ahead;
        push @row, $ahead;
    }

    # Compare the look-ahead window against the expected shape.
    my $is_reply = @row == @reply_row;
    if ($is_reply) {
        for my $i (0 .. $#reply_row) {
            my ($want_type, $want) = @{ $reply_row[$i] };
            my ($type, $value)     = @{ $row[$i] };
            my $ok = $type eq $want_type
                && ( !defined($want) ? 1
                   : ref($want)      ? $value =~ $want
                   :                   $value eq $want );
            if (!$ok) {
                $is_reply = 0;
                last;
            }
        }
    }

    if (!$is_reply) {
        # Hand the look-ahead back to the parser: a genuine reply row may
        # begin inside the window and must not be swallowed. The opening
        # <tr> itself stays consumed, so the loop always makes progress.
        $tp->unget_token(@row[1 .. $#row]) if @row > 1;
        next TOKEN;
    }

    my $link_tag = $row[3][4];    # element 4 of an S token is the tag's source text
    my $monk     = $row[9][1];    # element 1 of a T token is the text itself

    print $link_tag, $monk, "|\n";
    $monks{$monk} = $link_tag . $monk;
}

undef $rawHTML;    # the raw page is no longer needed
undef $tp;         # neither is the parser

print " or sorted\n\n";
for my $key (sort keys %monks) {
    print $monks{$key}, "|\n";
}
__END__
## one token per line
Re: Name Space
by
japhy
on Sep 04, 2001 at 13:42
|