#!/usr/bin/perl
use strict;
use warnings;
use HTML::TableContentParser;
use HTML::TokeParser::Simple;
use WWW::Mechanize;
use constant SOPW => '&ct=12';
# Crawl the archive index year -> month -> day and write one "- ..." entry to
# the report file for every table cell on a day page whose HTML contains the
# literal text "(0)" (presumably a zero reply count, going by the default
# report name -- confirm against the site).
#
# The report path is the first command-line argument, defaulting to
# 'noreplies.txt'.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get( 'http://www.tinymicros.com/ptav/index.pl' );

# Accept any non-empty file name, including "0", which a plain || would
# silently replace with the default.
my $out_path = ( defined $ARGV[0] && length $ARGV[0] )
    ? $ARGV[0]
    : 'noreplies.txt';
open( my $out, '>', $out_path )
    or die "Cannot open '$out_path' for writing: $!";

# Make the report handle the default output while crawling and turn on
# autoflush for it; the previous default is restored before the handle closes.
my $previous_handle = select $out;
$| = 1;

print {$out} "\n\n\n";

for my $year ( $mech->find_all_links( url_regex => qr/year/ ) ) {
    $mech->get( $year->url() );
    for my $month ( $mech->find_all_links( url_regex => qr/month/ ) ) {
        $mech->get( $month->url() );
        for my $day ( $mech->find_all_links( url_regex => qr/day/ ) ) {
            # SOPW appends the '&ct=12' query parameter to the day URL.
            $mech->get( $day->url() . SOPW );
            my $tables
                = HTML::TableContentParser->new()->parse( $mech->content() );

            # Entries are read from the second-to-last table on the page.
            # Pages with fewer than two tables are skipped instead of
            # dereferencing an entry that does not exist.
            if ( $tables && @{$tables} >= 2 ) {
                for my $row ( @{ $tables->[-2]{rows} || [] } ) {
                    for my $cell ( @{ $row->{cells} || [] } ) {
                        # Cells without data cannot match and would only
                        # trigger an "uninitialized value" warning.
                        next unless defined $cell->{data};
                        if ( $cell->{data} =~ /\(0\)/ ) {
                            print {$out} "- ", clean_link($cell), "\n\n";
                        }
                    }
                }
            }

            # Pause between day pages before stepping back to the month page.
            sleep 3;
            $mech->back();
        }
        $mech->back();
    }
    $mech->back();
}
print {$out} "\n\n\n";

# Restore the previous default handle, then surface any write error that only
# shows up when the report is closed.
select $previous_handle;
close($out) or die "Cannot close '$out_path': $!";
# Return the text of the first text token found in a table cell's HTML.
#
# Parameters:
#   $link - hash reference for a table cell; its {data} entry holds the
#           cell's HTML source.
#
# Returns the raw text of the first text token that appears before any end
# tag, or an empty string when {data} is undefined or no such token exists.
sub clean_link {
    my ($link) = @_;

    # Nothing to parse; avoid handing the parser a reference to undef.
    return '' unless defined $link->{data};

    my $p = HTML::TokeParser::Simple->new( \$link->{data} );
    while ( my $token = $p->get_token ) {
        # Only text ahead of the first end tag is considered.
        last if $token->is_end_tag;

        # Start tags (the anchor included) and other non-text tokens are
        # skipped; the anchor's href never contributed to the result.
        return $token->as_is if $token->is_text;
    }

    # Explicit result for cells without leading text, so callers never see
    # whatever value the loop happened to leave behind.
    return '';
}