Beefy Boxes and Bandwidth Generously Provided by pair Networks
Don't ask to ask, just ask
 
PerlMonks  

mirod's scratchpad

by mirod (Canon)
on Jun 02, 2004 at 01:56 UTC ( [id://358990]=scratchpad: print w/replies, xml ) Need Help??

#!/usr/bin/perl -w
#
# Posts a fixed search form to perlmonks.org/index.pl (or reads a previously
# saved result page), cleans the HTML up with the external "tidy" program,
# then prints the result table found in the "main_content" element to STDOUT
# after trimming two columns, fixing the links and sorting the rows.
#
# Usage: <script> [-k] [-f file]
#   -f file   read the result page from <file> instead of querying the site
#   -k        keep the intermediate file ("<script>.tmp") instead of deleting it
#
# Dies if a file cannot be opened or written, if the HTTP request fails,
# if tidy cannot be started, or if the expected table is not in the page.

use strict;
use warnings;
use LWP;
use Getopt::Std;
use XML::Twig;

my %opt;
getopts( 'kf:' => \%opt)
    or die "usage: $0 [-k] [-f file]\n";

# Form fields sent with the search request. The "??" notes come from the
# original author: the meaning of those fields is not known here.
my $params= { node_id => 3989,
              n0      => 220215,
              BIT     => 'use XML::Twig;', # text to search for
              BIS     => '-',              # string separator
              BH      => 1,                # match in title too
              HIT     => "",
              HIR     => 0,                # ??
              a       => 'mirod',          # just one author
              xa      => 0,                # exclude author
              xs      => 0,                # exclude section
              BES     => '-',
              HER     => 0,                # ??
              xr      => 0,                # exclude root nodes
              re      => 'S',              # ??
              go      => 'Search',
              #displaytype => 'xml',
            };

my $URL= "http://perlmonks.org/index.pl";

# Get the raw result page, either from a local file (-f) or from the site.
my $doc;
if( my $file= $opt{f})
  { open( my $in, '<', $file) or die "cannot open $file: $!";
    $doc= do { local $/; <$in> };       # slurp the whole file
    close $in;
  }
else
  { my $browser= LWP::UserAgent->new();
    my $resp= $browser->post( $URL, $params);
    # without this check an error page would be parsed as if it were results
    $resp->is_success
        or die "request to $URL failed: ", $resp->status_line, "\n";
    $doc= $resp->content;
  }

# tidy works on a file (-modify rewrites it in place), so save the page first.
my $TMP= "$0.tmp";
open( my $out, '>', $TMP) or die "cannot write $TMP: $!";
print {$out} $doc;
close( $out) or die "cannot close $TMP: $!";

# List form of system: no shell involved, so a script path containing spaces
# or shell metacharacters cannot break (or hijack) the command.
system( 'tidy', '-asxhtml', '-numeric', '-modify', $TMP);
if( $? == -1)
  { die "cannot run tidy: $!\n"; }
elsif( ($? >> 8) >= 2)
  { # tidy exits with 1 for mere warnings, 2 when it found errors
    warn "tidy reported errors, $TMP may not be well-formed\n";
  }

my $t= XML::Twig->new( keep_encoding => 1);
$t->parsefile( $TMP);

my $content= $t->first_elt( '[@class="main_content"]')
    or die "no element with class 'main_content' in the page\n";
my $table= $content->first_child( 'table')
    or die "no result table in the 'main_content' element\n";

foreach my $tr ($table->children( 'tr'))
  { my @td= $tr->children( 'td');
    next if @td < 4;                    # not a 4-cell row: leave untouched

    # drop the 2nd and 4th cells (@td was captured before, indexes are stable)
    $td[1]->cut;
    $td[3]->cut;

    # turn the link in the 3rd cell into an absolute URL
    my $link= $td[2]->first_child( 'a') or next;
    my $href= $link->att( 'href');
    next unless defined $href;
    unless( $href =~ m{\A[a-z][a-z0-9+.-]*:}i)   # already absolute: keep as is
      { $href= "http://perlmonks.org/$href"; }
    $link->set_att( href => $href);
  }

# sort the rows on their 'td' field, in reverse order
$table->sort_children_on_field( 'td', order => 'reverse');
$table->print;

warn "done\n";

if( $opt{k})
  { warn "raw data is in $TMP\n"; }
else
  { unlink $TMP or warn "cannot remove $TMP: $!\n"; }
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Last hour | Other CB clients
Other Users?
Others surveying the Monastery: (6)
As of 2024-03-28 10:27 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found