#!/usr/bin/perl # AutoPMSuperSearch.pl # Based on: Keeping Tutorials Current, by Limbic~Region [id://381389] and [pad://bobf] # Usage: perl AutoPMSuperSearch.pl username password searchterm > filename.ext use strict; use warnings; use constant USER => 15; use Data::Dumper; use Getopt::Std; use HTML::TableContentParser; use HTML::TokeParser::Simple; use PerlMonks::Mechanized; use URI; use WWW::Mechanize; use XML::Simple; my ($monk, $opt, $tut) = ( {}, {}, [] ); my %seen = (); Get_Args( $opt ); my $user = shift(@ARGV); my $passwd = shift(@ARGV); my $searchterm = shift (@ARGV); my $mech = WWW::Mechanize->new( autocheck => 1 ); Get_SearchResults(); my $pm_obj = PerlMonks::Mechanized->new($user,$passwd); print "Nodes to fetch:\n"; Print_Nodes(); print "\n"; Get_Nodes(); sub Get_Args { my $opt = shift; my $Usage = qq{Usage: $0 [options] -h : This help message. -b : Base URL - default: http://www.perlmonks.org/ } . "\n"; getopts( 'hb:' , $opt ) or die $Usage; die $Usage if $opt->{h}; $opt->{b} ||= 'http://www.perlmonks.org/'; } sub Monk { $mech->get( $opt->{b} . $_[0] . '&displaytype=xml' ); my $node = XML::Simple->new()->XMLin( $mech->content() ); return 1 if exists $node->{type}{id} && $node->{type}{id} == USER; return 0; } sub Process_Link { my ($link, $type) = @_; my $p = HTML::TokeParser::Simple->new( \$link->{data} ); my ($node, $label); while ( my $token = $p->get_token ) { last if $token->is_end_tag; if ( $token->is_start_tag( 'a' ) ) { $node = lc $token->return_attr( 'href' ); next; } $label = lc URI->new( '/index.pl?node=' . $token->as_is )->as_string if $token->is_text; } die "Something went terribly wrong" if ! $node || ! $label; if ( $type eq 'author' ) { $monk->{ $label } = undef; $monk->{ $node } = undef; } else { $node =~ s/\?node_id=//; push @{ $tut } , { id => $node, name => $label }; } } sub Process_Table { my $table = HTML::TableContentParser->new()->parse( $mech->content() ); for my $row ( @{ $table->[0]{rows} } ) { Process_Link( $row->{cells}[1], 'author' ); Process_Link( $row->{cells}[2], 'tutorial' ); } } sub URL { return URI->new( $opt->{b} . 'index.pl?node=' . $_[0] . '&displaytype=print' )->as_string } # sub Get_SearchResults { $mech->get( URL( 'Super Search' ) ); $mech->field( 'BIT', $searchterm ); # Match text containing... $mech->field( 'xa', '1' ); # Exclude Authors $mech->field( 'a', 'NodeReaper' ); # Author NodeReaper $mech->click_button( name => 'go' ); # Submit Form while ( $mech->content !~ /Finished searching database/ ) { Process_Table(); $mech->click_button( name => 'nx' ); } } sub GetThreadListAndAllNodeContents { my $nodeid = shift; my @node_ids = (); my $info = $pm_obj->node_info( $nodeid ); # $info is a reference foreach my $item ( @$info ) { # If this is not a root node, get the root and build the thread_list from there if (exists( $item->{root_node} )) { @node_ids = @{$pm_obj->thread_list( $item->{root_node} )}; # de-reference - see [id://69927] } else { @node_ids = @{$pm_obj->thread_list($nodeid)}; } } foreach $nodeid (@node_ids) { unless ($seen{$nodeid}) { print ">>>>>\t" . $nodeid . ":\n"; my $data = $pm_obj->node_content( $nodeid ); # make a note we got this one so we don't get it again print Dumper( $data ); $seen{$nodeid} = 1; } } } sub Get_Nodes { GetThreadListAndAllNodeContents( $_->{id} ) for grep defined , @$tut; } sub Print_Nodes { print "\t$_->{id}\n" for grep defined , @$tut; }