#!/usr/bin/perl # AutoPMSuperSearch.pl # Based on: Keeping Tutorials Current, by Limbic~Region [id://381389] and [pad://bobf] # Usage: perl AutoPMSuperSearch.pl username password searchterm > filename.ext use strict; use warnings; use constant USER => 15; use Data::Dumper; use Getopt::Std; use HTML::TableContentParser; use HTML::TokeParser::Simple; use PerlMonks::Mechanized; use URI; use WWW::Mechanize; use XML::Simple; my ($monk, $opt, $tut) = ( {}, {}, [] ); my %seen = (); Get_Args( $opt ); my $user = shift(@ARGV); my $passwd = shift(@ARGV); my $searchterm = shift (@ARGV); my $mech = WWW::Mechanize->new( autocheck => 1 ); Get_SearchResults(); my $pm_obj = PerlMonks::Mechanized->new($user,$passwd); print "Nodes to fetch:\n"; Print_Nodes(); print "\n"; Get_Nodes(); sub Get_Args { my $opt = shift; my $Usage = qq{Usage: $0 [options] -h : This help message. -b : Base URL - default: http://www.perlmonks.org/ } . "\n"; getopts( 'hb:' , $opt ) or die $Usage; die $Usage if $opt->{h}; $opt->{b} ||= 'http://www.perlmonks.org/'; } sub Monk { $mech->get( $opt->{b} . $_[0] . '&displaytype=xml' ); my $node = XML::Simple->new()->XMLin( $mech->content() ); return 1 if exists $node->{type}{id} && $node->{type}{id} == USER; return 0; } sub Process_Link { my ($link, $type) = @_; my $p = HTML::TokeParser::Simple->new( \$link->{data} ); my ($node, $label); while ( my $token = $p->get_token ) { last if $token->is_end_tag; if ( $token->is_start_tag( 'a' ) ) { $node = lc $token->return_attr( 'href' ); next; } $label = lc URI->new( '/index.pl?node=' . $token->as_is )->as_string if $token->is_text; } die "Something went terribly wrong" if ! $node || ! $label; if ( $type eq 'author' ) { $monk->{ $label } = undef; $monk->{ $node } = undef; } else { $node =~ s/\?node_id=//; push @{ $tut } , { id => $node, name => $label }; } } sub Process_Table { my $table = HTML::TableContentParser->new()->parse( $mech->content() ); for my $row ( @{ $table->[0]{rows} } ) { Process_Link( $row->{cells}[1], 'author' ); Process_Link( $row->{cells}[2], 'tutorial' ); } } sub URL { return URI->new( $opt->{b} . 'index.pl?node=' . $_[0] . '&displaytype=print' )->as_string } # sub Get_SearchResults { $mech->get( URL( 'Super Search' ) ); $mech->field( 'BIT', $searchterm ); # Match text containing... $mech->field( 'xa', '1' ); # Exclude Authors $mech->field( 'a', 'NodeReaper' ); # Author NodeReaper $mech->click_button( name => 'go' ); # Submit Form while ( $mech->content !~ /Finished searching database/ ) { Process_Table(); $mech->click_button( name => 'nx' ); } } sub GetThreadListAndAllNodeContents { my $nodeid = shift; my @node_ids = (); my $info = $pm_obj->node_info( $nodeid ); # $info is a reference foreach my $item ( @$info ) { # If this is not a root node, get the root and build the thread_list from there if (exists( $item->{root_node} )) { @node_ids = @{$pm_obj->thread_list( $item->{root_node} )}; # de-reference - see [id://69927] } else { @node_ids = @{$pm_obj->thread_list($nodeid)}; } } foreach $nodeid (@node_ids) { unless ($seen{$nodeid}) { print ">>>>>\t" . $nodeid . ":\n"; my $data = $pm_obj->node_content( $nodeid ); # make a note we got this one so we don't get it again print Dumper( $data ); $seen{$nodeid} = 1; } } } sub Get_Nodes { GetThreadListAndAllNodeContents( $_->{id} ) for grep defined , @$tut; } sub Print_Nodes { print "\t$_->{id}\n" for grep defined , @$tut; } ####
Editors' Tools

[id://28877|NTC] | [id://59481|NRE]
[link://?displaytype=display;node_id=`id`|Display] [link://?displaytype=editors;node_id=`id`|Edit] | [link://?node_tr=`id`;node_id=483915|Retitle] [link://?mnode=`id`;node_id=60309|Reparent]
[link://?snippet_id=`id`;node_id=481230|Unsnippet]
[id://499680] | [id://499693]

[id://43037|Shortcuts] | [id://17558|Writeup Formatting]
[id://29281|PM HTML] | [id://477713]
[id://490460] | [id://237035]


Free Nodelet Tools
[href://?node_id=`id`;op=_freer;at=todo|Add] above, [href://?node_id=`id`;op=_freer;at=ad;in=pub|Public], [href://?node_id=`id`;op=_freer;at=ad;in=priv|Private], [href://?node_id=`id`;op=_freer;at=ad;in=priv;in=pub|Both].
[link://?append_to_personal_nodelet=%5Bid%3A%2F%2F`id`%5D;node_id=`id`|Add to Personal Nodelet]
Cabal Inboxes

[id://475485] | [link://?type=strangedoc;recipient=475485;node=message%20inbox|BoF Inbox]
[id://499790] | [link://?type=strangedoc;recipient=499790;node=message%20inbox|pedagogues inbox] [link://?sendto=pedagogues;node_id=48824|/msg pedagogues]
[id://59438] | [link://?type=strangedoc;recipient=59438;node=message%20inbox|janitors inbox]
[id://106850] | [link://?type=strangedoc;recipient=106850;node=message%20inbox|pmdev inbox]
[id://56883] | [link://?type=strangedoc;recipient=56883;node=message%20inbox|PU inbox] | [id://56087]
##
##


[id://48824|Message Inbox]
[FullPage Chat] | [http://nbpfaus.net/~pfau/cbhistory.cgi?site=PM|PM Recent CB Msgs]
[http://crazyinsomniac.perlmonk.org/perl/cblast35/cblast35.cgi|cb Last 35] | [http://pthbb.org/cb/last.cgi|another CB last]

[id://`id`|THIS NODE:] [id://`id`]
[link://?node_id=3333;parent=`id`|Comment on `id`]

[id://454610] | [id://511504]
[link://?node_id=6364&user=planetscape|planetscape's writeups] | [link://?node_id=6364&user=planetscape&showqa=on|planetscape's Q&A]

[http://mojotoad.perlmonk.org/cbs/|cbstats] | [http://desert-island.dynodns.net/perl/pmplanet_na.html|PMPlanet]
[http://desert-island.dynodns.net/perl/pmplanet_google.html|PMPlanet: Google Maps]
[http://tinymicros.com/pm/index.php?goto=MainPage|PMStats] | [http://desert-island.dynodns.net/perl/im2/karma.html|Karma]

[http://perlmonk.org/|perlmonk.org] | [http://perlmonk.org/disk.html|total disk usage]
[id://117450|PM Snippets Index] | [http://hop.perl.plover.com/|HOP]

[id://284175|Start Learning] | [id://352893|Perldoc POD QuickRef] | [id://408254|Perl documentation documentation] | [http://perldoc.perl.org/|perldoc.perl.org]
[id://401006] | [id://8070]


Check All Upvotes | Clear All Radios


[link://?node_id=145597;displaytype=viewcode|fullpage testing node]