#!/usr/bin/perl
# AutoPMSuperSearch.pl
# Based on: Keeping Tutorials Current, by Limbic~Region [id://381389] and [pad://bobf]
# Usage:    perl AutoPMSuperSearch.pl username password searchterm > filename.ext

use strict;
use warnings;
use constant USER => 15;
use Data::Dumper;
use Getopt::Std;
use HTML::TableContentParser;
use HTML::TokeParser::Simple;
use PerlMonks::Mechanized;
use URI;
use WWW::Mechanize;
use XML::Simple;

my ($monk, $opt, $tut) = ( {}, {}, [] );
my %seen = ();

Get_Args( $opt );

my $user = shift(@ARGV);
my $passwd = shift(@ARGV);
my $searchterm = shift (@ARGV);

my $mech = WWW::Mechanize->new( autocheck => 1 );

Get_SearchResults();

my $pm_obj = PerlMonks::Mechanized->new($user,$passwd);

print "Nodes to fetch:\n";
Print_Nodes();
print "\n";
Get_Nodes();

sub Get_Args {
    my $opt = shift;
    my $Usage = qq{Usage: $0 [options]
        -h : This help message.
        -b : Base URL - default:  http://www.perlmonks.org/
    } . "\n";
    getopts( 'hb:' , $opt ) or die $Usage;
    die $Usage if $opt->{h};
    $opt->{b} ||= 'http://www.perlmonks.org/';
}

sub Monk {
    $mech->get( $opt->{b} . $_[0] . '&displaytype=xml' ); 
    my $node = XML::Simple->new()->XMLin( $mech->content() );
    return 1 if exists $node->{type}{id} && $node->{type}{id} == USER;
    return 0;
}

sub Process_Link {
    my ($link, $type) = @_;
    my $p = HTML::TokeParser::Simple->new( \$link->{data} );
    my ($node, $label);
    while ( my $token = $p->get_token ) {
        last if $token->is_end_tag;
        if ( $token->is_start_tag( 'a' ) ) {
            $node = lc $token->return_attr( 'href' );
            next;
        }
        $label = lc URI->new( '/index.pl?node=' . $token->as_is )->as_string if $token->is_text;
    }
    die "Something went terribly wrong" if ! $node || ! $label;
    if ( $type eq 'author' ) {
        $monk->{ $label } = undef;
        $monk->{ $node  } = undef;
    }
    else {
	$node =~ s/\?node_id=//;    
        push @{ $tut } , { id => $node, name => $label };
    }
}

sub Process_Table {
    my $table = HTML::TableContentParser->new()->parse( $mech->content() );
    for my $row ( @{ $table->[0]{rows} } ) {
        Process_Link( $row->{cells}[1], 'author'   );
        Process_Link( $row->{cells}[2], 'tutorial' );
    }
}

sub URL { return URI->new( $opt->{b} . 'index.pl?node=' . $_[0] . '&displaytype=print' )->as_string }

#
sub Get_SearchResults {
    $mech->get( URL( 'Super Search' ) );
    $mech->field( 'BIT', $searchterm );        # Match text containing... 
    
    $mech->field( 'xa', '1' );                # Exclude Authors
    $mech->field( 'a', 'NodeReaper' );        # Author  NodeReaper 
    $mech->click_button( name => 'go' );      # Submit  Form
    while ( $mech->content !~ /Finished searching database/ ) {
        Process_Table();
        $mech->click_button( name => 'nx' );
    }
}

sub GetThreadListAndAllNodeContents {
    my $nodeid = shift;
    my @node_ids = ();
    my $info =  $pm_obj->node_info( $nodeid ); # $info is a reference
    
    foreach my $item ( @$info ) {
    	# If this is not a root node, get the root and build the thread_list from there
        if (exists( $item->{root_node} )) {
	    	@node_ids = @{$pm_obj->thread_list( $item->{root_node} )}; # de-reference - see [id://69927]
	} else {
		@node_ids = @{$pm_obj->thread_list($nodeid)};
	}    
    }

    foreach $nodeid (@node_ids) {
	    unless ($seen{$nodeid}) {
		print ">>>>>\t" . $nodeid . ":\n";
		my $data = $pm_obj->node_content( $nodeid );
		# make a note we got this one so we don't get it again
		print Dumper( $data );
		$seen{$nodeid} = 1;
	    }
    }

}

sub Get_Nodes {
	GetThreadListAndAllNodeContents( $_->{id} ) for grep defined , @$tut;
}

sub Print_Nodes {
	print "\t$_->{id}\n" for grep defined , @$tut;
}