#/usr/bin/perl -w use LWP::Simple; use strict qw(like a dominatrix); $|++; #step 1 - get list of new nodes. # get list of answered nodes # remove all nodes answered. my %newestnodes = &GetNodes(); my @merlyn_preface; open ANSWERED, ") { chomp; delete $newestnodes{$_} if exists $newestnodes{$_}; } close ANSWERED; #step 3 - for each unreplied node, compile "best" words. foreach (sort keys %newestnodes){ my $keyword = GetNode ($_); my $column = GetAnswer($keyword); if (defined $column) { @merlyn_preface = ("I've already covered this topic. Check out my Web Techniques column on the subject.", "See my WT column on '$keyword' ideas.", "For a more full featured exploration of $keyword concepts, see my WT Column.", "I don't see why people insist on trying to write partially implemented solutions for this type of thing, especially when they can reference this'$keyword' WT column." ); } else { @merlyn_preface = ("I don't really understand your request, and I'm not sure you know what you want to do. Nonetheless, I suggest you browse my Web Techniques Perl columns, and see if something helps you there.", "This smacks of cargo cult code. If I tried to run this past a customer, he'd shoot me. Are you sure you understand the problem?", "Why reinvent the wheel? Check [CPAN://$keyword] before you put serious time into this.", "I've written a lot of things here on the topic of $keyword. Try searching for them before asking questions like this.", "Oddly enough, this is precisely demonstrated in an upcoming WT column. Sadly, I can't republish the column until it has appeared in print, so wait a month or two and you'll see the whole thing.", "I smell homework!" ); } print $merlyn_preface [rand (scalar @merlyn_preface)];print "\n"; } open ANSWERED, ">>c:\\answered_nodes.txt" || die "No answers! $!"; foreach (sort keys %newestnodes){ print ANSWERED "$_\n";} close ANSWERED; #step 4 - search for appropriate articles, return the url for one. # if there are no appropriate articles, return "upcoming" or # "cargo cult!" #step 5 - add replied nodes to flat file sub GetNodes { my $newnodes = get('http://perlmonks.org/index.pl?node_id=3628'); my @newsopw = ($newnodes =~/New Questions\<\/a\>\<\/H3\>\(.*?)\<\/TABLE\>/i); $newsopw[0] =~s/ (\<\/TR\>)/\n/ig; my %checknodes; while ($newsopw[0]=~/\?node_id=(\d*)\&.*?\?node_id=(\d*)\&/ig){ $checknodes{$1}=1; } return %checknodes; } sub GetNode{ my $node = shift; my $url= "http://perlmonks.org/index.pl?node_id=$node"; my $nodetext = get ($url); if ($nodetext=~/(.*?)

.*?/ /g; $text =~s/[^a-zA-Z0-9 ]//ig; my @words = split /\s+/, $text; my %freq; my %common; open COMMON, "common.txt" or die "no common words"; while () {chomp;my $tempwd= uc ($_) ;$common{"$tempwd"}=1;} close COMMON; foreach (@words) { my $tempwd = uc($_); if ($common{"$tempwd"}) {;next} $freq{$_}++ ; } my $maxval; my $search=""; foreach (sort {$freq{$b} <=>$freq{$a}} keys %freq) { if ($freq{$_}>=$maxval) { next if !/[a-zA-Z0-9]/; $maxval=$freq{$_}; return $_; } else {last} }; } } sub GetAnswer{ my $keyword = shift; $merlyn = get "http://web.stonehenge.com/cgi/wtsearch?search=$keyword"; # this had a die clause on it, but I think merlyn's got a throttle on the page... # dying isn't sexy anyway. # if ($merlyn =~/
(.*?)<\/PRE>/gis ){
        my $columns = $1;
        my %uniquecolumns;
        while ($columns =~m|http://www.stonehenge.com/merlyn/WebTechniques/col(\d+).listing.txt|gi) {
            $uniquecolumns{$1}=1;
        }
        foreach (sort {rand(1) <=>rand(1)} keys %uniquecolumns) { return "http://www.stonehenge.com/merlyn/WebTechniques/col$_.html"}
    }
    return undef;
}