#!/usr/local/bin/perl -w # # Version 1.00.00 - 2000/08/05 - Initial incarnation # Version 1.00.10 - 2000/08/05 - A few cleanups per node 26390 # Version 1.00.20 - 2000/08/08 - Added DBI support # Version 1.00.30 - 2000/09/02 - Fix error if number of nodes is a multiple of 50 # # Invoke with './luke_repwalker.pl -?' for help # # The username and/or password can be embedded into the script, if you don't want command # line arguments. # # Compares the users current writeups to a previous snapshot, displaying articles that have # been added, deleted, or reputations that have changed since the last run. Unless disabled. # the new writeups info is saved as the snapshot for the next run. # # The output can either be displayed at the user's console, and/or it can be emailed to a given # user, via MIME::Lite. # # For a cron job, the following entry will run every hour at 0 minutes past, only generate output # when something has changed, e-mail us the results, and update the mySQL database. You will, of # course, have to change the fields to match who/what/where and when you really are. # # 0 * * * * /PMUtils/luke_repwalker.pl -u pmuser -p pmpw -e -t '"PerlDude" ' -z -d # # The SQL necessary to create the mySQL table is located at the bottom of the output file, and may # be fed to 'mysqldump' to create the table. You'll need to create the database it's going to live # in, first. # # Requires: # HTML::TableExtract # LWP::Simple # Text::CSV_XS # MIME::Lite; # DBI; # # Copyright 2000(c) J.C.Wren jcwren@jcwren.com # No rights reserved, use as you see fit. I'd like to know about it, though, just for kicks. # use strict; use Carp; use LWP::Simple; use Text::CSV_XS; use MIME::Lite; use DBI; use IO::File; use Getopt::Std; use vars qw($def_username $def_password $def_filename); use vars qw($def_dbhost $def_dbdb $def_dbtable $def_dbuser $def_dbpw); use vars qw($def_mto $def_msubject $def_mserver $def_mfrom); use vars qw($pmsite $pmpagelen); # # Set these accordingly, if you don't want command line parameters. # $def_username = ''; # username, unless -u is preferred $def_password = ''; # password, unless -p is preferred $def_filename = "$ENV{HOME}/.rep.%s"; # snapshot file $def_mto = ''; # no default 'to' user $def_msubject = 'Perlmonks.org Reputation Change Report'; # default title $def_mserver = 'localhost'; # default mailserver $def_mfrom = '%s'; # %s means use the 'to' parameter $pmsite = 'http://www.perlmonks.org/index.pl'; # vroom's house of illrepute $pmpagelen = 50; # articles returned per page $def_dbhost = 'localhost'; # Where our database is hosted $def_dbdb = 'Perlmonks'; # Name of our database $def_dbtable = 'Reputation'; # Name of our table $def_dbuser = 'pmuser'; # Our mySQL username $def_dbpw = 'pmdude'; # Our mySQL password # # # { my %args = (); my $out = ""; getopts ('u:p:F:t:f:s:m:Inhe?cbzP123d', \%args); if ($args{'?'}) { usage (); exit; } if ($args {P}) { local $| = 1; print "Password: "; $args {p} = ; chomp ($args{p}); } my $username = $args{u} || $def_username; my $password = $args{p} || $def_password; my $filename = $args{F} || sprintf ($def_filename, $username); $username or die "No username. Program terminated.\n"; $password or die "No password. Program terminated.\n"; (!$args{I} || !$args{n}) or die "-I and -n are mutually exclusive. Program terminated\n"; # # # if ($args{I}) { my $hreplist = initialize_rep_file ($username, $password, $filename); if ($args{d}) { my @nodelist = (); my %dbreplist = (); push @nodelist, $_ foreach (keys %$hreplist); update_replist ('I', \%dbreplist, $hreplist, \@nodelist); db_update (\%dbreplist); } exit; } if (!-e $filename) { print "No previous reputation snapshot file exists. Use -I to create\n"; exit; } my $hmailopts = confirm_mailargs ($args{e}, $args{t}, $args{m}, $args{f}, $args{s}); my ($outd, $outr, $dbreplist) = compare_reps ($username, $password, $filename, $args{n}, $args{b}, $args{z}); if (defined ($outd) && defined ($outr)) { my $out; $out = $outr . $outd . "\n"; $out = $outr . "\n" if ($args{1} && !$args{3}); $out = $outd . "\n" if ($args{2} && !$args{3}); print $out if ($args{c} || !$args{e}); if ($args{e}) { MIME::Lite->send ('smtp', $hmailopts->{server}, Timeout=>60); my $msg = MIME::Lite->new (From => $hmailopts->{from}, To => $hmailopts->{to}, Subject => $hmailopts->{subject}, Type => 'TEXT', Encoding => '7bit', Data => $out) || croak "MIME::Lite->new failed"; $msg->send || croak "MIME::Lite->send failed."; } db_update ($dbreplist) if $args{d}; } } sub compare_reps { @_ == 6 or croak "Incorrect number of parameters"; my ($username, $password, $filename, $noupdate, $brief, $zero) = @_; my @newnodes = (); my @deletednodes = (); my @changednodes = (); my %replist = (); my $outd = undef; my $outr = undef; my $holdreps = read_file ($filename); my $hnewreps = get_article_list ($username, $password); scalar keys %$hnewreps != 0 or die "You have no articles, perhaps?\n"; # # Find all the new, deleted and changed entries # foreach (keys %$hnewreps) {push (@newnodes, $_) if !exists ($holdreps->{$_})} foreach (keys %$holdreps) {push (@deletednodes, $_) if !exists ($hnewreps->{$_})} foreach (keys %$holdreps) {push (@changednodes, $_) if exists ($hnewreps->{$_}) && $hnewreps->{$_}->{'rep'} != $holdreps->{$_}->{'rep'}} # # For any article in the @changednodes array, move the 'rep' field from %holdreps into # the 'last' of %nhewreps. This makes displaying it really easy. # $hnewreps->{$_}->{'last'} = $holdreps->{$_}->{'rep'} foreach (@changednodes); # # If no -z (zero output) flag, and we have changes, then generate the reports. Otherwise, if # -z is set, then return undef for both reports. # if (!$zero || $#newnodes != -1 || $#deletednodes != -1 || $#changednodes != -1) { if ($brief) { $outd = "\n"; $outd .= "New nodes: " . ($#newnodes == -1 ? "none" : join (',', @newnodes)) . "\n"; $outd .= "Deleted nodes: " . ($#deletednodes == -1 ? "none" : join (',', @deletednodes)) . "\n"; $outd .= "Changed nodes: " . ($#changednodes == -1 ? "none" : join (',', @changednodes)) . "\n"; } else { my $longest_title = find_longest_title ([{'array' => \@newnodes, 'hash' => $hnewreps}, {'array' => \@deletednodes, 'hash' => $holdreps}, {'array' => \@changednodes, 'hash' => $hnewreps} ]); $outd = sprintf ("\nNew nodes: %d\n", scalar @newnodes) . display_nodelist ($hnewreps, \@newnodes, $longest_title); $outd .= sprintf ("\nDeleted nodes: %d\n", scalar @deletednodes) . display_nodelist ($holdreps, \@deletednodes, $longest_title); $outd .= sprintf ("\nChanged nodes: %d\n", scalar @changednodes) . display_nodelist ($hnewreps, \@changednodes, $longest_title); } $outr = reputation_report ($hnewreps); write_file ($filename, $hnewreps) unless $noupdate; # # This builds the hash that might be written to the database # update_replist ('N', \%replist, $hnewreps, \@newnodes); update_replist ('D', \%replist, $holdreps, \@deletednodes); update_replist ('C', \%replist, $hnewreps, \@changednodes); } return ($outd, $outr, \%replist); } sub update_replist { @_ == 4 or croak "Incorrect number of parameters"; my ($type, $dbreplist, $replist, $repnodes) = @_; foreach (@$repnodes) { croak "Duplicate node_id $_" if exists ($dbreplist->{$_}); $dbreplist->{$_} = $replist->{$_}; $dbreplist->{$_}->{type} = $type; } } sub reputation_report { @_ == 1 or croak "Incorrect number of parameters"; my $hrephash = shift; my $total = 0; my $repmax = 0; my $repmin = 999999999; my $out = ""; scalar keys %$hrephash >= 0 or die "You have no articles, perhaps?\n"; for (keys %$hrephash) { $total += $hrephash->{$_}->{rep}; $repmax = max ($repmax, $hrephash->{$_}->{rep}); $repmin = min ($repmin, $hrephash->{$_}->{rep}); } $out = "\n"; $out .= sprintf (" Total articles: %d\n", scalar keys %$hrephash); $out .= sprintf (" Total reputation: %d\n", $total); $out .= sprintf (" Min reputation: %d\n", $repmin); $out .= sprintf (" Max reputation: %d\n", $repmax); $out .= sprintf ("Average reputation: %3.2f\n", $total / scalar keys %$hrephash); return ($out); } sub display_nodelist { @_ == 3 or croak "Incorrect number of parameters"; my ($rnodehash, $rnodelist, $longest) = @_; my $out = ""; return (" (none)\n") if ($#$rnodelist == -1); my $fmt = '% 6d | %-' . $longest . 's | %s | % 4d -> % 4d'; foreach (@$rnodelist) { $out .= sprintf ("$fmt\n", $rnodehash->{$_}->{nodeid}, $rnodehash->{$_}->{title}, $rnodehash->{$_}->{date}, $rnodehash->{$_}->{last}, $rnodehash->{$_}->{rep}); } return ($out); } sub find_longest_title { @_ == 1 or croak "Incorrect number of parameters"; my $hashlist = shift; my $linelen = 0; foreach (@$hashlist) { my $nodes = $_->{'hash'}; $linelen = max ($linelen, length ($nodes->{$_}->{'title'})) foreach (@{$_->{'array'}}); } return ($linelen); } sub max { my ($a, $b) = @_; return ($a > $b ? $a : $b); } sub min { my ($a, $b) = @_; return ($a < $b ? $a : $b); } sub initialize_rep_file { @_ == 3 or croak "Incorrect number of parameters"; my ($username, $password, $filename) = @_; my $hnewreps = get_article_list ($username, $password); scalar keys %$hnewreps >= 0 or die "You have no articles, perhaps?\n"; write_file ($filename, $hnewreps); return ($hnewreps); } sub read_file { @_ == 1 or croak "Incorrect number of parameters"; my $filename = shift; my %nodehash = (); my $fh = IO::File->new ("<$filename"); defined ($fh) or croak "Can't open file \'$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, 'eol' => "\n" }); while (<$fh>) { $csv->parse ($_) or croak "Can't parse input fields"; my ($nodeid, $article, $rep, $date) = $csv->fields (); !exists ($nodehash {$nodeid}) or croak "Node ID $nodeid is duplicated!"; $nodehash {$nodeid} = {'nodeid' => $nodeid, 'title' => $article, 'rep' => $rep, 'last' => $rep, 'date' => $date }; } $fh->close; return (\%nodehash); } sub write_file { @_ == 2 or croak "Incorrect number of parameters"; my ($filename, $nodehash) = @_; my $fh = IO::File->new (">$filename"); defined ($fh) or croak "Can't create file \"$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, 'eol' => "\n" }); for (sort {$a <=> $b} keys %$nodehash) { $csv->print ($fh, [ @{ $nodehash->{$_} }{ qw(nodeid title rep date) } ]) or croak "Text::CSV_XS->print failed"; } $fh->close; } # # Don't display the URL when we die (which would be more informative), because the users # password might be e-mailed somewhere. And we sure don't want some dweeb to be impersonating # us on perlmonks.org, do we? # sub get_article_list { @_ == 2 or croak "Incorrect number of parameters"; my ($username, $password) = @_; my %nodehash = (); $LWP::Simple::FULL_LWP = 1; for (my $i = 0; 1; $i += $pmpagelen) { my $url = "$pmsite?user=$username&passwd=$password&op=login&node=perl+monks+user+search&usersearch=$username&orderby=createtime%20DESC&count=$i"; my $page = 0; $page = get ($url) or croak "Get on $pmsite failed."; last if (get_article_page ($page, ['Writeup', 'Rep', 'Create Time'], \%nodehash, $i % $pmpagelen, $i / $pmpagelen) < $pmpagelen); } return (\%nodehash); } sub get_article_page { @_ == 5 or croak "Incorrect number of parameters"; my ($html, $tablecols, $nodehash, $lines, $pageno) = @_; my $rowcnt = 0; $html =~ s/bgcolor=>/bgcolor="">/mg; my $te = new jcwExtract (headers => $tablecols)->parse ($html); if (scalar $te->table_states != 1) { # # Fix for if you have an even multiple of $pmpagelen writeups (50, 100, 150, 200...). An # error is caused since the PM code doesn't display the table headers (Writeup, Rep, Date) # if the count parm exceeds the number of writeups. Presumably, since we've gotten the # first page correctly, the password isn't going to change between fetches on subsequent # pages (the window is *very* small) # return 0 if (scalar $te->table_states == 0 && $pageno); croak sprintf ("Wrong number of tables (%d) returned! (Probably bad username/password)\n", scalar $te->table_states); } foreach my $ts ($te->table_states) { foreach ($ts->rows) { last if (@$_[2] !~ /\d+-\d+-\d+/); my ($nodeid, $title) = @$_[0] =~ /^node_id=(\d+):(.*)/i; !exists ($nodehash->{$nodeid}) or croak "Node $nodeid is duplicated!"; $nodehash->{$nodeid} = {'nodeid' => $nodeid, 'title' => $title, 'rep' => @$_[1], 'last' => @$_[1], 'date' => @$_[2] }; $rowcnt++; } } return ($rowcnt); } # # OK, so if I was really smart, I'd have passed a hash in. Know what? Too much work, # too little return. # sub confirm_mailargs { @_ == 5 or croak "Incorrect number of parameters"; my ($eflag, $mto, $mserver, $mfrom, $msubject) = @_; my %mailargs = (); return undef if !$eflag; $mailargs {to} = $mto || $def_mto || die "-e specified, but no -t or script default\n"; $mailargs {server} = $mserver || $def_mserver || die "-e specified, but no -m or script default\n"; $mailargs {from} = $mfrom || $def_mfrom || die "-e specified, but no -f or script default\n"; $mailargs {subject} = $msubject || $def_msubject || die "-e specified, but no -s or script default\n"; $mailargs {from} = sprintf ($mailargs {from}, $mailargs {to}); return (\%mailargs); } sub db_update { @_ == 1 or croak "Incorrect number of parameters"; my $hreplist = shift; my $database = DBI->connect ("DBI:mysql:$def_dbdb:$def_dbhost", $def_dbuser, $def_dbpw); if (!defined $database) { warn "Can't open the $def_dbdb database\n"; return; } foreach (sort keys %$hreplist) { my $command = sprintf ("INSERT INTO %s (Type, NodeId, Title, Date, LastReputation, Reputation) VALUES (%s, %d, %s, %s, %d, %d)", $def_dbtable, $database->quote ($hreplist->{$_}->{type}), $hreplist->{$_}->{nodeid}, $database->quote ($hreplist->{$_}->{title}), $database->quote ($hreplist->{$_}->{date}), $hreplist->{$_}->{last}, $hreplist->{$_}->{rep}); $database->do ($command) or croak; } $database->disconnect; } BEGIN { # # This is not good code. It's really evil and the both the author of this package, and the author # of HTML::TableExtract should be severely beaten about the head and shoulders. # package jcwExtract; use strict; use HTML::TableExtract; @jcwExtract::ISA = qw(HTML::TableExtract HTML::TableExtract::TableState); my $node_id = undef; { local $^W = 0; # # Override the _add_text mode that if $node_id is defined, we'll insert the node_id # value at the front of the string. # eval 'sub HTML::TableExtract::TableState::_add_text { my ($self, $txt, $skew_column) = @_; defined $txt or return; my $row = $self->{content}[$#{$self->{content}}]; $txt = sprintf ("node_id=%d:%s", $node_id, $txt) if defined ($node_id); $node_id = undef; $row->[$skew_column] .= $txt; $txt; }'; } # # Overridden start method, so we can look for tags # sub start { my $self = shift; my ($tag, $attr, $attrseq, $origtext) = @_; $self->SUPER::start (@_); # # If it's a tag, and has a node_id, then $1 will contain the node_id. # If we're in a table cell, set $node_id to $1, otherwise undef it. We don't simply # set $node_id to undef if it's not a tag, because we want to save the last value # if there is a subsequent or or somesuch tag between the and tags. # if ($tag eq 'a' && defined ($attr->{'href'}) && $attr->{'href'} =~ /\bnode_id=(\d+)/i) { $node_id = ($self->_current_table_state->{in_cell}) ? $1 : undef; } } 1; } sub usage { print <' to get textual names in the To: and From: fields, instead of the 'user\@address' form. By default, if -e is used, no output is sent to the console. The -c flag will force the output to the console, in addition to mailing. -c specified without -e is meaningless, but not an error. ENDOFHELP } __END__ # MySQL dump 7.1 # # Host: localhost Database: Perlmonks #-------------------------------------------------------- # Server version 3.22.32 # # Table structure for table 'Reputation' # CREATE TABLE Reputation ( ReputationID int(10) unsigned DEFAULT '0' NOT NULL auto_increment, Type char(1) DEFAULT 'U' NOT NULL, NodeId int(10) unsigned DEFAULT '0' NOT NULL, Title varchar(160) DEFAULT '' NOT NULL, Date datetime DEFAULT '0000-00-00 00:00:00' NOT NULL, LastReputation int(11) DEFAULT '0' NOT NULL, Reputation int(11) DEFAULT '0' NOT NULL, Modified timestamp(14), PRIMARY KEY (ReputationID) );