http://www.perlmonks.org?node_id=65221
Category: PerlMonks Related Scripts
Author/Contact Info J.C. Wren
jcwren@jcwren.com
Description:

This is an update of luke_repwalker.pl that uses mirods XML::Twig module. It's faster because it generates few hits to the server. People like tilly should appreciate this... And ultimately it's more reliable, since it doesn't try to parse the tables of article listings.

I posted this as new code so that the old one would still be available. Some people have had trouble installing the various XML modules, or don't wish to.

#!/usr/local/bin/perl -w
   
#
#  Version 1.00.00 - 2000/08/05 - Initial incarnation
#  Version 1.00.10 - 2000/08/05 - A few cleanups per node 26390
#  Version 1.00.20 - 2000/08/08 - Added DBI support
#  Version 1.00.30 - 2000/09/02 - Fix error if number of nodes is a mu
+ltiple of 50
#  Version 1.10.00 - 2001/03/17 - Rip out HTML::TableExtract, convert 
+to XML::Twig
#  Version 1.10.01 - 2001/03/18 - Fixed mirods comments in node 65444
#
#  Invoke with './luke_repwalker.pl -?' for help
#
#  The username and/or password can be embedded into the script, if yo
+u don't want command
#  line arguments.
#
#  Compares the users current writeups to a previous snapshot, display
+ing articles that have
#  been added, deleted, or reputations that have changed since the las
+t run.  Unless disabled.
#  the new writeups info is saved as the snapshot for the next run.
#
#  The output can either be displayed at the user's console, and/or it
+ can be emailed to a given
#  user, via MIME::Lite.
#
#  For a cron job, the following entry will run every hour at 0 minute
+s past, only generate output
#  when something has changed, e-mail us the results, and update the m
+ySQL database.  You will, of
#  course, have to change the fields to match who/what/where and when 
+you really are.
#
#    0 * * * * /PMUtils/luke_repwalker.pl -u pmuser -p pmpw -e -t '"Pe
+rlDude" <perldude@hackers.com>' -z -d
#
#  The SQL necessary to create the mySQL table is located at the botto
+m of the output file, and may
#  be fed to 'mysqldump' to create the table.  You'll need to create t
+he database it's going to live
#  in, first.
#
#  Requires:
#    LWP::Simple
#    Text::CSV_XS
#    MIME::Lite;
#    DBI;
#    XML::Twig
#
#  Copyright 2000,2001(c) J.C.Wren   jcwren@jcwren.com
#  No rights reserved, use as you see fit.  I'd like to know about it,
+ though, just for kicks.
#

use strict;
use Carp;
use XML::Twig;
use LWP::Simple;
use Text::CSV_XS;
use MIME::Lite;
use DBI;
use IO::File;
use Getopt::Std;

use vars qw($def_username $def_password $def_filename);
use vars qw($def_dbhost $def_dbdb $def_dbtable $def_dbuser $def_dbpw);
use vars qw($def_mto $def_msubject $def_mserver $def_mfrom);
use vars qw($pmsite $pmpagelen);

#
#  Set these accordingly, if you don't want command line parameters.
#
$def_username = '';                                         # username
+, unless -u is preferred
$def_password = '';                                         # password
+, unless -p is preferred
$def_filename = "$ENV{HOME}/.rep.%s";                       # snapshot
+ file
$def_mto      = '';                                         # no defau
+lt 'to' user
$def_msubject = 'Perlmonks.org Reputation Change Report';   # default 
+title
$def_mserver  = 'localhost';                                # default 
+mailserver
$def_mfrom    = '%s';                                       # %s means
+ use the 'to' parameter
$pmsite       = 'http://www.perlmonks.net/index.pl';        # vroom's 
+house of illrepute
$pmpagelen    = 50;                                         # articles
+ returned per page

$def_dbhost   = 'localhost';     # Where our database is hosted
$def_dbdb     = 'Perlmonks';     # Name of our database
$def_dbtable  = 'Reputation';    # Name of our table
$def_dbuser   = 'isername';      # Our mySQL username
$def_dbpw     = 'password';      # Our mySQL password

#
#
#
{
   my %args = ();
   my $out = "";

   getopts ('u:p:F:t:f:s:m:Inhe?cbzP123d', \%args);

   if ($args{'?'} || $args{h})
   {
      usage ();
      exit;
   }

   if ($args {P})
   {
      local $| = 1;
      print "Password: ";
      $args {p} = <STDIN>;
      chomp ($args{p});
   }

   my $username = $args{u} || $def_username;
   my $password = $args{p} || $def_password;
   my $filename = $args{F} || sprintf ($def_filename, $username);

   $username or die "No username.  Program terminated.\n";
   $password or die "No password.  Program terminated.\n";

   (!$args{I} || !$args{n}) or die "-I and -n are mutually exclusive. 
+ Program terminated\n";

   #
   # 
   #
   if ($args{I})
   {
      my $hreplist = initialize_rep_file ($username, $password, $filen
+ame);

      if ($args{d})
      {
         my @nodelist = ();
         my %dbreplist = ();

         push @nodelist, $_ foreach (keys %$hreplist);

         update_replist ('I', \%dbreplist, $hreplist, \@nodelist);

         db_update (\%dbreplist);
      }

      exit;
   }

   if (!-e $filename)
   {
      print "No previous reputation snapshot file exists.  Use -I to c
+reate\n";
      exit;
   }

   my $hmailopts = confirm_mailargs ($args{e}, $args{t}, $args{m}, $ar
+gs{f}, $args{s});

   my ($outd, $outr, $dbreplist) = compare_reps ($username, $password,
+ $filename, $args{n}, $args{b}, $args{z});

   if (defined ($outd) && defined ($outr))
   {
      my $out;

      $out = $outr . $outd . "\n";
      $out = $outr . "\n"         if ($args{1} && !$args{3});
      $out = $outd . "\n"         if ($args{2} && !$args{3});

      print $out if ($args{c} || !$args{e});
   
      if ($args{e})
      {
         MIME::Lite->send ('smtp', $hmailopts->{server}, Timeout=>60);
   
         my $msg = MIME::Lite->new (From     => $hmailopts->{from},
                                    To       => $hmailopts->{to},
                                    Subject  => $hmailopts->{subject},
                                    Type     => 'TEXT',
                                    Encoding => '7bit',
                                    Data     => $out) || croak "MIME::
+Lite->new failed";
   
         $msg->send || croak "MIME::Lite->send failed.";
      }

      db_update ($dbreplist) if $args{d};    
   }
}

sub compare_reps
{
   @_ == 6 or croak "Incorrect number of parameters";

   my ($username, $password, $filename, $noupdate, $brief, $zero) = @_
+;
   my @newnodes = ();
   my @deletednodes = ();
   my @changednodes = ();
   my %replist = ();
   my $outd = undef;
   my $outr = undef;

   my $holdreps = read_file ($filename);
   my $hnewreps = get_article_list ($username, $password);

   scalar keys %$hnewreps != 0 or die "You have no articles, perhaps?\
+n";

   #
   #  Find all the new, deleted and changed entries
   #
   foreach (keys %$hnewreps) {push (@newnodes, $_)     if !exists ($ho
+ldreps->{$_})} 
   foreach (keys %$holdreps) {push (@deletednodes, $_) if !exists ($hn
+ewreps->{$_})}
   foreach (keys %$holdreps) {push (@changednodes, $_) if  exists ($hn
+ewreps->{$_}) && $hnewreps->{$_}->{'rep'} != $holdreps->{$_}->{'rep'}
+}
   
   #
   #  For any article in the @changednodes array, move the 'rep' field
+ from %holdreps into
   #  the 'last' of %nhewreps.  This makes displaying it really easy.
   #
   $hnewreps->{$_}->{'last'} = $holdreps->{$_}->{'rep'} foreach (@chan
+gednodes);

   #
   #  If no -z (zero output) flag, and we have changes, then generate 
+the reports.  Otherwise, if
   #  -z is set, then return undef for both reports.
   #
   if (!$zero || $#newnodes != -1 || $#deletednodes != -1 || $#changed
+nodes != -1)
   {
      if ($brief)
      {
         $outd  = "\n";
         $outd .= "New nodes: "     . ($#newnodes     == -1 ? "none" :
+ join (',', @newnodes))     . "\n";
         $outd .= "Deleted nodes: " . ($#deletednodes == -1 ? "none" :
+ join (',', @deletednodes)) . "\n";
         $outd .= "Changed nodes: " . ($#changednodes == -1 ? "none" :
+ join (',', @changednodes)) . "\n";
      }
      else
      {
         my $longest_title = find_longest_title ([{'array' => \@newnod
+es,     'hash' => $hnewreps},
                                                  {'array' => \@delete
+dnodes, 'hash' => $holdreps},
                                                  {'array' => \@change
+dnodes, 'hash' => $hnewreps}
                                                 ]);
   
         $outd  = sprintf ("\nNew nodes: %d\n",     scalar @newnodes) 
+    . display_nodelist ($hnewreps, \@newnodes,     $longest_title);
         $outd .= sprintf ("\nDeleted nodes: %d\n", scalar @deletednod
+es) . display_nodelist ($holdreps, \@deletednodes, $longest_title);
         $outd .= sprintf ("\nChanged nodes: %d\n", scalar @changednod
+es) . display_nodelist ($hnewreps, \@changednodes, $longest_title);
      }

      $outr = reputation_report ($hnewreps);

      write_file ($filename, $hnewreps) unless $noupdate;

      #
      #  This builds the hash that might be written to the database
      #
      update_replist ('N', \%replist, $hnewreps, \@newnodes);
      update_replist ('D', \%replist, $holdreps, \@deletednodes);
      update_replist ('C', \%replist, $hnewreps, \@changednodes);
   }

   return ($outd, $outr, \%replist);
}

sub update_replist
{
   @_ == 4 or croak "Incorrect number of parameters";

   my ($type, $dbreplist, $replist, $repnodes) = @_;

   foreach (@$repnodes)
   {
      croak "Duplicate node_id $_" if exists ($dbreplist->{$_});

      $dbreplist->{$_} = $replist->{$_};
      $dbreplist->{$_}->{type} = $type;
   }
}

sub reputation_report
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hrephash = shift;
   my $total = 0;
   my $repmax = 0;
   my $repmin = 999999999;
   my $out = "";

   scalar keys %$hrephash >= 0 or die "You have no articles, perhaps?\
+n";

   for (keys %$hrephash)
   {
      $total += $hrephash->{$_}->{rep};
      $repmax = max ($repmax, $hrephash->{$_}->{rep});
      $repmin = min ($repmin, $hrephash->{$_}->{rep});
   }

   $out  = "\n";
   $out .= sprintf ("    Total articles: %d\n",    (scalar keys %$hrep
+hash) - 1);
   $out .= sprintf ("  Total reputation: %d\n",    $total);
   $out .= sprintf ("    Min reputation: %d\n",    $repmin);
   $out .= sprintf ("    Max reputation: %d\n",    $repmax);
   $out .= sprintf ("Average reputation: %3.2f\n", $total / ((scalar k
+eys %$hrephash) - 1));

   return ($out);
}

sub display_nodelist
{
   @_ == 3 or croak "Incorrect number of parameters";

   my ($rnodehash, $rnodelist, $longest) = @_;
   my $out = "";

   return (" (none)\n") if ($#$rnodelist == -1);

   my $fmt = '% 6d | %-' . $longest . 's | %s | % 4d -> % 4d';

   foreach (@$rnodelist)
   {
      $out .= sprintf ("$fmt\n", $rnodehash->{$_}->{nodeid}, 
                                 $rnodehash->{$_}->{title}, 
                                 $rnodehash->{$_}->{date},
                                 $rnodehash->{$_}->{last}, 
                                 $rnodehash->{$_}->{rep});
   }

   return ($out);
}

sub find_longest_title
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hashlist = shift;
   my $linelen = 0;

   foreach (@$hashlist)
   {
      my $nodes = $_->{'hash'};

      $linelen = max ($linelen, length ($nodes->{$_}->{'title'})) fore
+ach (@{$_->{'array'}});
   }

   return ($linelen);
}

sub max
{
   my ($a, $b) = @_;

   return ($a > $b ? $a : $b);
}

sub min
{
   my ($a, $b) = @_;

   return ($a < $b ? $a : $b);
}

sub initialize_rep_file
{
   @_ == 3 or croak "Incorrect number of parameters";

   my ($username, $password, $filename) = @_;

   my $hnewreps = get_article_list ($username, $password);

   scalar keys %$hnewreps >= 0 or die "You have no articles, perhaps?\
+n";

   write_file ($filename, $hnewreps);

   return ($hnewreps);
}

sub read_file
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $filename = shift;
   my %nodehash = ();

   my $fh  = IO::File->new ("<$filename");

   defined ($fh) or croak "Can't open file \'$filename\": $!";

   my $csv = Text::CSV_XS->new ({'always_quote' => 1,
                                 'eol'          => "\n"
                                });

   while (<$fh>)
   {
      $csv->parse ($_) or croak "Can't parse input fields";

      my ($nodeid, $article, $rep, $date) = $csv->fields ();

      !exists ($nodehash {$nodeid}) or croak "Node ID $nodeid is dupli
+cated!";

      $nodehash {$nodeid} = {'nodeid' => $nodeid, 
                             'title'  => $article, 
                             'rep'    => $rep,
                             'last'   => $rep,
                             'date'   => $date
                            };
   }

   $fh->close;

   return (\%nodehash);
}

sub write_file
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($filename, $nodehash) = @_;

   my $fh  = IO::File->new (">$filename");

   defined ($fh) or croak "Can't create file \"$filename\": $!";

   my $csv = Text::CSV_XS->new ({'always_quote' => 1,
                                 'eol'          => "\n"
                                });

   for (sort {$a <=> $b} keys %$nodehash)
   {
      $csv->print ($fh, [ @{ $nodehash->{$_} }{ qw(nodeid title rep da
+te) } ]) or croak "Text::CSV_XS->print failed";
   }

   $fh->close;
}
                        
#
#  Don't display the URL when we die (which would be more informative)
+, because the users
#  password might be e-mailed somewhere.  And we sure don't want some 
+dweeb to be impersonating
#  us on perlmonks.org, do we?
#
sub get_article_list
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($username, $password) = @_;
   my %nodehash = ();

   $LWP::Simple::FULL_LWP = 1;

   my $page = get ("$pmsite?user=$username&passwd=$password&op=login&n
+ode=User+nodes+info+xml+generator") or croak "Get on $pmsite failed."
+;

   my $twig= new XML::Twig (TwigRoots => 
            { NODE => sub { my ($t, $node) = @_;
                            my $nodeid = $node->att ('id');
                            !exists ($nodehash {$nodeid}) or croak "No
+de $nodeid is duplicated!";
                            $nodehash {$nodeid} = {'nodeid' => $nodeid
+,
                                                   'title'  => $node->
+text,
                                                   'rep'    => $node->
+att ('reputation'),
                                                   'last'   => $node->
+att ('reputation'),
                                                   'date'   => $node->
+att ('createtime')
                                                  };
                            $t->purge;
                          }
            });

   $twig->parse ($page);

   return (\%nodehash);
}

#
#  OK, so if I was really smart, I'd have passed a hash in.  Know what
+?  Too much work,
#  too little return.
#
sub confirm_mailargs
{
   @_ == 5 or croak "Incorrect number of parameters";

   my ($eflag, $mto, $mserver, $mfrom, $msubject) = @_;
   my %mailargs = ();

   return undef if !$eflag;

   $mailargs {to}      = $mto      || $def_mto      || die "-e specifi
+ed, but no -t or script default\n";
   $mailargs {server}  = $mserver  || $def_mserver  || die "-e specifi
+ed, but no -m or script default\n";
   $mailargs {from}    = $mfrom    || $def_mfrom    || die "-e specifi
+ed, but no -f or script default\n";
   $mailargs {subject} = $msubject || $def_msubject || die "-e specifi
+ed, but no -s or script default\n";

   $mailargs {from} = sprintf ($mailargs {from}, $mailargs {to});

   return (\%mailargs);
}

sub db_update
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hreplist = shift;

   my $database = DBI->connect ("DBI:mysql:$def_dbdb:$def_dbhost", $de
+f_dbuser, $def_dbpw);

   if (!defined $database)
   {
     warn "Can't open the $def_dbdb database\n";
     return;
   }

   foreach (sort keys %$hreplist)
   {
      my $command = sprintf ("INSERT INTO %s
                                          (Type,
                                           NodeId,
                                           Title,
                                           Date,
                                           LastReputation,
                                           Reputation)
                                   VALUES (%s, %d, %s, %s, %d, %d)",
                                               $def_dbtable,
                             $database->quote ($hreplist->{$_}->{type}
+),
                                               $hreplist->{$_}->{nodei
+d},
                             $database->quote ($hreplist->{$_}->{title
+}),
                             $database->quote ($hreplist->{$_}->{date}
+),
                                               $hreplist->{$_}->{last}
+,
                                               $hreplist->{$_}->{rep})
+;

                                          
      $database->do ($command) or croak;
   }

   $database->disconnect;
}

sub usage
{
   print <<ENDOFHELP;

usage: luke_repwalker.pl [-h | -?] [-I] [-n] [-b] [-u username] [-p pa
+ssword] [-P]
                         [-F filename] [-e [-t toaddress] [-m mailserv
+er] [-s subject]
                         [-f fromaddress]] [-c] [-z] [-1 | -2 | -3] [-
+d]

Show differences between current reputation and last saved reputation

   -h             this help list
   -?             this help list
   -u username    user name on Perlmonks.org
   -p password    password for user
   -P             forces interactive prompt for password.  Overrides -
+p or script defaults
   -F filename    reputation snapshot (defaults to \$ENV{HOME}/.[usern
+ame].rep)
   -I             initialize snapshot file.  Must be done first time s
+cript is run
   -n             don't update snapshot file, just compare
   -b             brief output (node numbers only)
   -c             force console output if -e is used
   -z             no console or email output if nothing has changed
   -e             send e-mail (requires -t and -m, optionally -f and/o
+r -s)
   -t             e-mail addressee (yourname\@somesite.com)
   -f             whom the mail should as be from (myname\@planetx.com
+)
   -s             the subject (default is "Perlmonks.org Reputation Ch
+ange Report")
   -m             SMTP mail server address ('mailserver.myserver.com')
   -1             quick reputation report
   -2             detailed reputation change report
   -3             both -1 and -2 (default)
   -d             update mysql database with new/deleted/changed recor
+ds

   The -I and -n options are mutually exclusive.

   -I needs to be used the first time the script is run to initialize 
+the snapshot file.  No 
   other options affect -I, nor are they checked for validity.

   Using -t, -m, -f, or -s does not imply -e, since e-mail defaults ca
+n be embedded in the 
   script.  Specifying these flags without -e is meaningless, but not 
+an error.

   The script can be edited to set defaults for username, password, fi
+lename, mail options, etc.
   If the script is not edited, then -u and -p are always required, as
+ is -t if -e used.  For 
   -t and -f, use the form  '"James T. Kirk" <jtkirk\@starfleet.com>' 
+to get textual names in 
   the To: and From: fields, instead of the 'user\@address' form.

   By default, if -e is used, no output is sent to the console.  The -
+c flag will force the
   output to the console, in addition to mailing.  -c specified withou
+t -e is meaningless, but
   not an error.

ENDOFHELP
}

__END__

# MySQL dump 7.1
#
# Host: localhost    Database: Perlmonks
#--------------------------------------------------------
# Server version        3.22.32

#
# Table structure for table 'Reputation'
#
CREATE TABLE Reputation (
  ReputationID int(10) unsigned DEFAULT '0' NOT NULL auto_increment,
  Type char(1) DEFAULT 'U' NOT NULL,
  NodeId int(10) unsigned DEFAULT '0' NOT NULL,
  Title varchar(160) DEFAULT '' NOT NULL,
  Date datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
  LastReputation int(11) DEFAULT '0' NOT NULL,
  Reputation int(11) DEFAULT '0' NOT NULL,
  Modified timestamp(14),
  PRIMARY KEY (ReputationID)
);