http://www.perlmonks.org?node_id=65221
Category: PerlMonks Related Scripts
Author/Contact Info J.C. Wren
jcwren@jcwren.com
Description:

This is an update of luke_repwalker.pl that uses mirods XML::Twig module. It's faster because it generates few hits to the server. People like tilly should appreciate this... And ultimately it's more reliable, since it doesn't try to parse the tables of article listings.

I posted this as new code so that the old one would still be available. Some people have had trouble installing the various XML modules, or don't wish to.

#!/usr/local/bin/perl -w
   
#
#  Version 1.00.00 - 2000/08/05 - Initial incarnation
#  Version 1.00.10 - 2000/08/05 - A few cleanups per node 26390
#  Version 1.00.20 - 2000/08/08 - Added DBI support
#  Version 1.00.30 - 2000/09/02 - Fix error if number of nodes is a mu
+ltiple of 50
#  Version 1.10.00 - 2001/03/17 - Rip out HTML::TableExtract, convert 
+to XML::Twig
#  Version 1.10.01 - 2001/03/18 - Fixed mirods comments in node 65444
#
#  Invoke with './luke_repwalker.pl -?' for help
#
#  The username and/or password can be embedded into the script, if yo
+u don't want command
#  line arguments.
#
#  Compares the users current writeups to a previous snapshot, display
+ing articles that have
#  been added, deleted, or reputations that have changed since the las
+t run.  Unless disabled.
#  the new writeups info is saved as the snapshot for the next run.
#
#  The output can either be displayed at the user's console, and/or it
+ can be emailed to a given
#  user, via MIME::Lite.
#
#  For a cron job, the following entry will run every hour at 0 minute
+s past, only generate output
#  when something has changed, e-mail us the results, and update the m
+ySQL database.  You will, of
#  course, have to change the fields to match who/what/where and when 
+you really are.
#
#    0 * * * * /PMUtils/luke_repwalker.pl -u pmuser -p pmpw -e -t '"Pe
+rlDude" <perldude@hackers.com>' -z -d
#
#  The SQL necessary to create the mySQL table is located at the botto
+m of the output file, and may
#  be fed to 'mysqldump' to create the table.  You'll need to create t
+he database it's going to live
#  in, first.
#
#  Requires:
#    LWP::Simple
#    Text::CSV_XS
#    MIME::Lite;
#    DBI;
#    XML::Twig
#
#  Copyright 2000,2001(c) J.C.Wren   jcwren@jcwren.com
#  No rights reserved, use as you see fit.  I'd like to know about it,
+ though, just for kicks.
#

use strict;
use Carp;
use XML::Twig;
use LWP::Simple;
use Text::CSV_XS;
use MIME::Lite;
use DBI;
use IO::File;
use Getopt::Std;

use vars qw($def_username $def_password $def_filename);
use vars qw($def_dbhost $def_dbdb $def_dbtable $def_dbuser $def_dbpw);
use vars qw($def_mto $def_msubject $def_mserver $def_mfrom);
use vars qw($pmsite $pmpagelen);

#
#  Set these accordingly, if you don't want command line parameters.
#
$def_username = '';                                         # username
+, unless -u is preferred
$def_password = '';                                         # password
+, unless -p is preferred
$def_filename = "$ENV{HOME}/.rep.%s";                       # snapshot
+ file
$def_mto      = '';                                         # no defau
+lt 'to' user
$def_msubject = 'Perlmonks.org Reputation Change Report';   # default 
+title
$def_mserver  = 'localhost';                                # default 
+mailserver
$def_mfrom    = '%s';                                       # %s means
+ use the 'to' parameter
$pmsite       = 'http://www.perlmonks.net/index.pl';        # vroom's 
+house of illrepute
$pmpagelen    = 50;                                         # articles
+ returned per page

$def_dbhost   = 'localhost';     # Where our database is hosted
$def_dbdb     = 'Perlmonks';     # Name of our database
$def_dbtable  = 'Reputation';    # Name of our table
$def_dbuser   = 'isername';      # Our mySQL username
$def_dbpw     = 'password';      # Our mySQL password

#
#
#
{
   my %args = ();
   my $out = "";

   getopts ('u:p:F:t:f:s:m:Inhe?cbzP123d', \%args);

   if ($args{'?'} || $args{h})
   {
      usage ();
      exit;
   }

   if ($args {P})
   {
      local $| = 1;
      print "Password: ";
      $args {p} = <STDIN>;
      chomp ($args{p});
   }

   my $username = $args{u} || $def_username;
   my $password = $args{p} || $def_password;
   my $filename = $args{F} || sprintf ($def_filename, $username);

   $username or die "No username.  Program terminated.\n";
   $password or die "No password.  Program terminated.\n";

   (!$args{I} || !$args{n}) or die "-I and -n are mutually exclusive. 
+ Program terminated\n";

   #
   # 
   #
   if ($args{I})
   {
      my $hreplist = initialize_rep_file ($username, $password, $filen
+ame);

      if ($args{d})
      {
         my @nodelist = ();
         my %dbreplist = ();

         push @nodelist, $_ foreach (keys %$hreplist);

         update_replist ('I', \%dbreplist, $hreplist, \@nodelist);

         db_update (\%dbreplist);
      }

      exit;
   }

   if (!-e $filename)
   {
      print "No previous reputation snapshot file exists.  Use -I to c
+reate\n";
      exit;
   }

   my $hmailopts = confirm_mailargs ($args{e}, $args{t}, $args{m}, $ar
+gs{f}, $args{s});

   my ($outd, $outr, $dbreplist) = compare_reps ($username, $password,
+ $filename, $args{n}, $args{b}, $args{z});

   if (defined ($outd) && defined ($outr))
   {
      my $out;

      $out = $outr . $outd . "\n";
      $out = $outr . "\n"         if ($args{1} && !$args{3});
      $out = $outd . "\n"         if ($args{2} && !$args{3});

      print $out if ($args{c} || !$args{e});
   
      if ($args{e})
      {
         MIME::Lite->send ('smtp', $hmailopts->{server}, Timeout=>60);
   
         my $msg = MIME::Lite->new (From     => $hmailopts->{from},
                                    To       => $hmailopts->{to},
                                    Subject  => $hmailopts->{subject},
                                    Type     => 'TEXT',
                                    Encoding => '7bit',
                                    Data     => $out) || croak "MIME::
+Lite->new failed";
   
         $msg->send || croak "MIME::Lite->send failed.";
      }

      db_update ($dbreplist) if $args{d};    
   }
}

sub compare_reps
{
   @_ == 6 or croak "Incorrect number of parameters";

   my ($username, $password, $filename, $noupdate, $brief, $zero) = @_
+;
   my @newnodes = ();
   my @deletednodes = ();
   my @changednodes = ();
   my %replist = ();
   my $outd = undef;
   my $outr = undef;

   my $holdreps = read_file ($filename);
   my $hnewreps = get_article_list ($username, $password);

   scalar keys %$hnewreps != 0 or die "You have no articles, perhaps?\
+n";

   #
   #  Find all the new, deleted and changed entries
   #
   foreach (keys %$hnewreps) {push (@newnodes, $_)     if !exists ($ho
+ldreps->{$_})} 
   foreach (keys %$holdreps) {push (@deletednodes, $_) if !exists ($hn
+ewreps->{$_})}
   foreach (keys %$holdreps) {push (@changednodes, $_) if  exists ($hn
+ewreps->{$_}) && $hnewreps->{$_}->{'rep'} != $holdreps->{$_}->{'rep'}
+}
   
   #
   #  For any article in the @changednodes array, move the 'rep' field
+ from %holdreps into
   #  the 'last' of %nhewreps.  This makes displaying it really easy.
   #
   $hnewreps->{$_}->{'last'} = $holdreps->{$_}->{'rep'} foreach (@chan
+gednodes);

   #
   #  If no -z (zero output) flag, and we have changes, then generate 
+the reports.  Otherwise, if
   #  -z is set, then return undef for both reports.
   #
   if (!$zero || $#newnodes != -1 || $#deletednodes != -1 || $#changed
+nodes != -1)
   {
      if ($brief)
      {
         $outd  = "\n";
         $outd .= "New nodes: "     . ($#newnodes     == -1 ? "none" :
+ join (',', @newnodes))     . "\n";
         $outd .= "Deleted nodes: " . ($#deletednodes == -1 ? "none" :
+ join (',', @deletednodes)) . "\n";
         $outd .= "Changed nodes: " . ($#changednodes == -1 ? "none" :
+ join (',', @changednodes)) . "\n";
      }
      else
      {
         my $longest_title = find_longest_title ([{'array' => \@newnod
+es,     'hash' => $hnewreps},
                                                  {'array' => \@delete
+dnodes, 'hash' => $holdreps},
                                                  {'array' => \@change
+dnodes, 'hash' => $hnewreps}
                                                 ]);
   
         $outd  = sprintf ("\nNew nodes: %d\n",     scalar @newnodes) 
+    . display_nodelist ($hnewreps, \@newnodes,     $longest_title);
         $outd .= sprintf ("\nDeleted nodes: %d\n", scalar @deletednod
+es) . display_nodelist ($holdreps, \@deletednodes, $longest_title);
         $outd .= sprintf ("\nChanged nodes: %d\n", scalar @changednod
+es) . display_nodelist ($hnewreps, \@changednodes, $longest_title);
      }

      $outr = reputation_report ($hnewreps);

      write_file ($filename, $hnewreps) unless $noupdate;

      #
      #  This builds the hash that might be written to the database
      #
      update_replist ('N', \%replist, $hnewreps, \@newnodes);
      update_replist ('D', \%replist, $holdreps, \@deletednodes);
      update_replist ('C', \%replist, $hnewreps, \@changednodes);
   }

   return ($outd, $outr, \%replist);
}

sub update_replist
{
   @_ == 4 or croak "Incorrect number of parameters";

   my ($type, $dbreplist, $replist, $repnodes) = @_;

   foreach (@$repnodes)
   {
      croak "Duplicate node_id $_" if exists ($dbreplist->{$_});

      $dbreplist->{$_} = $replist->{$_};
      $dbreplist->{$_}->{type} = $type;
   }
}

sub reputation_report
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hrephash = shift;
   my $total = 0;
   my $repmax = 0;
   my $repmin = 999999999;
   my $out = "";

   scalar keys %$hrephash >= 0 or die "You have no articles, perhaps?\
+n";

   for (keys %$hrephash)
   {
      $total += $hrephash->{$_}->{rep};
      $repmax = max ($repmax, $hrephash->{$_}->{rep});
      $repmin = min ($repmin, $hrephash->{$_}->{rep});
   }

   $out  = "\n";
   $out .= sprintf ("    Total articles: %d\n",    (scalar keys %$hrep
+hash) - 1);
   $out .= sprintf ("  Total reputation: %d\n",    $total);
   $out .= sprintf ("    Min reputation: %d\n",    $repmin);
   $out .= sprintf ("    Max reputation: %d\n",    $repmax);
   $out .= sprintf ("Average reputation: %3.2f\n", $total / ((scalar k
+eys %$hrephash) - 1));

   return ($out);
}

sub display_nodelist
{
   @_ == 3 or croak "Incorrect number of parameters";

   my ($rnodehash, $rnodelist, $longest) = @_;
   my $out = "";

   return (" (none)\n") if ($#$rnodelist == -1);

   my $fmt = '% 6d | %-' . $longest . 's | %s | % 4d -> % 4d';

   foreach (@$rnodelist)
   {
      $out .= sprintf ("$fmt\n", $rnodehash->{$_}->{nodeid}, 
                                 $rnodehash->{$_}->{title}, 
                                 $rnodehash->{$_}->{date},
                                 $rnodehash->{$_}->{last}, 
                                 $rnodehash->{$_}->{rep});
   }

   return ($out);
}

sub find_longest_title
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hashlist = shift;
   my $linelen = 0;

   foreach (@$hashlist)
   {
      my $nodes = $_->{'hash'};

      $linelen = max ($linelen, length ($nodes->{$_}->{'title'})) fore
+ach (@{$_->{'array'}});
   }

   return ($linelen);
}

sub max
{
   my ($a, $b) = @_;

   return ($a > $b ? $a : $b);
}

sub min
{
   my ($a, $b) = @_;

   return ($a < $b ? $a : $b);
}

sub initialize_rep_file
{
   @_ == 3 or croak "Incorrect number of parameters";

   my ($username, $password, $filename) = @_;

   my $hnewreps = get_article_list ($username, $password);

   scalar keys %$hnewreps >= 0 or die "You have no articles, perhaps?\
+n";

   write_file ($filename, $hnewreps);

   return ($hnewreps);
}

sub read_file
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $filename = shift;
   my %nodehash = ();

   my $fh  = IO::File->new ("<$filename");

   defined ($fh) or croak "Can't open file \'$filename\": $!";

   my $csv = Text::CSV_XS->new ({'always_quote' => 1,
                                 'eol'          => "\n"
                                });

   while (<$fh>)
   {
      $csv->parse ($_) or croak "Can't parse input fields";

      my ($nodeid, $article, $rep, $date) = $csv->fields ();

      !exists ($nodehash {$nodeid}) or croak "Node ID $nodeid is dupli
+cated!";

      $nodehash {$nodeid} = {'nodeid' => $nodeid, 
                             'title'  => $article, 
                             'rep'    => $rep,
                             'last'   => $rep,
                             'date'   => $date
                            };
   }

   $fh->close;

   return (\%nodehash);
}

sub write_file
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($filename, $nodehash) = @_;

   my $fh  = IO::File->new (">$filename");

   defined ($fh) or croak "Can't create file \"$filename\": $!";

   my $csv = Text::CSV_XS->new ({'always_quote' => 1,
                                 'eol'          => "\n"
                                });

   for (sort {$a <=> $b} keys %$nodehash)
   {
      $csv->print ($fh, [ @{ $nodehash->{$_} }{ qw(nodeid title rep da
+te) } ]) or croak "Text::CSV_XS->print failed";
   }

   $fh->close;
}
                        
#
#  Don't display the URL when we die (which would be more informative)
+, because the users
#  password might be e-mailed somewhere.  And we sure don't want some 
+dweeb to be impersonating
#  us on perlmonks.org, do we?
#
sub get_article_list
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($username, $password) = @_;
   my %nodehash = ();

   $LWP::Simple::FULL_LWP = 1;

   my $page = get ("$pmsite?user=$username&passwd=$password&op=login&n
+ode=User+nodes+info+xml+generator") or croak "Get on $pmsite failed."
+;

   my $twig= new XML::Twig (TwigRoots => 
            { NODE => sub { my ($t, $node) = @_;
                            my $nodeid = $node->att ('id');
                            !exists ($nodehash {$nodeid}) or croak "No
+de $nodeid is duplicated!";
                            $nodehash {$nodeid} = {'nodeid' => $nodeid
+,
                                                   'title'  => $node->
+text,
                                                   'rep'    => $node->
+att ('reputation'),
                                                   'last'   => $node->
+att ('reputation'),
                                                   'date'   => $node->
+att ('createtime')
                                                  };
                            $t->purge;
                          }
            });

   $twig->parse ($page);

   return (\%nodehash);
}

#
#  OK, so if I was really smart, I'd have passed a hash in.  Know what
+?  Too much work,
#  too little return.
#
sub confirm_mailargs
{
   @_ == 5 or croak "Incorrect number of parameters";

   my ($eflag, $mto, $mserver, $mfrom, $msubject) = @_;
   my %mailargs = ();

   return undef if !$eflag;

   $mailargs {to}      = $mto      || $def_mto      || die "-e specifi
+ed, but no -t or script default\n";
   $mailargs {server}  = $mserver  || $def_mserver  || die "-e specifi
+ed, but no -m or script default\n";
   $mailargs {from}    = $mfrom    || $def_mfrom    || die "-e specifi
+ed, but no -f or script default\n";
   $mailargs {subject} = $msubject || $def_msubject || die "-e specifi
+ed, but no -s or script default\n";

   $mailargs {from} = sprintf ($mailargs {from}, $mailargs {to});

   return (\%mailargs);
}

sub db_update
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hreplist = shift;

   my $database = DBI->connect ("DBI:mysql:$def_dbdb:$def_dbhost", $de
+f_dbuser, $def_dbpw);

   if (!defined $database)
   {
     warn "Can't open the $def_dbdb database\n";
     return;
   }

   foreach (sort keys %$hreplist)
   {
      my $command = sprintf ("INSERT INTO %s
                                          (Type,
                                           NodeId,
                                           Title,
                                           Date,
                                           LastReputation,
                                           Reputation)
                                   VALUES (%s, %d, %s, %s, %d, %d)",
                                               $def_dbtable,
                             $database->quote ($hreplist->{$_}->{type}
+),
                                               $hreplist->{$_}->{nodei
+d},
                             $database->quote ($hreplist->{$_}->{title
+}),
                             $database->quote ($hreplist->{$_}->{date}
+),
                                               $hreplist->{$_}->{last}
+,
                                               $hreplist->{$_}->{rep})
+;

                                          
      $database->do ($command) or croak;
   }

   $database->disconnect;
}

sub usage
{
   print <<ENDOFHELP;

usage: luke_repwalker.pl [-h | -?] [-I] [-n] [-b] [-u username] [-p pa
+ssword] [-P]
                         [-F filename] [-e [-t toaddress] [-m mailserv
+er] [-s subject]
                         [-f fromaddress]] [-c] [-z] [-1 | -2 | -3] [-
+d]

Show differences between current reputation and last saved reputation

   -h             this help list
   -?             this help list
   -u username    user name on Perlmonks.org
   -p password    password for user
   -P             forces interactive prompt for password.  Overrides -
+p or script defaults
   -F filename    reputation snapshot (defaults to \$ENV{HOME}/.[usern
+ame].rep)
   -I             initialize snapshot file.  Must be done first time s
+cript is run
   -n             don't update snapshot file, just compare
   -b             brief output (node numbers only)
   -c             force console output if -e is used
   -z             no console or email output if nothing has changed
   -e             send e-mail (requires -t and -m, optionally -f and/o
+r -s)
   -t             e-mail addressee (yourname\@somesite.com)
   -f             whom the mail should as be from (myname\@planetx.com
+)
   -s             the subject (default is "Perlmonks.org Reputation Ch
+ange Report")
   -m             SMTP mail server address ('mailserver.myserver.com')
   -1             quick reputation report
   -2             detailed reputation change report
   -3             both -1 and -2 (default)
   -d             update mysql database with new/deleted/changed recor
+ds

   The -I and -n options are mutually exclusive.

   -I needs to be used the first time the script is run to initialize 
+the snapshot file.  No 
   other options affect -I, nor are they checked for validity.

   Using -t, -m, -f, or -s does not imply -e, since e-mail defaults ca
+n be embedded in the 
   script.  Specifying these flags without -e is meaningless, but not 
+an error.

   The script can be edited to set defaults for username, password, fi
+lename, mail options, etc.
   If the script is not edited, then -u and -p are always required, as
+ is -t if -e used.  For 
   -t and -f, use the form  '"James T. Kirk" <jtkirk\@starfleet.com>' 
+to get textual names in 
   the To: and From: fields, instead of the 'user\@address' form.

   By default, if -e is used, no output is sent to the console.  The -
+c flag will force the
   output to the console, in addition to mailing.  -c specified withou
+t -e is meaningless, but
   not an error.

ENDOFHELP
}

__END__

# MySQL dump 7.1
#
# Host: localhost    Database: Perlmonks
#--------------------------------------------------------
# Server version        3.22.32

#
# Table structure for table 'Reputation'
#
CREATE TABLE Reputation (
  ReputationID int(10) unsigned DEFAULT '0' NOT NULL auto_increment,
  Type char(1) DEFAULT 'U' NOT NULL,
  NodeId int(10) unsigned DEFAULT '0' NOT NULL,
  Title varchar(160) DEFAULT '' NOT NULL,
  Date datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
  LastReputation int(11) DEFAULT '0' NOT NULL,
  Reputation int(11) DEFAULT '0' NOT NULL,
  Modified timestamp(14),
  PRIMARY KEY (ReputationID)
);
Replies are listed 'Best First'.
Re: xluke_repwalker.pl
by mirod (Canon) on Mar 19, 2001 at 20:26 UTC

    You XP whore you!

    OK, I have just one comment on the use of XML::Twig (besides stating that this is how I use it myself most of the time): if you go through the pain of putting my ($t, $node) = @_; at the top of the anonymous sub you might as well use $node in the body:

    NODE => sub { my ($t, $node) = @_; my $nodeid = $node->att ('id'); !exists ($nodehash {$nodeid}) or croak "N +ode $nodeid is duplicated!"; $nodehash {$nodeid} = {'nodeid' => $nodei +d, 'title' => $node- +>text, 'rep' => $node- +>att ('reputation'), 'last' => $node- +>att ('reputation'), 'date' => $node- +>att ('createtime') }; $t->purge; }

    Also I found that -h does not work (-? does, you should check $args{h} on line 87, and I usually use $0 instead of hard-coding the name of the script in usage().

    Good job anyway, it seems to be much faster than the previous version, 4 to 5 times faster actually!

Re: xluke_repwalker.pl
by mojotoad (Monsignor) on May 09, 2002 at 19:32 UTC
    Simple patch to improve robust handling of node titles with funky characters, such as Ö, Ä, Ü and XML::Simple. Without binary mode this trips up the Text::CSV_XS read and write methods.

    Matt

    --- xluke_repwalker.old.pl Thu May 9 01:24:40 2002 +++ xluke_repwalker.pl Thu May 9 01:24:35 2002 @@ -362,7 +362,8 @@ defined ($fh) or croak "Can't open file \'$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, - 'eol' => "\n" + 'eol' => "\n", + 'binary' => 1, }); while (<$fh>) @@ -397,7 +398,8 @@ defined ($fh) or croak "Can't create file \"$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, - 'eol' => "\n" + 'eol' => "\n", + 'binary' => 1, }); for (sort {$a <=> $b} keys %$nodehash)
Re: xluke_repwalker.pl
by mojotoad (Monsignor) on May 18, 2002 at 01:08 UTC
    Patch incorporating PerlMonks::StatsWhore usage. Added -g (histogram) and -H (HTML fetch mode) options. This patch also includes my prior patch regarding the binary flag for CSV.

    Matt

    --- xluke_repwalker.old.pl Fri May 17 07:06:05 2002 +++ xluke_repwalker.pl Fri May 17 07:11:10 2002 @@ -7,6 +7,7 @@ # Version 1.00.30 - 2000/09/02 - Fix error if number of nodes is a m +ultiple of 50 # Version 1.10.00 - 2001/03/17 - Rip out HTML::TableExtract, convert + to XML::Twig # Version 1.10.01 - 2001/03/18 - Fixed mirods comments in node 65444 +# Version 1.??.?? - 2001/05/17 - Patched to use PerlMonks::StatsWhor +e # # Invoke with './luke_repwalker.pl -?' for help # @@ -33,9 +34,9 @@ # Requires: # LWP::Simple # Text::CSV_XS -# MIME::Lite; -# DBI; -# XML::Twig +# MIME::Lite +# DBI +# PerlMonks::StatsWhore # # Copyright 2000,2001(c) J.C.Wren jcwren@jcwren.com # No rights reserved, use as you see fit. I'd like to know about it +, though, just for kicks. @@ -43,18 +44,16 @@ use strict; use Carp; -use XML::Twig; -use LWP::Simple; use Text::CSV_XS; use MIME::Lite; use DBI; use IO::File; use Getopt::Std; +use PerlMonks::StatsWhore; use vars qw($def_username $def_password $def_filename); use vars qw($def_dbhost $def_dbdb $def_dbtable $def_dbuser $def_dbpw) +; use vars qw($def_mto $def_msubject $def_mserver $def_mfrom); -use vars qw($pmsite $pmpagelen); # # Set these accordingly, if you don't want command line parameters. @@ -66,8 +65,6 @@ $def_msubject = 'Perlmonks.org Reputation Change Report'; # default + title $def_mserver = 'localhost'; # default + mailserver $def_mfrom = '%s'; # %s mean +s use the 'to' parameter -$pmsite = 'http://www.perlmonks.net/index.pl'; # vroom's + house of illrepute -$pmpagelen = 50; # article +s returned per page $def_dbhost = 'localhost'; # Where our database is hosted $def_dbdb = 'Perlmonks'; # Name of our database @@ -82,7 +79,7 @@ my %args = (); my $out = ""; - getopts ('u:p:F:t:f:s:m:Inhe?cbzP123d', \%args); + getopts ('u:p:F:t:f:s:m:Inhe?cbzPgH123d', \%args); if ($args{'?'} || $args{h}) { @@ -110,18 +107,19 @@ # # # + + my $stats_whore = PerlMonks::StatsWhore->new(user => $username, pa +ssword => $password); + $stats_whore->mode('HTML') if $args{H}; + if ($args{I}) { - my $hreplist = initialize_rep_file ($username, $password, $file +name); + initialize_rep_file($stats_whore, $filename); if ($args{d}) { - my @nodelist = (); my %dbreplist = (); - push @nodelist, $_ foreach (keys %$hreplist); - - update_replist ('I', \%dbreplist, $hreplist, \@nodelist); + update_replist ('I', $stats_whore, \%dbreplist); db_update (\%dbreplist); } @@ -137,12 +135,15 @@ my $hmailopts = confirm_mailargs ($args{e}, $args{t}, $args{m}, $a +rgs{f}, $args{s}); - my ($outd, $outr, $dbreplist) = compare_reps ($username, $password +, $filename, $args{n}, $args{b}, $args{z}); + my ($outd, $dbreplist) = compare_reps ($stats_whore, $filename, $a +rgs{n}, $args{b}, $args{z}); - if (defined ($outd) && defined ($outr)) + if (defined ($outd)) { my $out; + my $outr; + $outr .= $stats_whore->histogram_as_string . "\n" if $args{g}; + $outr .= $stats_whore->summary_as_string . "\n"; $out = $outr . $outd . "\n"; $out = $outr . "\n" if ($args{1} && !$args{3}); $out = $outd . "\n" if ($args{2} && !$args{3}); @@ -169,9 +170,9 @@ sub compare_reps { - @_ == 6 or croak "Incorrect number of parameters"; + @_ == 5 or croak "Incorrect number of parameters"; - my ($username, $password, $filename, $noupdate, $brief, $zero) = @ +_; + my ($sw, $filename, $noupdate, $brief, $zero) = @_; my @newnodes = (); my @deletednodes = (); my @changednodes = (); @@ -180,16 +181,17 @@ my $outr = undef; my $holdreps = read_file ($filename); - my $hnewreps = get_article_list ($username, $password); + my $hnewreps = $sw->writeups_ref; scalar keys %$hnewreps != 0 or die "You have no articles, perhaps? +\n"; # # Find all the new, deleted and changed entries # - foreach (keys %$hnewreps) {push (@newnodes, $_) if !exists ($h +oldreps->{$_})} + foreach (keys %$hnewreps) {push (@newnodes, $_) if !exists ($h +oldreps->{$_})} foreach (keys %$holdreps) {push (@deletednodes, $_) if !exists ($h +newreps->{$_})} foreach (keys %$holdreps) {push (@changednodes, $_) if exists ($h +newreps->{$_}) && $hnewreps->{$_}->{'rep'} != $holdreps->{$_}->{'rep' +}} + # # For any article in the @changednodes array, move the 'rep' fiel +d from %holdreps into @@ -222,9 +224,7 @@ $outd .= sprintf ("\nChanged nodes: %d\n", scalar @changedno +des) . display_nodelist ($hnewreps, \@changednodes, $longest_title); } - $outr = reputation_report ($hnewreps); - - write_file ($filename, $hnewreps) unless $noupdate; + write_file ($filename, $sw) unless $noupdate; # # This builds the hash that might be written to the database @@ -234,7 +234,7 @@ update_replist ('C', \%replist, $hnewreps, \@changednodes); } - return ($outd, $outr, \%replist); + return ($outd, \%replist); } sub update_replist @@ -252,35 +252,6 @@ } } -sub reputation_report -{ - @_ == 1 or croak "Incorrect number of parameters"; - - my $hrephash = shift; - my $total = 0; - my $repmax = 0; - my $repmin = 999999999; - my $out = ""; - - scalar keys %$hrephash >= 0 or die "You have no articles, perhaps? +\n"; - - for (keys %$hrephash) - { - $total += $hrephash->{$_}->{rep}; - $repmax = max ($repmax, $hrephash->{$_}->{rep}); - $repmin = min ($repmin, $hrephash->{$_}->{rep}); - } - - $out = "\n"; - $out .= sprintf (" Total articles: %d\n", (scalar keys %$hre +phash) - 1); - $out .= sprintf (" Total reputation: %d\n", $total); - $out .= sprintf (" Min reputation: %d\n", $repmin); - $out .= sprintf (" Max reputation: %d\n", $repmax); - $out .= sprintf ("Average reputation: %3.2f\n", $total / ((scalar +keys %$hrephash) - 1)); - - return ($out); -} - sub display_nodelist { @_ == 3 or croak "Incorrect number of parameters"; @@ -314,7 +285,6 @@ foreach (@$hashlist) { my $nodes = $_->{'hash'}; - $linelen = max ($linelen, length ($nodes->{$_}->{'title'})) for +each (@{$_->{'array'}}); } @@ -337,17 +307,13 @@ sub initialize_rep_file { - @_ == 3 or croak "Incorrect number of parameters"; - - my ($username, $password, $filename) = @_; - - my $hnewreps = get_article_list ($username, $password); + @_ == 2 or croak "Incorrect number of parameters"; - scalar keys %$hnewreps >= 0 or die "You have no articles, perhaps? +\n"; + my ($sw, $filename) = @_; - write_file ($filename, $hnewreps); + write_file($filename, $sw); - return ($hnewreps); + return ($sw); } sub read_file @@ -362,7 +328,8 @@ defined ($fh) or croak "Can't open file \'$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, - 'eol' => "\n" + 'eol' => "\n", + 'binary' => 1, }); while (<$fh>) @@ -390,60 +357,26 @@ { @_ == 2 or croak "Incorrect number of parameters"; - my ($filename, $nodehash) = @_; + my ($filename, $sw) = @_; my $fh = IO::File->new (">$filename"); defined ($fh) or croak "Can't create file \"$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, - 'eol' => "\n" + 'eol' => "\n", + 'binary' => 1, }); - for (sort {$a <=> $b} keys %$nodehash) + for ($sw->writeups) { - $csv->print ($fh, [ @{ $nodehash->{$_} }{ qw(nodeid title rep d +ate) } ]) or croak "Text::CSV_XS->print failed"; + $csv->print ($fh, [ @{ $_ }{ qw(nodeid title rep date) } ]) or +croak "Text::CSV_XS->print failed"; } $fh->close; } # -# Don't display the URL when we die (which would be more informative +), because the users -# password might be e-mailed somewhere. And we sure don't want some + dweeb to be impersonating -# us on perlmonks.org, do we? -# -sub get_article_list -{ - @_ == 2 or croak "Incorrect number of parameters"; - - my ($username, $password) = @_; - my %nodehash = (); - - $LWP::Simple::FULL_LWP = 1; - - my $page = get ("$pmsite?user=$username&passwd=$password&op=login& +node=User+nodes+info+xml+generator") or croak "Get on $pmsite failed. +"; - - my $twig= new XML::Twig (TwigRoots => - { NODE => sub { + my ($t, $node) = @_; - my $nodeid = $node->att ('id'); - !exists ($nodehash {$nodeid}) or croak "N +ode $nodeid is duplicated!"; - $nodehash {$nodeid} = {'nodeid' => $nodei +d, - 'title' => $node- +>text, - 'rep' => $node- +>att ('reputation'), - 'last' => $node- +>att ('reputation'), - 'date' => $node- +>att ('createtime') - }; - $t->purge; - } - }); - - $twig->parse ($page); - - return (\%nodehash); -} - -# # OK, so if I was really smart, I'd have passed a hash in. Know wha +t? Too much work, # too little return. # @@ -531,6 +464,8 @@ -f whom the mail should as be from (myname\@planetx.co +m) -s the subject (default is "Perlmonks.org Reputation C +hange Report") -m SMTP mail server address ('mailserver.myserver.com' +) + -g histogram report + -H Fetch data via HTML rather than XML -1 quick reputation report -2 detailed reputation change report -3 both -1 and -2 (default) @@ -553,6 +488,8 @@ output to the console, in addition to mailing. -c specified witho +ut -e is meaningless, but not an error. + If you have problems using XML, use the -H option for HTML mode. + ENDOFHELP } @@ -578,6 +515,3 @@ Modified timestamp(14), PRIMARY KEY (ReputationID) ); - - -