http://www.perlmonks.org?node_id=65220
Category: PerlMonks Related Scripts
Author/Contact Info J. C. Wren
jcwren@jcwren.com
Description:

xstatswhore.pl is an update of statswhore.pl that uses mirods XML::Twig module, and includes larryls histogram code (if you like the histogram part, be sure to thank him. ++Re: statswhore.pl). This version has the advantage of being faster by generating fewer hits to the server, and fixing the problem of having exactly a multiple of 50 nodes. And the histogram part is really cool..

I posted this as new code so that the old one would still be available. Some people have had trouble installing the various XML modules, or don't wish to.

#!/usr/local/bin/perl -w
   
#
#  Invoke with './xstatswhore.pl [-u username] [-p password] [-b histo
+gram_binsize | -1]'
#
#  Alternatively, username and/or password can be embedded into the sc
+ript, if you don't want
#  command line arguments.  Use a -b option of -1 to suppress the hist
+ogram.
#
#  Displays a users total writeups, total reputation, along with min, 
+max, and average, and
#  a histogram.  Only works for your own account, since reps are 'prop
+rietary'.
#
#  Thanks to larryl for the histogram patch.  Be sure to give him ++ c
+redit if you like
#  the histogram portion (node id 65199)
#
#  Requires:
#    XML::Twig
#    LWP::Simple
#
#  Copyright 2000,2001(c) J.C.Wren   jcwren@jcwren.com
#  No rights reserved, use as you see fit.  I'd like to know about it,
+ though, just for kicks.
#
#  This module has more code than is actually necessary for the functi
+onality provided.
#  I was originally writing a module for another function, and this ca
+me out of it.  The
#  @articles array that is returned from get_article_list() contains a
+n array reference
#  that has the articles name, prefixed by the string 'node_id=xxx:', 
+where 'xxx' is the
#  number the article title refers to, the reputation of the article, 
+and the date the
#  article was written.  I'm sure you can imagine some uses for this..
+.
#
#  2000/08/03 - 1.00.00 - Initial release
#  2001/03/17 - 1.10.00 - Changed XML::TableExtract to using XML::Twig
+, added larryl's
#                         histogram code
#  2001/03/18 - 1.10.01 - Applied mirods change from node 65444
#

use strict;
use Carp;
use LWP::Simple;
use Getopt::Std;
use XML::Twig;
use POSIX qw(ceil floor);

my $def_username = "";  # Set this to your user name if you don't want
+ to use the -u option
my $def_password = "";  # Set this to your pass word if you don't want
+ to use the -p option
my $def_binsize = 5;    # Bin size for reputations.  Set to -1 to disa
+ble histograms.
my $pmsite = "http://www.perlmonks.org/index.pl";

{
   my %args = ();

   getopts ('u:p:b:', \%args);

   my $username = $args{u} || $def_username;
   my $password = $args{p} || $def_password;
   my $binsize  = $args{b} || $def_binsize;

   die "No password and/or username.  Program terminated.\n" if (!$use
+rname || !$password);

   my $hrephash = get_article_list ($username, $password) or croak "Ge
+t on $pmsite failed.";
   my $hsummary = summarize ($username, $hrephash) or croak "You have 
+no articles, perhaps?\n";;
   show_reps ($hsummary);
   show_histogram ($binsize, $hrephash, $hsummary) unless $binsize < 0
+;
}

#
#  Display the user that's the whole point of the program
#
sub show_reps
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hsummary = shift;

   print  "\n";
   printf ("              User: %s\n",    $hsummary->{username});
   printf ("    Total articles: %d\n",    $hsummary->{articles});
   printf ("  Total reputation: %d\n",    $hsummary->{reputation});
   printf ("    Min reputation: %d\n",    $hsummary->{repmin});
   printf ("    Max reputation: %d\n",    $hsummary->{repmax});
   printf ("Average reputation: %3.2f\n", $hsummary->{average});
   print  "\n";
}

#
#  We subtract one from the total hash count because the site XML retu
+rns
#  the users homenode as a article (Dog knows why... Ask Vroom)
#
sub summarize
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($username, $hrephash) = @_;
   my $total = 0;
   my $repmax = 0;
   my $repmin = 999999999;
   my %hsummary = ();

   ((scalar keys %$hrephash) - 1) >= 0 or return undef;

   for (keys %$hrephash)
   {
      $total += $hrephash->{$_}->{rep};
      $repmax = max ($repmax, $hrephash->{$_}->{rep});
      $repmin = min ($repmin, $hrephash->{$_}->{rep});
   }

   $hsummary {articles}   = (scalar keys %$hrephash) - 1;
   $hsummary {repmax}     = $repmax;
   $hsummary {repmin}     = $repmin;
   $hsummary {reputation} = $total;
   $hsummary {average}    = $total / ((scalar keys %$hrephash) - 1);
   $hsummary {username}   = $username;
   
   return (\%hsummary);
}

#
#  Gets the XML from the site.  Much more reliable that the old 'get e
+ach
#  page of articles' method.  And for those verbose people (tilly...),
+ it
#  should result in a 26x reduction of server hits.
#
sub get_article_list
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($username, $password) = @_;
   my %nodehash = ();

   $LWP::Simple::FULL_LWP = 1;

   my $page = get ("$pmsite?user=$username&passwd=$password&op=login&n
+ode=User+nodes+info+xml+generator") or return undef;

   my $twig= new XML::Twig (TwigRoots => 
            { NODE => sub { my ($t, $node) = @_;
                            my $nodeid = $node->att ('id');
                            !exists ($nodehash {$nodeid}) or croak "No
+de $nodeid is duplicated!";
                            $nodehash {$nodeid} = {'nodeid' => $nodeid
+,
                                                   'title'  => $node->
+text,
                                                   'rep'    => $node->
+att ('reputation'),
                                                   'last'   => $node->
+att ('reputation'),
                                                   'date'   => $node->
+att ('createtime')
                                                  };
                            $t->purge;
                          }
            });

   $twig->parse ($page);

   return (\%nodehash);
}

#
#  This code was contributed by larryl.  I mucked with it a little bit
+, passing in the
#  summary hash, and a little reformatting.  This is a great idea, and
+ I'm glad it was
#  contributed.  I'da never thought of it...
#
sub show_histogram
{
   @_ == 3 or croak "Incorrect number of parameters";

   my ($binsize, $hrephash, $hsummary) = @_;

   #
   # Divide articles into bins based on reputation:
   #
   my %bins = ();

   $bins {floor (($hrephash->{$_}->{rep} + 0.5) / $binsize)}++ foreach
+ (keys %$hrephash);

   my @bins   = sort {$a <=> $b} keys %bins;
   my $minbin = $bins [0];    # lowest reputation bin
   my $maxbin = $bins [-1];   # highest reputation bin

   #
   # Try to keep histogram on one page:
   #
   my $width  = 50;
   my $scale  = 1;
   my $maxrep = $hsummary->{repmax};

   if ($maxrep > $width && $maxrep <= ($width * 5)) 
   {
      $scale = 5;
   }
   elsif ($maxrep > ($width*5)) 
   {
      while (($maxrep / $scale) > $width) 
      {
         $scale *= 10;
      }
   }

   my $start = $minbin * $binsize;
   my $end   = $start + $binsize - 1;

   print "  Reputation   Article Count\n";
   print "------------- -------", "-" x 50, "\n";

   do 
   {
      my $count = $bins {$minbin} || 0;
      my $extra = ($count % $scale) ? '.' : '';
      printf "%4d .. %4d  \[%4d\] %s$extra\n", $start, $end, $count, '
+#' x ceil ($count / $scale);
      $start += $binsize;
      $end   += $binsize;
   } 
   while ($minbin++ < $maxbin);

   print "\n  Scale: #=$scale\n\n" if $scale > 1;
}

#
#  Gotta wonder why these aren't core functions...
#
sub max
{
   my ($a, $b) = @_;

   return ($a > $b ? $a : $b);
}

sub min
{
   my ($a, $b) = @_;

   return ($a < $b ? $a : $b);
}