Beefy Boxes and Bandwidth Generously Provided by pair Networks
Clear questions and runnable code
get the best and fastest answer
 
PerlMonks  

xstatswhore.pl

by jcwren (Prior)
on Mar 18, 2001 at 04:16 UTC ( #65220=sourcecode: print w/ replies, xml ) Need Help??

Category: PerlMonks.org Related Scripts
Author/Contact Info J. C. Wren
jcwren@jcwren.com
Description:

xstatswhore.pl is an update of statswhore.pl that uses mirods XML::Twig module, and includes larryls histogram code (if you like the histogram part, be sure to thank him. ++Re: statswhore.pl). This version has the advantage of being faster by generating fewer hits to the server, and fixing the problem of having exactly a multiple of 50 nodes. And the histogram part is really cool..

I posted this as new code so that the old one would still be available. Some people have had trouble installing the various XML modules, or don't wish to.

#!/usr/local/bin/perl -w
   
#
#  Invoke with './xstatswhore.pl [-u username] [-p password] [-b histo
+gram_binsize | -1]'
#
#  Alternatively, username and/or password can be embedded into the sc
+ript, if you don't want
#  command line arguments.  Use a -b option of -1 to suppress the hist
+ogram.
#
#  Displays a users total writeups, total reputation, along with min, 
+max, and average, and
#  a histogram.  Only works for your own account, since reps are 'prop
+rietary'.
#
#  Thanks to larryl for the histogram patch.  Be sure to give him ++ c
+redit if you like
#  the histogram portion (node id 65199)
#
#  Requires:
#    XML::Twig
#    LWP::Simple
#
#  Copyright 2000,2001(c) J.C.Wren   jcwren@jcwren.com
#  No rights reserved, use as you see fit.  I'd like to know about it,
+ though, just for kicks.
#
#  This module has more code than is actually necessary for the functi
+onality provided.
#  I was originally writing a module for another function, and this ca
+me out of it.  The
#  @articles array that is returned from get_article_list() contains a
+n array reference
#  that has the articles name, prefixed by the string 'node_id=xxx:', 
+where 'xxx' is the
#  number the article title refers to, the reputation of the article, 
+and the date the
#  article was written.  I'm sure you can imagine some uses for this..
+.
#
#  2000/08/03 - 1.00.00 - Initial release
#  2001/03/17 - 1.10.00 - Changed XML::TableExtract to using XML::Twig
+, added larryl's
#                         histogram code
#  2001/03/18 - 1.10.01 - Applied mirods change from node 65444
#

use strict;
use Carp;
use LWP::Simple;
use Getopt::Std;
use XML::Twig;
use POSIX qw(ceil floor);

my $def_username = "";  # Set this to your user name if you don't want
+ to use the -u option
my $def_password = "";  # Set this to your pass word if you don't want
+ to use the -p option
my $def_binsize = 5;    # Bin size for reputations.  Set to -1 to disa
+ble histograms.
my $pmsite = "http://www.perlmonks.org/index.pl";

{
   my %args = ();

   getopts ('u:p:b:', \%args);

   my $username = $args{u} || $def_username;
   my $password = $args{p} || $def_password;
   my $binsize  = $args{b} || $def_binsize;

   die "No password and/or username.  Program terminated.\n" if (!$use
+rname || !$password);

   my $hrephash = get_article_list ($username, $password) or croak "Ge
+t on $pmsite failed.";
   my $hsummary = summarize ($username, $hrephash) or croak "You have 
+no articles, perhaps?\n";;
   show_reps ($hsummary);
   show_histogram ($binsize, $hrephash, $hsummary) unless $binsize < 0
+;
}

#
#  Display the user that's the whole point of the program
#
sub show_reps
{
   @_ == 1 or croak "Incorrect number of parameters";

   my $hsummary = shift;

   print  "\n";
   printf ("              User: %s\n",    $hsummary->{username});
   printf ("    Total articles: %d\n",    $hsummary->{articles});
   printf ("  Total reputation: %d\n",    $hsummary->{reputation});
   printf ("    Min reputation: %d\n",    $hsummary->{repmin});
   printf ("    Max reputation: %d\n",    $hsummary->{repmax});
   printf ("Average reputation: %3.2f\n", $hsummary->{average});
   print  "\n";
}

#
#  We subtract one from the total hash count because the site XML retu
+rns
#  the users homenode as a article (Dog knows why... Ask Vroom)
#
sub summarize
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($username, $hrephash) = @_;
   my $total = 0;
   my $repmax = 0;
   my $repmin = 999999999;
   my %hsummary = ();

   ((scalar keys %$hrephash) - 1) >= 0 or return undef;

   for (keys %$hrephash)
   {
      $total += $hrephash->{$_}->{rep};
      $repmax = max ($repmax, $hrephash->{$_}->{rep});
      $repmin = min ($repmin, $hrephash->{$_}->{rep});
   }

   $hsummary {articles}   = (scalar keys %$hrephash) - 1;
   $hsummary {repmax}     = $repmax;
   $hsummary {repmin}     = $repmin;
   $hsummary {reputation} = $total;
   $hsummary {average}    = $total / ((scalar keys %$hrephash) - 1);
   $hsummary {username}   = $username;
   
   return (\%hsummary);
}

#
#  Gets the XML from the site.  Much more reliable that the old 'get e
+ach
#  page of articles' method.  And for those verbose people (tilly...),
+ it
#  should result in a 26x reduction of server hits.
#
sub get_article_list
{
   @_ == 2 or croak "Incorrect number of parameters";

   my ($username, $password) = @_;
   my %nodehash = ();

   $LWP::Simple::FULL_LWP = 1;

   my $page = get ("$pmsite?user=$username&passwd=$password&op=login&n
+ode=User+nodes+info+xml+generator") or return undef;

   my $twig= new XML::Twig (TwigRoots => 
            { NODE => sub { my ($t, $node) = @_;
                            my $nodeid = $node->att ('id');
                            !exists ($nodehash {$nodeid}) or croak "No
+de $nodeid is duplicated!";
                            $nodehash {$nodeid} = {'nodeid' => $nodeid
+,
                                                   'title'  => $node->
+text,
                                                   'rep'    => $node->
+att ('reputation'),
                                                   'last'   => $node->
+att ('reputation'),
                                                   'date'   => $node->
+att ('createtime')
                                                  };
                            $t->purge;
                          }
            });

   $twig->parse ($page);

   return (\%nodehash);
}

#
#  This code was contributed by larryl.  I mucked with it a little bit
+, passing in the
#  summary hash, and a little reformatting.  This is a great idea, and
+ I'm glad it was
#  contributed.  I'da never thought of it...
#
sub show_histogram
{
   @_ == 3 or croak "Incorrect number of parameters";

   my ($binsize, $hrephash, $hsummary) = @_;

   #
   # Divide articles into bins based on reputation:
   #
   my %bins = ();

   $bins {floor (($hrephash->{$_}->{rep} + 0.5) / $binsize)}++ foreach
+ (keys %$hrephash);

   my @bins   = sort {$a <=> $b} keys %bins;
   my $minbin = $bins [0];    # lowest reputation bin
   my $maxbin = $bins [-1];   # highest reputation bin

   #
   # Try to keep histogram on one page:
   #
   my $width  = 50;
   my $scale  = 1;
   my $maxrep = $hsummary->{repmax};

   if ($maxrep > $width && $maxrep <= ($width * 5)) 
   {
      $scale = 5;
   }
   elsif ($maxrep > ($width*5)) 
   {
      while (($maxrep / $scale) > $width) 
      {
         $scale *= 10;
      }
   }

   my $start = $minbin * $binsize;
   my $end   = $start + $binsize - 1;

   print "  Reputation   Article Count\n";
   print "------------- -------", "-" x 50, "\n";

   do 
   {
      my $count = $bins {$minbin} || 0;
      my $extra = ($count % $scale) ? '.' : '';
      printf "%4d .. %4d  \[%4d\] %s$extra\n", $start, $end, $count, '
+#' x ceil ($count / $scale);
      $start += $binsize;
      $end   += $binsize;
   } 
   while ($minbin++ < $maxbin);

   print "\n  Scale: #=$scale\n\n" if $scale > 1;
}

#
#  Gotta wonder why these aren't core functions...
#
sub max
{
   my ($a, $b) = @_;

   return ($a > $b ? $a : $b);
}

sub min
{
   my ($a, $b) = @_;

   return ($a < $b ? $a : $b);
}

Comment on xstatswhore.pl
Download Code

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://65220]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others having an uproarious good time at the Monastery: (8)
As of 2014-12-28 04:56 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (178 votes), past polls