Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

statswhore.pl

by jcwren (Prior)
on Aug 04, 2000 at 08:18 UTC ( #26134=sourcecode: print w/ replies, xml ) Need Help??

Category: Perlmonks.org Related Scripts
Author/Contact Info Chris 'jcwren' Wren
jcwren@jcwren.com
Description: Extracts a users total writeups, and total repuration, along with min, max, and average. Account and password can be embedded into the program, or supplied on the command line
#!/usr/local/bin/perl -w
   
#
#  Invoke with './statswhore.pl [-u username] [-p password]'
#
#  Alternatively, username and/or password can be embedded into the sc
+ript, if you don't want
#  command line arguments.
#
#  Displays a users total writeups, total reputation, along with min, 
+max, and average.  Only 
#  works for your own account, since reps are 'proprietary'
#
#  Requires:
#    HTML::TableExtract
#    LWP::Simple
#
#  Copyright 2000(c) J.C.Wren   jcwren@jcwren.com
#  No rights reserved, use as you see fit.  I'd like to know about it,
+ though, just for kicks.
#
#  This module has more code than is actually necessary for the functi
+onality provided.
#  I was originally writing a module for another function, and this ca
+me out of it.  The
#  @articles array that is returned from get_article_list() contains a
+n array reference
#  that has the articles name, prefixed by the string 'node_id=xxx:', 
+where 'xxx' is the
#  number the article title refers to, the reputation of the article, 
+and the date the
#  article was written.  I'm sure you can imagine some uses for this..
+.
#

use strict;
use LWP::Simple;
use Getopt::Std;

my $def_username = "";  # Set this to your user name if you don't want
+ to use the -u option
my $def_password = "";  # Set this to your pass word if you don't want
+ to use the -p option
my $pmsite = "http://www.perlmonks.org/index.pl";

{
   my %args = ();

   getopts ('u:p:', \%args);

   my $username = $args{u} || $def_username;
   my $password = $args{p} || $def_password;

   die "No password and/or username.  Program terminated.\n" if (!$use
+rname || !$password);

   show_reps ($username, $password);
}

sub show_reps
{
   @_ == 2 or die "Incorrect number of parameters";

   my ($username, $password) = @_;
   my $total = 0;

   my $rarticles = get_article_list ($username, $password);

   die "You have no articles, perhaps?\n" unless ($#$rarticles >= 0);

   $total += @$_[1] for (@$rarticles);
   @$rarticles = sort {@$a[1] <=> @$b[1]} @$rarticles;

   print "\n";
   print sprintf ("              User: %s\n",    $username);
   print sprintf ("    Total articles: %d\n",    $#$rarticles + 1);
   print sprintf ("  Total reputation: %d\n",    $total);
   print sprintf ("    Min reputation: %d\n",    @$rarticles [0]->[1])
+;
   print sprintf ("    Max reputation: %d\n",    @$rarticles [-1]->[1]
+);
   print sprintf ("Average reputation: %3.2f\n", $total / ($#$rarticle
+s + 1));
   print "\n";
}

sub get_article_list
{
   @_ == 2 or die "Incorrect number of parameters";

   my ($username, $password) = @_;
   my @articles = ();

   $LWP::Simple::FULL_LWP = 1;

   for (my $i = 0; 1; $i += 50)
   {
      my $url = "$pmsite?user=$username&passwd=$password&op=login&node
+=perl+monks+user+search&usersearch=$username&orderby=createtime%20des
+c&count=$i";

      if (my $page = get ($url))
      {
         last if (get_article_page ($page, ['Writeup', 'Rep', 'Create 
+Time'], \@articles, $i % 50) < 50);
      }
      else
      {
         die "Get on $url failed.";
      }
   }

   return (\@articles);
}

sub get_article_page
{
   @_ == 4 or die "Incorrect number of parameters";

   my ($html, $tablecols, $rarticles, $lines) = @_;
   my $rowcnt = 0;

   $html =~ s/bgcolor=>/bgcolor="">/mg;

   my $te = new jcwExtract (headers => $tablecols)->parse ($html);

   die sprintf ("Wrong number of tables (%d) returned! (Probably bad u
+sername/password)\n", scalar $te->table_states) if (scalar $te->table
+_states != 1) ;

   foreach my $ts ($te->table_states) 
   {
      foreach ($ts->rows)
      {
         last if (@$_[2] !~ /\d+-\d+\d+/);
         push @$rarticles, [ @$_ ];
         $rowcnt++;
      }
   }

   return ($rowcnt);
}

BEGIN
{
   #
   #  This is not good code.  It's really evil and the both the author
+ of this package, and the author
   #  of HTML::TableExtract should be severely beaten about the head a
+nd shoulders.
   #
   package jcwExtract;
   
   use strict;
   use HTML::TableExtract;
   
   @jcwExtract::ISA = qw(HTML::TableExtract HTML::TableExtract::TableS
+tate);
   
   my $node_id = undef;
   
   {
      local $^W = 0;
      #
      #  Override the _add_text mode that if $node_id is defined, we'l
+l insert the node_id
      #  value at the front of the string.
      #
      eval 'sub HTML::TableExtract::TableState::_add_text 
         {
            my ($self, $txt, $skew_column) = @_;
            defined $txt or return;
            my $row = $self->{content}[$#{$self->{content}}];
            $txt = sprintf ("node_id=%d:%s", $node_id, $txt) if define
+d ($node_id);
            $node_id = undef;
            $row->[$skew_column] .= $txt;
            $txt;
         }';
   }
   
   #
   #  Overridden start method, so we can look for <A HREF=...> tags
   #
   sub start
   {
      my $self = shift;
      my ($tag, $attr, $attrseq, $origtext) = @_;
   
      $self->SUPER::start (@_);
   
      #
      #  If it's a <A HREF=...> tag, and has a node_id, then $1 will c
+ontain the node_id.
      #  If we're in a table cell, set $node_id to $1, otherwise undef
+ it.  We don't simply
      #  set $node_id to undef if it's not a <A> tag, because we want 
+to save the last value
      #  if there is a subsequent <B> or <i> or somesuch tag between t
+he <A> and </A> tags.
      #
      if ($tag eq 'a' && defined ($attr->{'href'}) && $attr->{'href'} 
+=~ /\bnode_id=(\d+)/i)
      {
         $node_id = ($self->_current_table_state->{in_cell}) ? $1 : un
+def;
      }
   }
   
   1;
}

Comment on statswhore.pl
Download Code
Replies are listed 'Best First'.
Re: statswhore.pl
by larryl (Scribe) on Mar 18, 2001 at 01:27 UTC

    Here's a patch that will give you histogram output like so:

    User: larryl Total articles: 23 Total reputation: 186 Min reputation: -4 Max reputation: 22 Average reputation: 8.09 Reputation Article Count ------------- ------------------------------------------------------ -5 .. -1 [ 1] # 0 .. 4 [ 7] ####### 5 .. 9 [ 4] #### 10 .. 14 [ 7] ####### 15 .. 19 [ 3] ### 20 .. 24 [ 1] #

    The patch:

    rcsdiff -c -r1.1 -r1.2 statswhore.pl =================================================================== RCS file: RCS/statswhore.pl,v retrieving revision 1.1 retrieving revision 1.2 diff -c -r1.1 -r1.2 *** /tmp/T0fIaia5 Sat Mar 17 15:21:09 2001 --- /tmp/T1gIaia5 Sat Mar 17 15:21:09 2001 *************** *** 1,7 **** #!/usr/local/bin/perl -w # ! # Invoke with './statswhore.pl [-u username] [-p password]' # # Alternatively, username and/or password can be embedded into the +script, if you don't want # command line arguments. --- 1,9 ---- #!/usr/local/bin/perl -w + use strict; # ! # Invoke with ! # './statswhore.pl [-u username] [-p password] [-b histogram_bi +nsize]' # # Alternatively, username and/or password can be embedded into the +script, if you don't want # command line arguments. *************** *** 27,55 **** use strict; use LWP::Simple; use Getopt::Std; my $def_username = ""; # Set this to your user name if you don't wa +nt to use the -u option my $def_password = ""; # Set this to your pass word if you don't wa +nt to use the -p option my $pmsite = "http://www.perlmonks.org/index.pl"; { my %args = (); ! getopts ('u:p:', \%args); my $username = $args{u} || $def_username; my $password = $args{p} || $def_password; die "No password and/or username. Program terminated.\n" if (!$u +sername || !$password); ! show_reps ($username, $password); } sub show_reps { ! @_ == 2 or die "Incorrect number of parameters"; ! my ($username, $password) = @_; my $total = 0; my $rarticles = get_article_list ($username, $password); --- 29,60 ---- use strict; use LWP::Simple; use Getopt::Std; + use POSIX qw(ceil floor); my $def_username = ""; # Set this to your user name if you don't wa +nt to use the -u option my $def_password = ""; # Set this to your pass word if you don't wa +nt to use the -p option + my $def_binsize = 5; # bin size for reputations my $pmsite = "http://www.perlmonks.org/index.pl"; { my %args = (); ! getopts ('u:p:b:', \%args); my $username = $args{u} || $def_username; my $password = $args{p} || $def_password; + my $binsize = $args{b} || $def_binsize; die "No password and/or username. Program terminated.\n" if (!$u +sername || !$password); ! show_reps ($username, $password, $binsize); } sub show_reps { ! @_ == 3 or die "Incorrect number of parameters"; ! my ($username, $password, $binsize) = @_; my $total = 0; my $rarticles = get_article_list ($username, $password); *************** *** 67,72 **** --- 72,79 ---- print sprintf (" Max reputation: %d\n", @$rarticles [-1]->[ +1]); print sprintf ("Average reputation: %3.2f\n", $total / ($#$rartic +les + 1)); print "\n"; + show_histogram($binsize, $total, $rarticles); + print "\n"; } sub get_article_list *************** *** 121,126 **** --- 128,176 ---- return ($rowcnt); } + sub show_histogram + { + my ($binsize, $total, $rarticles) = @_; + + # Divide articles into bins based on reputation: + my %bins = (); + for ( @$rarticles ) { + my $rep = @$_[1]; + $bins{floor(($rep+.5)/$binsize)}++; + } + + my @bins = sort {$a<=>$b} keys %bins; + my $bin = $bins[0]; # lowest reputation bin + my $maxbin = $bins[-1]; # highest reputation bin + + # Try to keep histogram on one page: + my $width = 50; + my $scale = 1; + my $maxrep = @$rarticles [-1]->[1]; + if ( $maxrep > $width && $maxrep <= ($width*5) ) { + $scale = 5; + } + elsif ( $maxrep > ($width*5) ) { + while ( ($maxrep/$scale) > $width ) { + $scale *= 10; + } + } + + my $start = $bin * $binsize; + my $end = $start + $binsize - 1; + print " Reputation Article Count\n"; + print "------------- -------", "-" x 50, "\n"; + do { + my $count = $bins{$bin} || 0; + my $extra = ( $count % $scale ) ? '.' : ''; + printf "%4d .. %4d \[%4d\] %s$extra\n", + $start, $end, $count, '#' x ceil($count/$scale); + $start += $binsize; + $end += $binsize; + } while ( $bin++ < $maxbin ); + print "\n Scale: #=$scale\n" if $scale > 1; + } + BEGIN { #

Re: statswhore.pl
by darobin (Monk) on Mar 28, 2001 at 18:35 UTC

    The following simplistic patch adds reports that differentiate between Re: nodes reputation and top-level articles reputation, based on the idea that those two categories are often voted on differently.

    --- statswhore_old.pl Wed Mar 28 15:27:03 2001 +++ statswhore.pl Wed Mar 28 15:24:24 2001 @@ -59,13 +59,44 @@ $total += @$_[1] for (@$rarticles); @$rarticles = sort {@$a[1] <=> @$b[1]} @$rarticles; + # added by darobin + my (@top_articles,@re_articles); + my $top_total = 0; + my $re_total = 0; + for my $art (@$rarticles) { + $art->[0] =~ s/^node_id=\d+://i; + if ($art->[0] =~ /^\s*Re:/i) { + push @re_articles, $art; + $re_total += $art->[1]; + } + else { + push @top_articles, $art; + $top_total += $art->[1]; + } + } + @top_articles = sort { $a->[1] <=> $b->[1] } @top_articles; + @re_articles = sort { $a->[1] <=> $b->[1] } @re_articles; + print "\n"; - print sprintf (" User: %s\n", $username); - print sprintf (" Total articles: %d\n", $#$rarticles + 1); - print sprintf (" Total reputation: %d\n", $total); - print sprintf (" Min reputation: %d\n", @$rarticles [0]->[1] +); - print sprintf (" Max reputation: %d\n", @$rarticles [-1]->[1 +]); - print sprintf ("Average reputation: %3.2f\n", $total / ($#$rarticl +es + 1)); + print sprintf (" User: %s\n", $userna +me); + print sprintf (" Total articles: %d\n", $#$rart +icles + 1); + print sprintf (" Total reputation: %d\n", $total) +; + print sprintf (" Min reputation: %d\n", @$rarti +cles [0]->[1]); + print sprintf (" Max reputation: %d\n", @$rarti +cles [-1]->[1]); + print sprintf (" Avg reputation: %3.2f\n\n", $tota +l / ($#$rarticles + 1)); + + print sprintf (" Total top-level posts: %d\n", scalar @to +p_articles); + print sprintf ("Total top-level post reputation: %d\n", $top_total +); + print sprintf (" Max top-level post reputation: %d\n", $top_artic +les[-1]->[1]); + print sprintf (" Min top-level post reputation: %d\n", $top_artic +les[0]->[1]); + print sprintf (" Avg top-level post reputation: %3.2f\n\n", (@top +_articles)?($top_total / scalar @top_articles):0); + + print sprintf (" Total reply posts: %d\n", scalar @re +_articles); + print sprintf (" Total reply post reputation: %d\n", $re_total) +; + print sprintf (" Max reply post reputation: %d\n", $re_articl +es[-1]->[1]); + print sprintf (" Min reply post reputation: %d\n", $re_articl +es[0]->[1]); + print sprintf (" Avg reply post reputation: %3.2f\n", (@re_ar +ticles)?($re_total / scalar @re_articles):0); + print "\n"; }

    -- darobin

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://26134]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (6)
As of 2015-07-29 02:11 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (260 votes), past polls