package PerlMonks::StatsWhore; # Three Scripts for the Acolytes under the sky, # seven for the Friar-lords in their halls of stone, # nine for Mortal Bishops doomed to die, # one for the Dark Saint on his dark throne # in the Land of Monkwhore where the Black Stats lie. # One Mod to rule them all, # one Mod to find them, # one Mod to bring them all # and in the darkness bind them # in the Land of Monkwhore where the Black Stats lie. # # # Mucho original code by jcwren and larryl # Objectified, merged, and tweaked by mojotoad # # Based on statswhore.pl, xstatswhore.pl, parts # of luke_repwalker.pl and xluke_repwalker.pl # # See the POD for instructions. # # This code is public domain. Feel free to contact jcwren with tales # of interesting applications. use LWP::Simple; use URI; use Carp; use POSIX qw(ceil floor); use vars qw($VERSION); $VERSION = '1.04'; my %Defaults = ( mode => 'XML', binsize => 5, ); sub new { my $self = {%Defaults}; bless $self, shift; my %parms = @_; $self->{$_} = $parms{$_} for keys %parms; $self->reset; $self; } sub user { my $self = shift; if (@_) { $self->{user} = shift; $self->reset; } $self->{user}; } sub password { my $self = shift; if (@_) { $self->{password} = shift; $self->reset; } $self->{password}; } sub mode { my $self = shift; if (@_) { $self->{mode} = shift; $self->reset; } $self->{mode}; } sub binsize { my $self = shift; if (@_) { $self->reset_histogram; $self->{binsize} = shift; } $self->{binsize}; } sub reset { my $self = shift; delete $self->{cache}; $self->reset_summary; $self->reset_histogram; } sub reset_summary { delete shift->{summary} } sub reset_histogram { delete shift->{histogram} } sub writeups_ref { my $self = shift; $self->fetch unless $self->{cache}; $self->{cache}; } sub node_ids { sort { $a <=> $b } keys %{shift->writeups_ref} } sub writeups { my $self = shift; @{$self->writeups_ref}{$self->node_ids}; } sub writeup_count { my $self = shift; $self->fetch; scalar keys %{$self->{cache}}; } sub fetch { my $self = shift; return $self->{cache} if ref $self->{cache}; my $fetch_class = __PACKAGE__ . '::' . $self->mode; eval "require $fetch_class"; my $fetcher = $fetch_class->new(user => $self->user, password => $self->password); ref $fetcher or croak "Could not load class $fetch_class\n"; $self->{cache} = $fetcher->nodes(@_); } sub summary { # code originally by jcwren # Crammed into object by mojotoad my $self = shift; return $self->{summary} if ref $self->{summary}; my $total = 0; my $repmax = 0; my $repmin = 999999999; $self->{summary}{repmax} = 0; $self->{summary}{repmin} = 999999999; foreach my $node ($self->writeups) { $total += $node->{rep}; $self->{summary}{repmax} = max($self->{summary}{repmax}, $node->{rep}); $self->{summary}{repmin} = min($self->{summary}{repmin}, $node->{rep}); } $self->{summary}{reputation} = $total; $self->{summary}{average} = $total / $self->writeup_count; $self->{summary}; } sub repmax { shift->summary->{repmax} } sub repmin { shift->summary->{repmin} } sub reputation { shift->summary->{reputation} } sub average { shift->summary->{average} } sub summary_as_string { # Code originally by jcwren # Crammed into object method by mojotoad my $self = shift; my $total = 0; $total += @$_[1] for (@$rarticles); @$rarticles = sort {@$a[1] <=> @$b[1]} @$rarticles; my $str; $str .= sprintf (" User: %s\n", $self->user); $str .= sprintf (" Total articles: %d\n", $self->writeup_count); $str .= sprintf (" Total reputation: %d\n", $self->reputation); $str .= sprintf (" Min reputation: %d\n", $self->repmin); $str .= sprintf (" Max reputation: %d\n", $self->repmax); $str .= sprintf ("Average reputation: %3.2f\n", $self->average); $str .= "\n"; $str; } sub histogram_as_string { # histogram code originally by larryl, modified by jcwren. # Unceremoniously crammed into an object method by mojotoad. my $self = shift; # Divide articles into bins based on reputation: my %bins = (); my $binsize = $self->binsize; $bins{floor (($_->{rep} + 0.5) / $binsize)}++ foreach ($self->writeups); my @bins = sort {$a <=> $b} keys %bins; my $minbin = $bins [0]; # lowest reputation bin my $maxbin = $bins [-1]; # highest reputation bin # Try to keep histogram on one page: my $width = 50; my $scale = 1; my $maxrep = $self->repmax; if ($maxrep > $width && $maxrep <= ($width * 5)) { $scale = 5; } elsif ($maxrep > ($width*5)) { while (($maxrep / $scale) > $width) { $scale *= 10; } } my $start = $minbin * $binsize; my $end = $start + $binsize - 1; my $str; $str .= " Reputation Article Count\n"; $str .= "------------- -------" . "-" x 50 . "\n"; do { my $count = $bins {$minbin} || 0; my $extra = ($count % $scale) ? '.' : ''; $str .= sprintf "%4d .. %4d \[%4d\] %s$extra\n", $start, $end, $count, '#' x ceil ($count / $scale); $start += $binsize; $end += $binsize; } while ($minbin++ < $maxbin); $str .= "\n Scale: #=$scale\n" if $scale > 1; $str; } sub max { my ($x, $y) = @_; return ($x > $y ? $x : $y); } sub min { my ($x, $y) = @_; return ($x < $y ? $x : $y); } #################################### { package PerlMonks::StatsWhore::Base; # Main accessor: nodes() # # Subclass and override extract_writeups() and init_query() use strict; use Carp; use LWP::Simple; my %Defaults = ( url => 'http://www.perlmonks.org/index.pl' ); sub new { my $class = shift; my %parms = @_; foreach (keys %Defaults) { next if defined $parms{$_}; $parms{$_} = $Defaults{$_}; } my $self = \%parms; bless $self, $class; $self->uri($self->make_uri($self->{url})) unless $self->uri; $self->reset_writeups; $self->init_query; $self; } sub user { my $self = shift; @_ ? $self->{user} = shift : $self->{user}; } sub password { my $self = shift; @_ ? $self->{password} = shift : $self->{password}; } sub uri { my $self = shift; @_ ? $self->{uri} = shift : $self->{uri} } sub make_uri { shift; URI->new(@_) } sub set_query { my $self = shift; my %parms = @_; if (defined $self->user) { $parms{user} = $self->user; $parms{op} = 'login'; } if (defined $self->password) { $parms{passwd} = $self->password; } $self->uri->query_form(%parms) } sub fetch { my $self = shift; my $str = get($self->uri) or croak("Fetch failed for " . $self->uri->as_string, "\n"); $str; } sub nodes { my $self = shift; $self->extract_writeups($self->fetch(@_)) unless scalar keys %{$self->{writeups}}; $self->{writeups}; } sub add_writeups { my($self, $wref) = @_; foreach (keys %$wref) { $self->{writeups}{$_} = $wref->{$_}; } } sub reset_writeups { shift->{writeups} = {} } # Override sub extract_writeups { shift->add_writeups(@_) } sub init_query { croak "init_query() method must be overidden\n" } } { package PerlMonks::StatsWhore::XML; use strict; use Carp; require XML::Twig; use base qw(PerlMonks::StatsWhore::Base); sub init_query { my $self = shift; $self->set_query( node => 'User nodes info xml generator' ); } sub extract_writeups { my($self, $page) = @_; $self->reset_writeups; return unless $page; my %nodehash = (); my $twig = $self->make_xml_twig( TwigRoots => { NODE => $self->make_xml_twig_sub(\%nodehash) } ); $twig->parse($page); # Remove home node from results foreach (keys %nodehash) { if ($nodehash{$_}{title} eq $self->{user}) { delete $nodehash{$_}; last; } } $self->add_writeups(\%nodehash); } sub make_xml_twig { shift; XML::Twig->new(@_) } sub make_xml_twig_sub { my($self, $nodehash) = @_; ref $nodehash or croak "Hash ref required.\n"; sub { my ($t, $node) = @_; my $nodeid = $node->att ('id'); !exists ($nodehash->{$nodeid}) or croak "Node $nodeid is duplicated!"; $nodehash->{$nodeid}{nodeid} = $nodeid; $nodehash->{$nodeid}{title} = $node->text; $nodehash->{$nodeid}{rep} = $node->att('reputation'); $nodehash->{$nodeid}{date} = $node->att('createtime'); $t->purge; } } } { package PerlMonks::StatsWhore::HTML; use strict; use Carp; require HTML::TableExtract; use LWP::Simple; use base qw(PerlMonks::StatsWhore::Base); sub init_query { my $self = shift; $self->set_query( node => 'Perl Monks User Search', orderby => 'nf', start => 0, ); } sub set_query { my $self = shift; my %params = @_; $params{length} = $self->fetch_writeup_count; $self->SUPER::set_query(%params); } sub fetch_writeup_count { my $self = shift; $self->user or croak "No username defined!\n"; my $c_uri = $self->uri->clone; $c_uri->query_form(node => $self->user); my $page = get(URI->new($c_uri)) or croak("Get failed for " . $c_uri->as_string . "\n"); my $te = $self->make_table_extract(headers => ['User since', '\w+']); $te->parse($page); my $w_count; foreach my $row ($te->first_table_state_found->rows) { if ($row->[0] =~ /Writeups/) { $w_count = $row->[1]; last; } } die "Failed to retrieve total writeup count." unless defined $w_count; $w_count; } sub extract_writeups { my $self = shift; $self->reset_writeups; return unless @_; my $page = shift; my $te = $self->make_table_extract(headers => ['Node ID', 'Writeup', 'Rep', 'Created']); $te->parse($page); my %nodehash; foreach ($te->first_table_state_found->rows) { $nodehash{$_->[0]}{nodeid} = $_->[0]; $nodehash{$_->[0]}{title} = $_->[1]; $nodehash{$_->[0]}{rep} = $_->[2]; $nodehash{$_->[0]}{date} = $_->[3]; } $self->add_writeups(\%nodehash); } sub make_table_extract { shift; HTML::TableExtract->new(@_) } } 1; __END__ =head1 NAME PerlMonks::StatsWhore - Perl module for tracking node stats on www.perlmonks.org =head1 SYNOPSIS use PerlMonks::StatsWhore; # Default relies on XML ticker and XML::Twig my $sw = PerlMonks::StatsWhore->new( user => 'username', password => 'password', ); print $sw->summary_as_string; print $sw->histogram_as_string; # If you do not want to mess with XML::Twig, or are having # trouble with odd characters in titles, fall back to # HTML pages and HTML::TableExtract my $sw2 = PerlMonks::StatsWhore->new( user => 'username', password => 'password', mode => 'HTML', ); print $sw->summary_as_string; # Or pull statistics directly print "User: ", $sw->user, "\n"; printf( "Average reputation: %5.2f\n", $sw->average ); printf( "Minimum reputation: %d\n", $sw->repmin ); printf( "Maximum reputation: %d\n", $sw->repmax ); printf( " Total reputation: %d\n", $sw->reputation ); =head1 DESCRIPTION PerlMonks::StatsWhore fetches and calculates the reputation of your nodes on www.perlmonks.org. The module encapsulates the functionality and much of the code from the scripts C, C, C, and C written by B, including the histogram code provided by B. The module has the advantage of allowing you to select either XML or HTML fetch modes -- XML, the default, is faster. For those people that have trouble installing XML::Twig(3) and its associated libraries, choose HTML mode which uses traditional HTML page fetches and HTML::TableExtract(3). If XML seems to be having problems with parsing node titles due to "odd" characters, then use HTML and things should work as intended. If behind a proxy, set your C environment variable to the URL of your proxy server, as per L =head1 METHODS =over =item new() Return a new PerlMonks::StatsWhore object. Valid attributes are: =over =item user PerlMonks account name. =item password Password for perlmonks account. =item mode Fetch mode. Valid choices are 'XML' and 'HTML' (case sensitive). XML mode requires XML::Twig(3), XML::Parser(3), etc. HTML mode requires HTML::TableExtract. XML mode is much faster and is the default mode. =item binsize Specifies the resolution of each bar of the histogram. Default 5. =back =item user() =item password() =item mode() =item binsize() Access or set the associated parameter. =item summary_as_string() String detailing username, article count, and reputation (total, min, max, average) =item histogram_as_string() String depicting a histogram of article frequency vs. reputation. =item repmax() =item repmin() =item reputation() =item average() Return relevant statistics for the current writeup list. Automatically fetch writeups if not fetched already. =item reset() Resets summary statistics and writeup cache -- subsequent queries will fetch a new list of writeups. =item writeups() Returns a list of writeup descriptors. Each is a reference to a hash with the following keys: C, C, C<rep>, C<date> =item writeups_ref() Returns a single reference to a hash of writeups, keyed by C<node_id>. =item node_ids() Returns a list of node_ids for all writeups -- suitable for use as keys in the writeups_ref() hash. =back =head1 REQUIRES LWP::Simple(3), XML::Twig(3) (only for XML mode), HTML::TableExtract(3) (only for HTML mode) =head1 AUTHORS Perlmonk B<jcwren> with contributions from B<larryl>. Crammed into a module and modified by B<mojotoad>. =head1 COPYRIGHT Public domain. In the spirit of the original scripts, feel free to let B<jcwren> know of any interesting applications inspired by the code. =head1 SEE ALSO LWP::Simple(3), URI(3), XML::Twig(3), HTML::TableExtract(3) =cut