Thanks. That makes my script that extracted the information from the
Wayback Machine useless.
#!/usr/bin/perl
use warnings;
use strict;
use Time::Piece;
use WWW::Mechanize;
use HTML::TableExtract;
3 == @ARGV
or die "Please specify start_date (yyyymmdd), user_id and user_nam
+e\n";
my ($start_date, $user_id, $user) = @ARGV;
my $url_prefix = 'http://web.archive.org/web/~DATE~000000'
. '/http://www.perlmonks.org/?node';
my @url_suffixes = ("_id=$user_id", "=$user");
my $t = 'Time::Piece'->strptime($start_date, '%Y%m%d');
my $today = localtime->ymd;
my $w = 'WWW::Mechanize'->new;
my %output;
my %last;
@last{ @url_suffixes } = (0) x @url_suffixes;
while ($t->ymd le $today) {
my $date = $t->ymd(q());
print STDERR "$date\r";
for my $suffix (@url_suffixes) {
my $url = $url_prefix . $suffix;
$url =~ s/~DATE~/$date/;
$w->get($url);
my ($real_date) = $w->uri =~ m%/web/([0-9]{8})%; # WBM redirec
+ts to future.
my $te = 'HTML::TableExtract'->new;
$te->parse($w->content);
TABLE:
for my $table ($te->tables) {
for my $row ($table->rows) {
if (grep defined && /Experience:/, @$row) {
my $xp = 0 + $row->[1];
last TABLE if $xp == $last{$suffix};
$output{$real_date} = $xp;
$last{$suffix} = $xp;
last TABLE
}
}
}
}
$t = $t->add_months(1);
}
for my $date (sort keys %output) {
print "$date\t$output{$date}\n";
}
Interestingly, the outputs of the XML and my script are almost the same. Here's how I extracted the information from the XML (specify its filename as a parameter to the following script):
#!/usr/bin/perl
use warnings;
use strict;
use WWW::Mechanize;
use HTML::TableExtract;
use XML::XSH2;
my $w = 'WWW::Mechanize'->new;
$w->get('http://www.perlmonks.org/?node=Voting%2FExperience%20System')
+;
my $te = 'HTML::TableExtract'->new( headers => [qw[ Level XP ]] );
$te->parse($w->content);
my $table = ($te->tables)[0]->rows;
package XML::XSH2::Map;
our $string;
package main;
xsh << 'end.';
open {$ARGV[0]} ;
$string = xsh:subst(normalize-space(//var[@name="levelchange"]), ';',
+"\n", 'g') ;
end.
$string =~ s/^[0-9]+-//gm;
$string =~ s/^([0-9]+)(.*)/$1$2 $table->[$1-1][1]/gm;
print $string;
You have to insert dashes into the dates to the output of the Wayback Machine to make it work:
perl -pe 's/(....)(..)/$1-$2-/'
I then used gnuplot to compare them:
set term pngcairo size 1024, 800
set xdata time
set timefmt "%Y-%m-%d"
set format x '%Y/%m'
plot 'pm-xp.txt' using 2:4 with lines title 'XP',\
'' using 2:($1*1000) with lines title 'Level',\
'wayback.txt' using 1:2 with lines title 'Wayback Machine'
Update: The image.