Beefy Boxes and Bandwidth Generously Provided by pair Networks
Syntactic Confectionery Delight
 
PerlMonks  

Comment on

( #3333=superdoc: print w/ replies, xml ) Need Help??
#!/usr/local/bin/perl -w # # Version 1.00.00 - 2000/08/05 - Initial incarnation # Version 1.00.10 - 2000/08/05 - A few cleanups per node 26390 # Version 1.00.20 - 2000/08/08 - Added DBI support # Version 1.00.30 - 2000/09/02 - Fix error if number of nodes is a mu +ltiple of 50 # # Invoke with './luke_repwalker.pl -?' for help # # The username and/or password can be embedded into the script, if yo +u don't want command # line arguments. # # Compares the users current writeups to a previous snapshot, display +ing articles that have # been added, deleted, or reputations that have changed since the las +t run. Unless disabled. # the new writeups info is saved as the snapshot for the next run. # # The output can either be displayed at the user's console, and/or it + can be emailed to a given # user, via MIME::Lite. # # For a cron job, the following entry will run every hour at 0 minute +s past, only generate output # when something has changed, e-mail us the results, and update the m +ySQL database. You will, of # course, have to change the fields to match who/what/where and when +you really are. # # 0 * * * * /PMUtils/luke_repwalker.pl -u pmuser -p pmpw -e -t '"Pe +rlDude" <perldude@hackers.com>' -z -d # # The SQL necessary to create the mySQL table is located at the botto +m of the output file, and may # be fed to 'mysqldump' to create the table. You'll need to create t +he database it's going to live # in, first. # # Requires: # HTML::TableExtract # LWP::Simple # Text::CSV_XS # MIME::Lite; # DBI; # # Copyright 2000(c) J.C.Wren jcwren@jcwren.com # No rights reserved, use as you see fit. I'd like to know about it, + though, just for kicks. # use strict; use Carp; use LWP::Simple; use Text::CSV_XS; use MIME::Lite; use DBI; use IO::File; use Getopt::Std; use vars qw($def_username $def_password $def_filename); use vars qw($def_dbhost $def_dbdb $def_dbtable $def_dbuser $def_dbpw); use vars qw($def_mto $def_msubject $def_mserver $def_mfrom); use vars qw($pmsite $pmpagelen); # # Set these accordingly, if you don't want command line parameters. # $def_username = ''; # username +, unless -u is preferred $def_password = ''; # password +, unless -p is preferred $def_filename = "$ENV{HOME}/.rep.%s"; # snapshot + file $def_mto = ''; # no defau +lt 'to' user $def_msubject = 'Perlmonks.org Reputation Change Report'; # default +title $def_mserver = 'localhost'; # default +mailserver $def_mfrom = '%s'; # %s means + use the 'to' parameter $pmsite = 'http://www.perlmonks.org/index.pl'; # vroom's +house of illrepute $pmpagelen = 50; # articles + returned per page $def_dbhost = 'localhost'; # Where our database is hosted $def_dbdb = 'Perlmonks'; # Name of our database $def_dbtable = 'Reputation'; # Name of our table $def_dbuser = 'pmuser'; # Our mySQL username $def_dbpw = 'pmdude'; # Our mySQL password # # # { my %args = (); my $out = ""; getopts ('u:p:F:t:f:s:m:Inhe?cbzP123d', \%args); if ($args{'?'}) { usage (); exit; } if ($args {P}) { local $| = 1; print "Password: "; $args {p} = <STDIN>; chomp ($args{p}); } my $username = $args{u} || $def_username; my $password = $args{p} || $def_password; my $filename = $args{F} || sprintf ($def_filename, $username); $username or die "No username. Program terminated.\n"; $password or die "No password. Program terminated.\n"; (!$args{I} || !$args{n}) or die "-I and -n are mutually exclusive. + Program terminated\n"; # # # if ($args{I}) { my $hreplist = initialize_rep_file ($username, $password, $filen +ame); if ($args{d}) { my @nodelist = (); my %dbreplist = (); push @nodelist, $_ foreach (keys %$hreplist); update_replist ('I', \%dbreplist, $hreplist, \@nodelist); db_update (\%dbreplist); } exit; } if (!-e $filename) { print "No previous reputation snapshot file exists. Use -I to c +reate\n"; exit; } my $hmailopts = confirm_mailargs ($args{e}, $args{t}, $args{m}, $ar +gs{f}, $args{s}); my ($outd, $outr, $dbreplist) = compare_reps ($username, $password, + $filename, $args{n}, $args{b}, $args{z}); if (defined ($outd) && defined ($outr)) { my $out; $out = $outr . $outd . "\n"; $out = $outr . "\n" if ($args{1} && !$args{3}); $out = $outd . "\n" if ($args{2} && !$args{3}); print $out if ($args{c} || !$args{e}); if ($args{e}) { MIME::Lite->send ('smtp', $hmailopts->{server}, Timeout=>60); my $msg = MIME::Lite->new (From => $hmailopts->{from}, To => $hmailopts->{to}, Subject => $hmailopts->{subject}, Type => 'TEXT', Encoding => '7bit', Data => $out) || croak "MIME:: +Lite->new failed"; $msg->send || croak "MIME::Lite->send failed."; } db_update ($dbreplist) if $args{d}; } } sub compare_reps { @_ == 6 or croak "Incorrect number of parameters"; my ($username, $password, $filename, $noupdate, $brief, $zero) = @_ +; my @newnodes = (); my @deletednodes = (); my @changednodes = (); my %replist = (); my $outd = undef; my $outr = undef; my $holdreps = read_file ($filename); my $hnewreps = get_article_list ($username, $password); scalar keys %$hnewreps != 0 or die "You have no articles, perhaps?\ +n"; # # Find all the new, deleted and changed entries # foreach (keys %$hnewreps) {push (@newnodes, $_) if !exists ($ho +ldreps->{$_})} foreach (keys %$holdreps) {push (@deletednodes, $_) if !exists ($hn +ewreps->{$_})} foreach (keys %$holdreps) {push (@changednodes, $_) if exists ($hn +ewreps->{$_}) && $hnewreps->{$_}->{'rep'} != $holdreps->{$_}->{'rep'} +} # # For any article in the @changednodes array, move the 'rep' field + from %holdreps into # the 'last' of %nhewreps. This makes displaying it really easy. # $hnewreps->{$_}->{'last'} = $holdreps->{$_}->{'rep'} foreach (@chan +gednodes); # # If no -z (zero output) flag, and we have changes, then generate +the reports. Otherwise, if # -z is set, then return undef for both reports. # if (!$zero || $#newnodes != -1 || $#deletednodes != -1 || $#changed +nodes != -1) { if ($brief) { $outd = "\n"; $outd .= "New nodes: " . ($#newnodes == -1 ? "none" : + join (',', @newnodes)) . "\n"; $outd .= "Deleted nodes: " . ($#deletednodes == -1 ? "none" : + join (',', @deletednodes)) . "\n"; $outd .= "Changed nodes: " . ($#changednodes == -1 ? "none" : + join (',', @changednodes)) . "\n"; } else { my $longest_title = find_longest_title ([{'array' => \@newnod +es, 'hash' => $hnewreps}, {'array' => \@delete +dnodes, 'hash' => $holdreps}, {'array' => \@change +dnodes, 'hash' => $hnewreps} ]); $outd = sprintf ("\nNew nodes: %d\n", scalar @newnodes) + . display_nodelist ($hnewreps, \@newnodes, $longest_title); $outd .= sprintf ("\nDeleted nodes: %d\n", scalar @deletednod +es) . display_nodelist ($holdreps, \@deletednodes, $longest_title); $outd .= sprintf ("\nChanged nodes: %d\n", scalar @changednod +es) . display_nodelist ($hnewreps, \@changednodes, $longest_title); } $outr = reputation_report ($hnewreps); write_file ($filename, $hnewreps) unless $noupdate; # # This builds the hash that might be written to the database # update_replist ('N', \%replist, $hnewreps, \@newnodes); update_replist ('D', \%replist, $holdreps, \@deletednodes); update_replist ('C', \%replist, $hnewreps, \@changednodes); } return ($outd, $outr, \%replist); } sub update_replist { @_ == 4 or croak "Incorrect number of parameters"; my ($type, $dbreplist, $replist, $repnodes) = @_; foreach (@$repnodes) { croak "Duplicate node_id $_" if exists ($dbreplist->{$_}); $dbreplist->{$_} = $replist->{$_}; $dbreplist->{$_}->{type} = $type; } } sub reputation_report { @_ == 1 or croak "Incorrect number of parameters"; my $hrephash = shift; my $total = 0; my $repmax = 0; my $repmin = 999999999; my $out = ""; scalar keys %$hrephash >= 0 or die "You have no articles, perhaps?\ +n"; for (keys %$hrephash) { $total += $hrephash->{$_}->{rep}; $repmax = max ($repmax, $hrephash->{$_}->{rep}); $repmin = min ($repmin, $hrephash->{$_}->{rep}); } $out = "\n"; $out .= sprintf (" Total articles: %d\n", scalar keys %$hreph +ash); $out .= sprintf (" Total reputation: %d\n", $total); $out .= sprintf (" Min reputation: %d\n", $repmin); $out .= sprintf (" Max reputation: %d\n", $repmax); $out .= sprintf ("Average reputation: %3.2f\n", $total / scalar key +s %$hrephash); return ($out); } sub display_nodelist { @_ == 3 or croak "Incorrect number of parameters"; my ($rnodehash, $rnodelist, $longest) = @_; my $out = ""; return (" (none)\n") if ($#$rnodelist == -1); my $fmt = '% 6d | %-' . $longest . 's | %s | % 4d -> % 4d'; foreach (@$rnodelist) { $out .= sprintf ("$fmt\n", $rnodehash->{$_}->{nodeid}, $rnodehash->{$_}->{title}, $rnodehash->{$_}->{date}, $rnodehash->{$_}->{last}, $rnodehash->{$_}->{rep}); } return ($out); } sub find_longest_title { @_ == 1 or croak "Incorrect number of parameters"; my $hashlist = shift; my $linelen = 0; foreach (@$hashlist) { my $nodes = $_->{'hash'}; $linelen = max ($linelen, length ($nodes->{$_}->{'title'})) fore +ach (@{$_->{'array'}}); } return ($linelen); } sub max { my ($a, $b) = @_; return ($a > $b ? $a : $b); } sub min { my ($a, $b) = @_; return ($a < $b ? $a : $b); } sub initialize_rep_file { @_ == 3 or croak "Incorrect number of parameters"; my ($username, $password, $filename) = @_; my $hnewreps = get_article_list ($username, $password); scalar keys %$hnewreps >= 0 or die "You have no articles, perhaps?\ +n"; write_file ($filename, $hnewreps); return ($hnewreps); } sub read_file { @_ == 1 or croak "Incorrect number of parameters"; my $filename = shift; my %nodehash = (); my $fh = IO::File->new ("<$filename"); defined ($fh) or croak "Can't open file \'$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, 'eol' => "\n" }); while (<$fh>) { $csv->parse ($_) or croak "Can't parse input fields"; my ($nodeid, $article, $rep, $date) = $csv->fields (); !exists ($nodehash {$nodeid}) or croak "Node ID $nodeid is dupli +cated!"; $nodehash {$nodeid} = {'nodeid' => $nodeid, 'title' => $article, 'rep' => $rep, 'last' => $rep, 'date' => $date }; } $fh->close; return (\%nodehash); } sub write_file { @_ == 2 or croak "Incorrect number of parameters"; my ($filename, $nodehash) = @_; my $fh = IO::File->new (">$filename"); defined ($fh) or croak "Can't create file \"$filename\": $!"; my $csv = Text::CSV_XS->new ({'always_quote' => 1, 'eol' => "\n" }); for (sort {$a <=> $b} keys %$nodehash) { $csv->print ($fh, [ @{ $nodehash->{$_} }{ qw(nodeid title rep da +te) } ]) or croak "Text::CSV_XS->print failed"; } $fh->close; } # # Don't display the URL when we die (which would be more informative) +, because the users # password might be e-mailed somewhere. And we sure don't want some +dweeb to be impersonating # us on perlmonks.org, do we? # sub get_article_list { @_ == 2 or croak "Incorrect number of parameters"; my ($username, $password) = @_; my %nodehash = (); $LWP::Simple::FULL_LWP = 1; for (my $i = 0; 1; $i += $pmpagelen) { my $url = "$pmsite?user=$username&passwd=$password&op=login&node +=perl+monks+user+search&usersearch=$username&orderby=createtime%20DES +C&count=$i"; my $page = 0; $page = get ($url) or croak "Get on $pmsite failed."; last if (get_article_page ($page, ['Writeup', 'Rep', 'Create Tim +e'], \%nodehash, $i % $pmpagelen, $i / $pmpagelen) < $pmpagelen); } return (\%nodehash); } sub get_article_page { @_ == 5 or croak "Incorrect number of parameters"; my ($html, $tablecols, $nodehash, $lines, $pageno) = @_; my $rowcnt = 0; $html =~ s/bgcolor=>/bgcolor="">/mg; my $te = new jcwExtract (headers => $tablecols)->parse ($html); if (scalar $te->table_states != 1) { # # Fix for if you have an even multiple of $pmpagelen writeups ( +50, 100, 150, 200...). An # error is caused since the PM code doesn't display the table h +eaders (Writeup, Rep, Date) # if the count parm exceeds the number of writeups. Presumably +, since we've gotten the # first page correctly, the password isn't going to change betw +een fetches on subsequent # pages (the window is *very* small) # return 0 if (scalar $te->table_states == 0 && $pageno); croak sprintf ("Wrong number of tables (%d) returned! (Probably +bad username/password)\n", scalar $te->table_states); } foreach my $ts ($te->table_states) { foreach ($ts->rows) { last if (@$_[2] !~ /\d+-\d+-\d+/); my ($nodeid, $title) = @$_[0] =~ /^node_id=(\d+):(.*)/i; !exists ($nodehash->{$nodeid}) or croak "Node $nodeid is dupl +icated!"; $nodehash->{$nodeid} = {'nodeid' => $nodeid, 'title' => $title, 'rep' => @$_[1], 'last' => @$_[1], 'date' => @$_[2] }; $rowcnt++; } } return ($rowcnt); } # # OK, so if I was really smart, I'd have passed a hash in. Know what +? Too much work, # too little return. # sub confirm_mailargs { @_ == 5 or croak "Incorrect number of parameters"; my ($eflag, $mto, $mserver, $mfrom, $msubject) = @_; my %mailargs = (); return undef if !$eflag; $mailargs {to} = $mto || $def_mto || die "-e specifi +ed, but no -t or script default\n"; $mailargs {server} = $mserver || $def_mserver || die "-e specifi +ed, but no -m or script default\n"; $mailargs {from} = $mfrom || $def_mfrom || die "-e specifi +ed, but no -f or script default\n"; $mailargs {subject} = $msubject || $def_msubject || die "-e specifi +ed, but no -s or script default\n"; $mailargs {from} = sprintf ($mailargs {from}, $mailargs {to}); return (\%mailargs); } sub db_update { @_ == 1 or croak "Incorrect number of parameters"; my $hreplist = shift; my $database = DBI->connect ("DBI:mysql:$def_dbdb:$def_dbhost", $de +f_dbuser, $def_dbpw); if (!defined $database) { warn "Can't open the $def_dbdb database\n"; return; } foreach (sort keys %$hreplist) { my $command = sprintf ("INSERT INTO %s (Type, NodeId, Title, Date, LastReputation, Reputation) VALUES (%s, %d, %s, %s, %d, %d)", $def_dbtable, $database->quote ($hreplist->{$_}->{type} +), $hreplist->{$_}->{nodei +d}, $database->quote ($hreplist->{$_}->{title +}), $database->quote ($hreplist->{$_}->{date} +), $hreplist->{$_}->{last} +, $hreplist->{$_}->{rep}) +; $database->do ($command) or croak; } $database->disconnect; } BEGIN { # # This is not good code. It's really evil and the both the author + of this package, and the author # of HTML::TableExtract should be severely beaten about the head a +nd shoulders. # package jcwExtract; use strict; use HTML::TableExtract; @jcwExtract::ISA = qw(HTML::TableExtract HTML::TableExtract::TableS +tate); my $node_id = undef; { local $^W = 0; # # Override the _add_text mode that if $node_id is defined, we'l +l insert the node_id # value at the front of the string. # eval 'sub HTML::TableExtract::TableState::_add_text { my ($self, $txt, $skew_column) = @_; defined $txt or return; my $row = $self->{content}[$#{$self->{content}}]; $txt = sprintf ("node_id=%d:%s", $node_id, $txt) if define +d ($node_id); $node_id = undef; $row->[$skew_column] .= $txt; $txt; }'; } # # Overridden start method, so we can look for <A HREF=...> tags # sub start { my $self = shift; my ($tag, $attr, $attrseq, $origtext) = @_; $self->SUPER::start (@_); # # If it's a <A HREF=...> tag, and has a node_id, then $1 will c +ontain the node_id. # If we're in a table cell, set $node_id to $1, otherwise undef + it. We don't simply # set $node_id to undef if it's not a <A> tag, because we want +to save the last value # if there is a subsequent <B> or <i> or somesuch tag between t +he <A> and </A> tags. # if ($tag eq 'a' && defined ($attr->{'href'}) && $attr->{'href'} +=~ /\bnode_id=(\d+)/i) { $node_id = ($self->_current_table_state->{in_cell}) ? $1 : un +def; } } 1; } sub usage { print <<ENDOFHELP; usage: luke_repwalker.pl [-h | -?] [-I] [-n] [-b] [-u username] [-p pa +ssword] [-P] [-F filename] [-e [-t toaddress] [-m mailserv +er] [-s subject] [-f fromaddress]] [-c] [-z] [-1 | -2 | -3] [- +d] Show differences between current reputation and last saved reputation -h this help list -? this help list -u username user name on Perlmonks.org -p password password for user -P forces interactive prompt for password. Overrides - +p or script defaults -F filename reputation snapshot (defaults to \$ENV{HOME}/.[usern +ame].rep) -I initialize snapshot file. Must be done first time s +cript is run -n don't update snapshot file, just compare -b brief output (node numbers only) -c force console output if -e is used -z no console or email output if nothing has changed -e send e-mail (requires -t and -m, optionally -f and/o +r -s) -t e-mail addressee (yourname\@somesite.com) -f whom the mail should as be from (myname\@planetx.com +) -s the subject (default is "Perlmonks.org Reputation Ch +ange Report") -m SMTP mail server address ('mailserver.myserver.com') -1 quick reputation report -2 detailed reputation change report -3 both -1 and -2 (default) -d update mysql database with new/deleted/changed recor +ds The -I and -n options are mutually exclusive. -I needs to be used the first time the script is run to initialize +the snapshot file. No other options affect -I, nor are they checked for validity. Using -t, -m, -f, or -s does not imply -e, since e-mail defaults ca +n be embedded in the script. Specifying these flags without -e is meaningless, but not +an error. The script can be edited to set defaults for username, password, fi +lename, mail options, etc. If the script is not edited, then -u and -p are always required, as + is -t if -e used. For -t and -f, use the form '"James T. Kirk" <jtkirk\@starfleet.com>' +to get textual names in the To: and From: fields, instead of the 'user\@address' form. By default, if -e is used, no output is sent to the console. The - +c flag will force the output to the console, in addition to mailing. -c specified withou +t -e is meaningless, but not an error. ENDOFHELP } __END__ # MySQL dump 7.1 # # Host: localhost Database: Perlmonks #-------------------------------------------------------- # Server version 3.22.32 # # Table structure for table 'Reputation' # CREATE TABLE Reputation ( ReputationID int(10) unsigned DEFAULT '0' NOT NULL auto_increment, Type char(1) DEFAULT 'U' NOT NULL, NodeId int(10) unsigned DEFAULT '0' NOT NULL, Title varchar(160) DEFAULT '' NOT NULL, Date datetime DEFAULT '0000-00-00 00:00:00' NOT NULL, LastReputation int(11) DEFAULT '0' NOT NULL, Reputation int(11) DEFAULT '0' NOT NULL, Modified timestamp(14), PRIMARY KEY (ReputationID) );

In reply to luke_repwalker.pl by jcwren

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others wandering the Monastery: (12)
    As of 2015-07-07 09:38 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?

      The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









      Results (88 votes), past polls