#!/usr/local/bin/perl -w # # Bulk Node Title Editor # # This utility was borne out of hearing several people in the chatterbox # mentioning they were going through all their nodes and 'kudratizing' # them (see [id://21814]). After remembering how painful it was to touch # up all my titles when I did that, I figured "heck, why not write a # utility that can whore some more XP... I mean, help people with a dull # repetitive task?". So here ya go... # # Now, that being said, this utility is incredibly dangerous. You can do # something really stupid, like go through and replace all your node titles # with... NOTHING! Yes, that's right, the power of the regexp could let you # completely destroy your home node titles. To fix that dangerous self- # loading, self-cocking, and damn near self-firing gun, a backup file is # generated from any runs. A backup file can't be overwritten by a sub- # sequent run. And there is a magical restore function that allows you # to restore your hard-typed titles after they've been... inexplicably # altered. Yea, that's the ticket! # # Some serious POD reading, consideration, and understanding of a basic # regexp is in order before you use this utility. The default mode is # to perform every step except actually updating the node unless the -Z # switch is specified, so you can play around and watch an inexpertly # written regexp wreck a title before you actually commit it back to PM. # # !!! SEE THE SECURITY SECTION OF THE POD BEFORE RUNNING THIS PROGRAM !!! # # # $Id: bnteditor.pl,v 1.0.0.1 2001/11/30 23:46:30 jcw Exp $ # $Revision: 1.0.0.1 $ # $Author: jcw $ # $Date: 2001/11/30 23:46:30 $ # $Log: bnteditor.pl,v $ # Revision 1.0.0.1 2001/11/30 23:46:30 jcw # initial import into CVS # # use strict; use LWP::UserAgent; use URI::Escape; use HTTP::Cookies; use HTML::Form; use Getopt::Std; use XML::Twig; use HTML::Entities; # # Change this to suit your taste. # my %config = (username => undef, # 'myusername', password => undef, # 'mypassword', quiet => 0, reallydoit => 0, restore => 0, regex => undef, filename => 'savemybutt.txt', newest => 0, baseurl => 'http://www.perlmonks.org', pmsite => 'http://www.perlmonks.org/index.pl?', allnodes => 'node=user%20nodes%20info%20xml%20generator', shortnode => 'XP%20xml%20ticker', ); sub UNSAFE_CHARS {"^A-Za-z0-9\-_.!~*'()"} # # Ye olde main # { my %args = (); getopts ('u:p:r:f:qZRPnh?', \%args); if ($args {'?'} || $args {h}) { usage (); exit; } if ($args {P}) { local $| = 1; print "Password: "; $args {p} = ; chomp ($args {p}); } $config {username} = $args {u} || $config {username} || die "No username. Program terminated.\n"; $config {password} = $args {p} || $config {password} || die "No password. Program terminated.\n"; $config {regex} = $args {r} || $config {regex} || die "No regex. Program terminated.\n" if (!exists $args {R}); $config {filename} = $args {f} || $config {filename} || die "No filename. Program terminated.\n"; $config {restore} = $args {R} || $config {restore}; $config {reallydoit} = $args {Z} || $config {reallydoit}; $config {quiet} = $args {q} || $config {quiet}; $config {newest} = $args {n} || $config {newest}; $config {baseurl} || die "\$config {baseurl} not defined. Program terminated.\n"; $config {pmsite} || die "\$config {pmsite} not defined. Program terminated.\n"; $config {allnodes} || die "\$config {allnodes} not defined. Program terminated.\n"; die "-r and -R mutually exclusive\n" if ($config {regex} && $config {restore}); if (!$config {restore}) { die "regex needs to be formed as 's/match/replace/{options}'" if ($config {regex} !~ m|^s/|); eval '$a = ""; $a =~ ' . $config {regex}; die "badly formed regex: $@" if $@; die "Backup file \"", $config {filename}, "\" exists, pick another name\n" if (-e $config {filename}); } else { die "Backup file \"", $config {filename}, "\" does not exist: $!\n" if (! -e $config {filename}); } doUpdates (); } # # # sub doUpdates { my $useragent = new LWP::UserAgent; print scalar localtime, ": logging in\n" unless ($config {quiet}); $useragent->agent ("Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0)"); $useragent->cookie_jar (HTTP::Cookies->new (ignore_discard => 1)); doPerlmonksLogin ($useragent, $config {username}, $config {password}); print scalar localtime, ": login completed\n" unless ($config {quiet}); if ($config {restore}) { doRestoreTitles ($useragent); } else { doUpdateTitles ($useragent, getArticleList ($useragent)); } } # # # sub doRestoreTitles { my ($useragent) = @_; open BACKUPFILE, $config {filename} or die "Can't open backup file ", $config {filename}, ": $!\n"; chomp (my @nodelist = ); close BACKUPFILE; die "No nodes in backup list" if (!scalar @nodelist); print scalar localtime, ": ", scalar @nodelist, " nodes to update\n" unless ($config {quiet}); my %nodehash = map { split /,/, $_, 2 } @nodelist; foreach my $nodeid (sort {$config {newest} ? $b <=> $a : $a <=> $b} keys %nodehash) { print scalar localtime, ": getting node $nodeid\n" unless ($config {quiet}); my $url = $config {pmsite} . "node_id=$nodeid"; my $page = getProtectedPage ($useragent, $url) or die "Get on $url failed."; my @forms = HTML::Form->parse ($page, $config {baseurl}); my $found = 0; foreach my $form (@forms) { if ($form->find_input ("note_title")) { $found = 1; print scalar localtime, ": $nodeid=", $form->value ("note_title"), "\n" unless ($config {quiet}); print scalar localtime, ": new title: ", $nodehash {$nodeid}, "\n" unless ($config {quiet}); $form->value ("note_title", $nodehash {$nodeid}); if ($config {reallydoit}) { my $res = $useragent->request ($form->click ('sexisgood')); $res->is_error && die "Can't post changes"; print scalar localtime, ": updated $nodeid\n" unless ($config {quiet}); } else { print scalar localtime, ": updated $nodeid (faked)\n" unless ($config {quiet}); } last; } } print scalar localtime, ": node $nodeid is not editable (root node)\n" if (!$found && !$config {quiet}); } } # # # sub doUpdateTitles { my ($useragent, $nodehash) = @_; foreach my $nodeid (sort {$config {newest} ? $b <=> $a : $a <=> $b} keys %$nodehash) { print scalar localtime, ": getting node $nodeid\n" unless ($config {quiet}); my $url = $config {pmsite} . "node_id=$nodeid"; my $page = getProtectedPage ($useragent, $url) or die "Get on $url failed."; my @forms = HTML::Form->parse ($page, $config {baseurl}); my $found = 0; foreach my $form (@forms) { if ($form->find_input ("note_title")) { $found = 1; my $title = $form->value ("note_title"); print scalar localtime, ": $nodeid=$title\n" unless ($config {quiet}); open BACKUPFILE, ">>" . $config {filename} or die "Can't write backup file: $!\n"; print BACKUPFILE sprintf ("%d,%s\n", $nodeid, $title); close BACKUPFILE; eval '$title =~ ' . $config {regex}; die "regexp error: $@" if $@; print scalar localtime, ": new title: $title\n" unless ($config {quiet}); $form->value ("note_title", $title); if ($config {reallydoit}) { my $res = $useragent->request ($form->click ('sexisgood')); $res->is_error && die "Can't post changes"; print scalar localtime, ": updated $nodeid\n" unless ($config {quiet}); } else { print scalar localtime, ": updated $nodeid (faked)\n" unless ($config {quiet}); } last; } } print scalar localtime, ": node $nodeid is not editable (root node)\n" if (!$found && !$config {quiet}); } } # # # sub getArticleList { @_ > 0 or die "Incorrect number of parameters"; my ($useragent) = @_; my %nodehash = (); my $url = $config {pmsite} . $config {allnodes}; my $page = getProtectedPage ($useragent, $url) or die "Get on $url failed."; my $twig= new XML::Twig (TwigRoots => { NODE => sub { my ($t, $node) = @_; my $nodeid = $node->att ('id'); !exists ($nodehash {$nodeid}) or die "Node $nodeid is duplicated!"; my $title = decode_entities ($node->text ()); $title =~ s/'/'/g; # Why is this missed? $nodehash {$nodeid} = {'nodeid' => $nodeid, 'title' => $title, 'rep' => $node->att ('reputation'), 'last' => $node->att ('reputation'), 'date' => $node->att ('createtime') }; $t->purge; } }); $twig->parse ($page); return (\%nodehash); } # # # sub getProtectedPage { @_ > 1 or die "Incorrect number of parameters"; my ($useragent, $url) = @_; my $req = new HTTP::Request ('GET' => $url, HTTP::Headers->new ('Content-Type' => 'application/x-www-form-urlencoded')); $useragent->cookie_jar->add_cookie_header ($req); my $res = $useragent->request ($req); return undef if ($res->is_error); return $res->content (); } # # We use 'XP%20xml%20ticker' because we know that will be a really short page returned. # We don't care about the contents, but no point in loading the entire text of the # front page. # sub doPerlmonksLogin { @_ >= 3 || die "At least 4 arguments required"; my ($useragent, $username, $password) = @_; my %pairs = (); my $req = new HTTP::Request ('POST' => $config {pmsite}, HTTP::Headers->new ('Content-Type' => 'application/x-www-form-urlencoded')); @pairs {qw(user passwd op node)} = ($username, $password, 'login', $config {shortnode}); $req->content (join '&', map {$_ . "=" . uri_escape ($pairs {$_}, UNSAFE_CHARS)} keys %pairs); my $res = $useragent->request ($req); if ($res->is_success) { $useragent->cookie_jar->extract_cookies ($res); return $res if ($useragent->cookie_jar->as_string () =~ m/userpass/i); die "Eeek! Log in failed. Bad username or password?\n"; } die "Eeek! Request to ", $config {pmsite}, " failed!\n"; } # # # sub usage { print < | B<-?>] [B<-u> username] [B<-p> password] [B<-P>] [B<-r> regexp] [B<-f> filename] [B<-R>] [B<-q>] [B<-Z>] Bulk Node Title Editor -h this help list -? this help list -u username user name on Perlmonks.org -p password password for username -P prompt for password interactively -r regexp regexp to apply to each node title -f filename name of backup file for backup/restore -R restore damaged node titles from backup file -q run quietly -Z really do it, instead of 'safe mode' -n operate on newer nodes first -r and -R are mutually exclusive. To be safe, regexp should be quoted in the shell to prevent the shell from interpolating any recognized characters. =head1 DESCRIPTION bnteditor is a utility for applying a regular expression to each and every non-root home node title. Many people have started 'kudratizing' thier node titles (see [id://21814]). This is a slow and laborious process of going to each node, changing it, submitting it, then going back to the users list of nodes to see the next to edit. When a few dozen nodes, this can easily waste over an hour. This utility automates that process. The users list of nodes is fetched, each node is stepped through, and assuming it is not a root node (root nodes are not editable, except under special circumstances), applies the user supplied regular expression to the title, then submits the node back. Obviously, there is quite a potential for damage here. To help reduce the possibility of getting ones nodes into totally befunged state, as each node is edited, the original title is stored into a backup file. Only nodes that are changed are stored in the file, so if you have 2100 nodes, and 100 are edited successfully, but the 101st reveals a problem with the regular expression, only the first 101 will be restored. In addition, rather than defauting to behavior that automatically updates the nodes, the B<-Z> switch must be passed. Otherwise, all actions are taken except for actually posting the node back to perlmonks.org. Since the default is for B<-q> is off, a complete record of actions will be displayed. =head1 EXAMPLES The following example prepends the string '(myusername) ' to the front of every node title. B<-Z> indicates it should really be done, instead of a trial run. bnteditor -u myusername -p password -r 's/^/(myusername) /' -Z This example might be if you (somehow) changed your username from 'iamfred' to 'iamdaphne', regardless of the case of 'iamfred': bnteditor -u iamdaphne -p sc00by -r 's/iamfred/iamdaphne/i' -Z This example would simply generate a backup file for all current node titles, and save them to the file 'nodetitles': bnteditor -u iamdaphne -p sc00by -r 's///' -f nodetitles =head1 OPTIONS =over 4 =item B<-h> or B<-?> help Prints out a brief help message. =item B<-u> username The username of the perlmonks.org account that the image is to be uploaded to. =item B<-p> password The password for the username. Be sure to READ THE SECURITY SECTION, below. There is important information about keeping your password private. =item B<-P> Enter password interactively. The password is prompted for as soon as the script starts. While impractical if the script is started from a cronjob, it is the most secure method. See B. =item B<-r> regexp This is a normal Perl regular expression. Most likely you'll want to quote this in the shell so that parenthesis, dollar signs, and other characters are not attempted to be interpolated by the shell. The form is 's/match/replace/options'. The expression is applied to the node title via an eval(). Note that a basic check of expression validity is checked for before starting to process nodes. Basically, the check is eval '$a = ""; $a =~ ' . $regex; followed by a check of B<$@>. This option is mutually exclusive with the the B<-r> option, and specifying both terminates the script with an error. =item B<-f> filename This option specifies the file name that should be used as either the backup or restore file, depending on whether nodes are being updated or restored. When used as a backup file, the script will terminate if the file already exists. There is no way to override this, and it's for your own protection. When restoring, the presence of the file is checked for, and must contain at least one line before the restore operation is commenced. The default for this is ''savemybutt.txt'' =item B<-R> Replace backed up node titles. Rather than apply a regular expression to each node title, use the node titles saved in the backup file. Unless a B<-f> option is specified, the default ''savemybutt.txt'' file will be used as the source for the backup. This option is mutually exclusive with the B<-r> option, and specifying both terminates the script with an error. =item B<-q> Run script quietly. Normally, the script is rather verbose about what it's doing. Since the danger of wreaking havoc on node titles is pretty high, this is a desirable default. However, if for some bizarre reason one feels compelled to have the script quietly do it's thing, this option causes all progress messages to be suppressed. =item B<-Z> Actually make changes. Without this option, the script will perform all steps in the update or restore, with the exception of actually submitting the changes back to perlmonks.org. This option is required to actually make things happen. It's recommended that this option not be specified until you are completely sure that the regular expression is doing what is intended, and not replacing all node titles with 'Petrified Natalie Portman and hot grits!'. Use with care. Lots of care. Extreme care. So much care that you wonder if you shouldn't have done all of this by hand anyway, what with all these warnings and all. =item B<-n> This option causes newer nodes to be processed first. Normally, nodes are processed in older to newer older, as defined by the node ID. Specifying B<-n> causes the processing order to be newer to older order. =head1 SECURITY Remember that if you provide usernames and passwords on the command line, someone else can see those with ''ps'' on most systems. Since your perlmonks.org account should be more sacred than your winning lottery numbers, that rare signed Larry Bird baseball card, or the URL for that free Angelina Jolie pr0n site. There are two ways you can protect your password. You can either edit the script and set it as a default, then make sure you make the script owner readable only (chmod 700), or you can use the B<-P> option, and require the script to interactively prompt you for your password each time it's run. Obviously, on a single user system, this isn't much concern. However, if you're running from an account on perlmonk.org, where there are over 100 users at last count, your want to be careful about compromising information. =head1 IMPROVEMENTS A worthwhile improvment might be being able to specify a numeric range or list of nodes to change, rather than the whole list. Producing a log file might be useful, as would being able to generate a quicker backup file from the loaded XML, rather than stepping through each node. =head1 WARRANTY You B be high. =head1 AUTHOR J. C. Wren Ejcwren@jcwren.comE =head1 COPYRIGHT Copyright (c) 2001 J.C.Wren. All rights reserved. This is free software. You can redistribute it and/or modify it under the same terms as Perl itself. =head1 HISTORY $Log: bnteditor.pl,v $ Revision 1.0.0.1 2001/11/30 23:46:30 jcw initial import into CVS =cut