#!/usr/bin/perl # 20091019pj # http://perlmonks.org/?node_id=799017 by jdporter, errors by jakobi # (http://www.perlmonks.org/?node_id=544215 initial version) # script for offline-editing the text of existing perlmonks.org nodes # [in case you ask jakobi, remember to include the keyword pm_vi] my $myself=799017; # the script's perlmonks node # Usage: # pm_vi # pm_vi [node_id or node_title] # Tips # - the script asks interactively about the user, password, node, editor # to use unless the node is specified on the command line and other # informations are set via $EDITOR or your .netrc file: # ^machine perlmonks.org # ^ username USER # ^ password PASSWORD # ^ macdef editor # ^ vi "%s" # note that the filename might be # # numeric or a sanitized node title # - the most recent copy of the edited node is retained in $ENV{TMP} or /tmp # both before (.org) and after editing # - you can define short cuts in $0.short in the format WORD REPLACEMENTSTRING\n # default short cut are: self/myself for the script's home node # home your home node # scratch your public scratch pad # - after a recent hickup *, I added a timestamp to the file name and moved things # into a subdirectory. Check timestr / dir / logmaxage below. Files in $dir # older than logmaxage are deleted. (* 200 OK doesn't always mean that the # server side process actually successfully updates the database...) # NOTES # - might be currently restricted to less than 64K node content in html entity # format due to perlmonks.org --> paranoia warning (capture groups seem # to allow more than 64K nowadays; since at least 5.8 nowadays) # - cannot edit private scratchpad (displaytype clashes with readmode in server impl) # ??change the substitution list to execute perl or maybe even vimscript as # well, such as e.g. for 'self' remove the old pm_vi and read-in the # current copy for faster updating of nodes # - maybe reload the page from the server after 3sec and compare it to # protect against data loss *? # # BUGS (for small fry see NOTES and /BUG/ below) # - _updating_ home node and scratchpad seems broken # (server side issue with some superdoc-types??) # - no serious bugs known # # Related Examples (node updating/node creation) # - using wiki-style markup 2 perlmonks html in preparation # perl -MText::Textile=textile -lp000we 'INIT{$columns=78} s{\cM?\cJ} [ ]g; $_=wrap(q[],q[ ],textile($_)).$\;' DRAFT.TMP use LWP::UserAgent; use HTML::Entities qw( decode_entities ); use HTTP::Request::Common qw(POST); use Getopt::Long; use strict; use warnings; my $base_url = "http://perlmonks.org/"; my $edit_cmd = '"C:\\Program Files\\mozilla.org\\Mozilla\\mozilla.exe" -editor "file://%s"'; my $verbose=0; my $logmaxage=24*3600*7; my $dir="pm_vi"; my $time=time; my @time=(localtime($time)); $time[4]++; $time[5]+=1900; my $timestr=sprintf("%04d%02d%02d%02d%02d",reverse(@time[1..5])); my $node_id; my $username; my $password; GetOptions( 'node_id|id=s' => \$node_id, 'username=s' => \$username, 'password|pw=s' => \$password, 'editor|edit_cmd=s' => \$edit_cmd, ); #PJ slight unixification # work dir $ENV{TMP}="/tmp" if not $ENV{TMP}; $dir="$ENV{TMP}/$dir"; mkdir $dir if not -d $dir; chmod 0700, $dir; # use .netrc to store auth my $netrc=""; $netrc=$ENV{NETRC} if $ENV{NETRC}; $netrc=$ENV{HOME} . "/.netrc" if not -r $netrc; #PJ please validate & fix these guesses to sane windows locations ZZZZZZZ $netrc=$ENV{HOME} . "/_netrc" if not -r $netrc; $netrc=$ENV{USERPROFILE} . "/_netrc" if not -r $netrc; $netrc='%USERPROFILE%/_netrc' if not -r $netrc; $netrc='%USERPROFILE%/Application Data/_netrc' if not -r $netrc; $netrc="" if not -r $netrc; if ($netrc) { # BUG: this assumes just a single machine entry and doesn't allow # use of multiple IDs - workaround: change $ENV{NETRC} open(FH,"<",$netrc); local $/; $netrc=; close FH; if ($netrc=~/^machine perlmonks.org$/gm) { $netrc=substr($netrc,pos $netrc); if ($netrc=~/^(machine |default)/gm) { $netrc=substr($netrc,0,pos $netrc); $netrc=~s/.*\Z//; } $netrc=~/^\s*login (\S+)/m and $username=$1; $netrc=~/^\s*password (\S+)/m and $password=$1; # just EDITOR instead of e.g. a netrc macro -- $netrc=~/^macdef editor (\S+)/m and ... $edit_cmd=$ENV{EDITOR} if $ENV{EDITOR}; } } $edit_cmd =~ /%s/ or $edit_cmd .= ' "%s"'; sub prompt_for { my $p = shift; print "\n$p: "; local $_ = ; chomp; $_ =~ /^$/ and die "aborted (did you forget the -id option?)\n"; $_ } # PJ allow ARGV[0] as node_id not $node_id and 0==$#ARGV and $node_id=shift; $node_id ||= prompt_for('NodeID'); $username ||= prompt_for('UserName'); $password ||= prompt_for('Password'); # PJ allow a shortcut file of 'string URL' substitution lines my @substitutions=( "self $myself", "myself $myself", "home $username", "scratch $username\'s scratchpad", # sorry, public only, server doesn't seem to allow # mode combination to access private part ); open(FH,"<", "$0.short") and push @substitutions,(); close FH; foreach(@substitutions){ chomp; my($lhs,$rhs)=($1,$2) if /^(\S+)\s+(.+)$/ or next; $node_id=$rhs if $node_id=~/^\Q$lhs\E$/; } # PJ allow both name or numerical id, by itself or as url my $node_field="node_id"; if ($node_id=~/^(http|perlmonks\.org\/)/i) { if ($node_id=~/node_id=(\d+)/) { $node_id=$1; } elsif ($node_id=~/node=([^;&]*?)(\&|;|\s*$)/) { $node_id=$1; $node_field="node"; } else { die "cannot handle: $node_id\n"; } } else { $node_id=~/\D/ and $node_field="node"; } ## READ my %params = ( op => 'login', user => $username, ticker => 'yes', displaytype => 'xml', xmlstyle => 'flat', $node_field => $node_id, ); # PJ BUG: on a title change, node=... might be troublesome # (at the latest when rerunning pm_vi with the same args) next # is the dir and filename handling hopefully also valid on older windows? my $node_url = "$base_url?$node_field=$node_id"; my $filename=$node_id; $filename=~s![^a-z0-9_\-]!_!gi; $filename = "$dir/node_$node_id" . ( $timestr ? ".".$timestr : "" ) . ".html"; foreach (<$dir/node_*>) { # clean old logs unlink $_ if $time-(stat $_)[9]>$logmaxage; } warn "Node $node_url\nFile $filename\n\n"; # if $verbose; my $ua = LWP::UserAgent->new; $ua->agent("NodeEditor/0.1pj"); my $params = join '&', map { $_ . '=' . $params{$_} } keys %params; my $req = HTTP::Request->new( GET => $base_url.'?'.$params ); my $res = $ua->request($req); $res->is_success or die "GET Error: " . $res->status_line . "\n"; $_ = $res->content; # PJ BUG 64K regex capture limit (seems to be lifted since at least 5.8. Great!) # (but AFAIR a similar one applies to PM nodes on server side (?)) my ( $text ) = /]*>(.*)<\/doctext>/s; die "GET Error: not a valid node: $node_url\n" if not $text; $text = decode_entities( $text ); my( $title ) = /", $filename or die "write $filename - $!\n"; print F $text_to_edit; close F; open F, ">", "$filename.org" or die "write $filename.org - $!\n"; print F $text_to_edit; close F; print "\n"; system sprintf $edit_cmd, $filename; open F, "<", $filename or die "read $filename - $!\n"; $_ = do { local $/; }; close F; # PJ and compare, possibly skipping the update my $text_edited=$_; do{warn "unchanged - exiting.\n"; exit 0} if $text_to_edit eq $_; ## WRITE s/^]*>\s*//i; my( $new_title, $new_text ) = /^([^<]*)<\/title><\/head><body>(.*)<\/body><\/html>/s or die "you screwed up the format!"; #die "title='$new_title'\n\n'$new_text'\n\n"; $text_has_dos_eoln and $new_text =~ s/\n/\r\n/g; $req = POST $base_url, [ %params, sexisgood => "update", note_title => $new_title, note_doctext => $new_text, passwd => $password, ]; $res = $ua->request($req); $res->is_success or die "POST Error for $node_url: " . $res->status_line . "\n"; print $res->status_line, "(which probably is a lie for scratchpad or home node)\n"; ## SANITY TESTS if any # PJ if this is reached, we could delete the file (for now: keep them); # paranoid size warning: # 20K for node source may correspond to 60+K with entities and HTML if (length($text_edited) > length($text_to_edit) and length($text_edited) > 20000) { # BUG or necessary paranoia? warn "Size > 20K - please check the rendering of updated node\n"; warn "URL: $node_url\n"; warn "FILE: $filename\n"; } __END__