sourcecode
davido
<code>
# PerlMonks::Mechanized
# and its helper class 'PM::Mech::Janitor'.
# Start by creating a PerlMonks::Mechanized->new() object.
# If needed, login info can be stored in $ENV{PMPASS}, $ENV{PMUSER}.
# To override default site URL, set $ENV{PMSITE} to full URL.
# Janitor object is returned by the PerlMonks::Mechanized->janitor()
# method. Don't call Janitor->new() directly.
# Read on, for details.
package PerlMonks::Mechanized::Janitor;
use strict;
use warnings;
use WWW::Mechanize; # Needs a new WWW::Mech object in order to
# implement rollback / commit.
# Janitor class: Returned by janitor() method of PM::Mech. First
# call must be to $janitor->fetch(). After that initial hit, no
# server hits will occur until $janitor->commit() is invoked.
# You may spawn multiple janitors, for full cached transactions.
# Each Janitor invokes a new WWW::Mechanize object.
# Returns a Janitor object.
sub new {
my( $class, $monk, $id ) = @_;
my $self = {};
$self->{monk} = $monk;
$self->{id} = $id;
$self->{agent} = WWW::Mechanize->new(
'autocheck' => 1,
'agent' => 'PM::Mech::Janitor0.6'
);
$self->{site} = $self->{monk}{site};
$self->{fetched} = 0;
return bless $self, $class;
}
# If passed the argument of "unconsider", the target node will be
# unconsidered.
# Must fetch() before using any of the other methods in this class.
# No return value.
sub fetch {
my( $self, $unconsider ) = @_;
my $uncon_URI = '';
if(
defined( $unconsider )
and $unconsider =~ /^Un/i
) {
$uncon_URI = ";op=consider;"
. "$self->{id}=unconsider";
}
$self->{agent}->get(
$self->{site}
. '?node_id='
. $self->{id}
. ';displaytype=editors'
. $self->{monk}->_login_URI('force')
. $uncon_URI
);
$self->{agent}->success()
or die "Unable to fetch Janitors view of $self->{id}.\n";
$self->{fetched} = 1;
}
sub get_title {
my $self = shift;
_verify_fetch( $self );
my $form = $self->{agent}->form_name( 'edit_node' );
$form or die "Couldn't find 'edit_node' form in get_title.\n";
return $form->value( 'update_title', 1 );
}
sub set_title {
my( $self, $title ) = @_;
_verify_fetch( $self );
$self->{agent}->field( 'update_title', $title );
}
sub get_author {
my $self = shift;
_verify_fetch( $self );
my $content = $self->{agent}->content();
my $author = '';
if( $content =~
m/\s+by\s+<a HREF="\?node_id=\d+">([^<]+)<\/a>/i ) {
$author = $1;
} else {
die "Couldn't ascertain the author while scraping the "
. "editor view of ID: $self->{id}.\n";
}
return $author;
}
sub get_doctext {
my $self = shift;
_verify_fetch( $self );
my $form = $self->{agent}->form_name( 'edit_node' );
$form or die "Couldn't find 'edit_node' form in get_doctext.\n";
return $form->value( 'update_doctext', 1 );
}
sub set_doctext {
my( $self, $text ) = @_;
_verify_fetch( $self );
$self->{agent}->field( 'update_doctext', $text );
}
# Commits the changes made. The only change that cannot be rolled-
# back is the "unconsider" change. Sorry 'bout that. ;)
sub commit {
my $self = shift;
_verify_fetch( $self );
$self->{agent}->current_form->value( 'blah', 'update' );
$self->{agent}->click( 'blah' );
$self->{agent}->success()
or die "Couldn't commit changes to $self->{id}.\n";
}
# Private Janitor class function.
sub _verify_fetch {
my $self = shift;
die "ID: $self->{id} hasn't been fetched yet.\n"
unless $self->{fetched};
}
1;
package PerlMonks::Mechanized;
# This is your starting point. ...Create a new
# PerlMonks::Mechanized object, and have fun with it.
# Logins are not performed unless they are needed for the activity
# you're requesting. If a login is needed, it will be done
# automatically if you passed ( user, password ) to new(), or if
# $ENV{PMUSER} and $ENV{PMPASS} are set. Logins are automatic, and
# on-demand. However, once logged in, you stay logged in until
# your PM::Mech object is destroyed.
use strict;
use warnings;
use WWW::Mechanize;
use XML::Simple;
our $SITE = exists( $ENV{PMSITE} )
? $ENV{PMSITE}
: 'http://www.perlmonks.org/';
# Call new() to create PM::Mech object. Call with ( $user, $pass )
# to log in on demand, or set $ENV{} variables for on-demand login.
# If no login info is supplied through new() or $ENV, you can only
# do things that don't require login. You don't need to explicitly
# log in. If you have supplied the proper info, it will happen when
# needed, transparently.
sub new {
my $class = shift;
my $obj = {};
$obj->{user} = defined( $_[0] )
? $_[0]
: defined( $ENV{PMUSER} )
? $ENV{PMUSER}
: '';
$obj->{passwd} = defined( $_[1] )
? $_[1]
: defined( $ENV{PMPASS} )
? $ENV{PMPASS}
: '';
$obj->{logged_in} = 0;
$obj->{login_phrase} = ( $obj->{user} && $obj->{passwd} ) ?
";op=login;user=$obj->{user};"
. "passwd=$obj->{passwd};expires=+10y"
: '';
$obj->{site} = $SITE;
$obj->{agent} = WWW::Mechanize->new(
'autocheck' => 1,
'agent' => 'PM::Mech0.61'
);
bless $obj, $class;
}
# Given a base thread ID, returns a datastructure containing
# The thread's ID's. Uses the XML ticker, "xml node thread", at
# id://180684.
sub threaded_ids {
my( $self, $base ) = @_;
$self->{agent}->get( $self->{site}
. "?node_id=180684;id=$base" );
$self->{agent}->success()
or die "Unable to fetch thread ticker for id = $base.\n";
my $struct = XMLin(
$self->{agent}->content(),
ForceArray => 1,
KeepRoot => 1
);
return $struct;
}
# Given a base thread ID, returns a flat list of thread ID's.
# Uses the XML ticker, "xml node thread", at id://180684.
sub thread_list {
my( $self, $base ) = @_;
my $structref = threaded_ids( $self, $base );
return [
sort { $a <=> $b } _flatten_thread( $structref )
];
}
# Returns a datastructure containing info about a node or list of
# nodes. Uses the XML ticker, "Node Query XML Generator",
# at id://37150. Accepts a node id or list of nodes.
sub node_info {
my( $self, @ids ) = @_;
$self->{agent}->get(
$self->{site}
. "?node_id=37150;nodes="
. join( ',', @ids )
. ';xmlstyle=flat'
);
$self->{agent}->success()
or die "Unable to fetch node query XML generator.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
)->{node};
}
# Calls get_node_info() with a single ID or list of nodes.
# Returns an array of arrays holding id/title pairs. Relies on
# "Node Query XML Generator", at id://37150.
sub node_titles {
my( $self, @ids ) = @_;
my $info = node_info( $self, @ids );
return [
map { [ $_->{node_id}, $_->{content} ] }
@{ $info }
];
}
# Grab user stats. Uses the XP XML Ticker (id://16046).
# See the PM FAQ for details about valid args, and their meanings.
sub user_stats {
my( $self, %params ) = @_;
my $parameters = '';
foreach( ( 'for_user', 'showlevels',
'for_userid','shownorm', 'showall' ) ) {
$parameters .= exists( $params{$_} )
? ";$_=$params{$_}"
: '';
}
$self->{agent}->get(
$self->{site}
. "?node_id=16046;xmlstyle=flat"
. $parameters
. $self->_login_URI()
);
$self->{agent}->success()
or die "Unable to fetch user stats ticker.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
);
}
# Reads the New Chatterbox XML Ticker (id://207304) and returns a
# ref to a LoL structure of CB traffic.
sub chatterbox {
my $self = shift;
$self->{agent}->get(
$self->{site}
. "?node_id=207304"
);
$self->{agent}->success()
or die "Unable to fetch Chatterbox content XML generator.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
)->{message};
}
# Talks in the CB. Messages can't be longer than 250 characters.
sub say {
my( $self, $message ) = @_;
if( length( $message ) > 250 ) {
$message = substr $message, 0, 250;
}
$self->{agent}->get(
$self->{site}
. '?' . $self->_login_URI
. ';node_id=16046;op=message;message='
. $message
);
$self->{agent}->success()
or die "Unable to talk in the CB.\n";
}
# Returns logged-in user's private messages in a datastructure.
# Uses the Private Message XML Ticker (id://15848). See
# node_id=379320 for information on how the parameter fields work.
sub private_message {
my( $self, %params ) = @_;
my $parameters = '';
foreach( ( 'max_recs', 'since_id', 'prior_to', 'archived' ) ) {
$parameters .= exists( $params{$_} )
? ";$_=$params{$_}"
: '';
}
$self->{agent}->get(
$self->{site}
. "?node_id=15848;xmlstyle=clean"
. $parameters
. $self->_login_URI()
);
$self->{agent}->success()
or die "Unable to fetch Private Message ticker.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
)->{message};
}
# Reads the 'Other Users XML Ticker' (id://15851) and returns a ref
# to a list of other users currently logged in to the Monastery.
sub other_users {
my $self = shift;
$self->{agent}->get(
$self->{site}
. "?node_id=15851"
);
$self->{agent}->success()
or die "Unable to fetch Other Users XML generator.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
)->{user};
}
# Uses the 'displaytype=xml;xmlstyle=flat' ticker to grab an entire
# single node and any available related info for that node. The
# data is plopped into a datastructure that mirrors the original
# XML tags, which in turn, mirror PM database columns.
sub node_content {
my( $self, $id ) = @_;
$self->{agent}->get(
$self->{site}
. "?node_id=$id;displaytype=xml;xmlstyle=flat"
);
$self->{agent}->success()
or die "Unable to fetch node ID: $id\n";
return XMLin( $self->{agent}->content() );
}
# Uses the Scratchpad Viewer's XML displaytype (id://108949) to get
# a user's scratchpad. An attempt will be made to log self in, if
# possible. If the logged-in user is the same as the user who's pad
# we're retrieving, the private portion will also be retrieved.
# This returns a datastructure.
sub scratchpad {
my( $self, $pad ) = @_;
$pad = ( defined $pad ) ? $pad : $self->{user};
$self->{agent}->get(
$self->{site}
. "?node_id=108949;user=$pad;passthrough=1"
. ';displaytype=xml;xmlstyle=flat'
. $self->_login_URI()
);
$self->{agent}->success()
or die "Unable to fetch scratchpad for $pad.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
);
}
# This sub uses the Newest Nodes XML Generator (id://30175) to get
# a list of newest nodes. See the PerlMonks FAQ for a description
# of what "types=" options you have. You may optionally specify
# whether to use xmlstyle=flat (default) or xmlstyle=rss.
# You may specify sinceunixtime=epocseconds, or days=decimal to
# get up to 8 days worth of newest nodes.
# "types" should be passed in as 'types=>[type,type,type]'
# Optional params should be passed as a hashref.
# Currently no validity checking is really done on params passed
# to the method.
sub newest_nodes {
my( $self, %params ) = @_;
$self->{agent}->get(
$self->{site}
. '?node_id=30175;xmlstyle='
. (
exists( $params{xmlstyle} )
? $params{xmlstyle}
: 'flat'
)
. (
exists( $params{days} )
? ';days=' . $params{days}
: ''
)
. (
exists( $params{sinceunixtime} )
? ';sinceunixtime=' . $params{sinceunixtime}
: ''
)
. (
exists( $params{types} )
? ';types='
. join( ',', @{$params{types}} )
: ''
)
);
$self->{agent}->success()
or die "Unable to fetch newest nodes.\n";
return XMLin(
$self->{agent}->content(),
ForceArray => 1
);
}
# This sub fetches the displaytype=editors view of the node
# indicated in $id. It returns an object of class Janitors with
# the following methods:
# fetch(), get_title(), set_title(), get_author(), get_doctext(),
# set_doctext(), and commit(). You must always fetch() first, and
# after that, the rest of the methods will have relevancy.
sub janitor {
my( $self, $id ) = @_;
return PerlMonks::Mechanized::Janitor->new( $self, $id );
}
# Private class subs. Please don't use these externally.
# Called by methods that need the user to be logged in.
# If the user is already logged in, this sub returns empty string.
# If user isn't logged in, and it is possible to do so, this sub
# returns a URI suffix to log the user in.
sub _login_URI {
my( $self, $independant_agent ) = @_;
my $login = '';
my $logged_in = $self->{logged_in}; # Save old state.
my $independant; # Flag for independant agent.
if(
defined( $independant_agent )
and $independant_agent
) {
$self->{logged_in} = 0;
$independant = 1;
}
if(
$self->{logged_in} == 0
and $self->{login_phrase}
) {
$login = $self->{login_phrase};
$self->{logged_in} = 1;
}
if( $independant ) {
# if this is an independant agent, restore
# original login flag state.
$self->{logged_in} = $logged_in;
}
return $login;
}
# Used by get_thread_list() to flatten return value from
# get_thread_ids().
sub _flatten_thread {
my @nodes;
foreach my $key ( keys %{$_[0]} ) {
if ( ref( $_[0]->{$key} ) ) {
push @nodes, _flatten_thread( $_[0]->{$key} );
}
if ( $key =~ m/^\d+$/ ) {
push @nodes, $key;
}
}
return @nodes;
}
1;
</code>
<p>It's Christmas day in LA, and here's my present to the Monastery (whether you see it as a diamond or a lump of coal remains to be determined). Merry Christmas all...</p>
<p>This is Yet Another Attempt to gather Monastery automation tools under one roof. The following is a module, PerlMonks::Mechanized, which mechanizes many of the common PM tasks. It makes using the Monastery's XML tickers a trivial task, for example. I intend to polish it, and add features on request. For now, it just has what I have needed myself recently, plus a little more. But as requests come in, if they're well thought-out, I'll add them. I'm also very interested in comments that will help me to improve it. At this point, I don't consider any part of this module stable (not even its interface). But if people like it, I'll stabilize it, properly document it, and continue adding to it/ maintaining it. For now, this is a rough-draft.</p>
<p>
<p>It uses $ENV{PMPASS} and $ENV{PMUSER} for login info, or you can supply them as user, passwd to <code>my $monk = PerlMonks::Mechanize->new($user,$passwd);</code>
</p>
<p>Here is a simple example...</p>
<code>
use strict;
use warnings;
use PerlMonks::Mechanized;
my $monk = PerlMonks::Mechanized->new();
$monk->say( "Hello. This is a PerlMonks::Mechanized test." );
</code>
<p>The preceeding code assumes you've set your login env variables. The code only logs you in to the Monastery if the action you're requesting requires a login. Otherwise, you stay logged out. Login is on demand, assuming you've supplied the proper login info. Janitors can also use this for node edits / retitling. I'll be reworking the retitler to take advantage of this module's features.
</p>
<p>The code is well commented. Start your reading at the beginning of the PerlMonks::Mechanized package (ie, ignore the Janitor package until you've read PM::Mech first. Then if you're a janitor, go back and read the Janitor class). All public functions are documented at least enough that with the help of Data::Dumper you'll know what they're doing.
</p>
<p>
Enjoy!</p>
<br />
<small>
<p><b>Updates:</b>
<ul>
<li>PM::Mech::Janitor fixes (updated to v0.6):
<ul>
<li>Renamed Janitor class to PerlMonks::Mechanized::Janitor (more sensible)</li>
<li>PM::Mech::Janitor bug fixed where W::Mech was misspelled. Woops.</li>
<li>PM::Mech::Janitor's version number goes to 0.6.</li>
</ul></li>
<li>PM::Mechanized fixes (updated to v0.6)
<ul>
<li>Repaired login detection to work with PM::M::Janitor</li>
</ul></li>
<li>URL fix, per Dietz.</li>
<ul><li>PM::M becomes v0.61. Added trailing '/' to URL.</li></ul>
</ul>
</p>
</small>
PerlMonks Related Scripts
[davido]