Beefy Boxes and Bandwidth Generously Provided by pair Networks
Syntactic Confectionery Delight

Google Spreadsheet Distributed Agent System

by dmlond (Acolyte)
on Sep 30, 2009 at 11:59 UTC ( #798311=sourcecode: print w/ replies, xml ) Need Help??

Category: Utility Scripts
Author/Contact Info
Description: This object sets up a system to allow a user to configure a set of scripts to use a single Google Spreadsheet as a control panel when running across many servers. See The RFC for more information.
package IGSP::GoogleAgent;

use FindBin;
use YAML::Any qw/LoadFile/;
use Net::Google::Spreadsheets;
use Net::SMTP::TLS;
use IO::CaptureOutput qw/capture/;
use Sys::Hostname;
use Moose;
use Carp;

our $VERSION = '0.01';

# Moose BUILD hook: validates bind_key_fields against the key_fields section
# of the loaded config.
#   - dies when the config marks no key_fields entry as required;
#   - croaks when a required key field has no true value in bind_key_fields
#     (the check is a truth test, so a bound value of 0 or '' is rejected too).
# NOTE(review): this listing shows no closing braces for the foreach loop or
# the sub itself; they appear to have been lost when the page was captured.
sub BUILD {
    my $self = shift;

    my @required_key_fields = grep { $self->config->{key_fields}->{$_}
+->{required} } keys %{$self->config->{key_fields}};
    die ("Your configuration must have at least one required key_field
+s key!\n") unless (@required_key_fields);

    # every required key field must have been bound by the caller
    foreach my $required_query_field (@required_key_fields) {
        croak ("You must provide a bind_key_fields ${required_query_fi
+eld} key - value pair!\n")
          unless ($self->bind_key_fields->{$required_query_field});

# Constructor attributes.  All are read-only.
# NOTE(review): none of the declarations below shows its closing ");" in this
# listing; those lines appear to have been lost when the page was captured.

# Required.  Maps key field names to the values identifying the row to work
# on; BUILD checks it against the config's required key_fields.
has 'bind_key_fields' => (
                            is => 'ro',
                            isa => 'HashRef',
                            required => 1

# Required.  Name of the agent; also used as the key of the row content that
# holds this agent's status (see run_entry, fail_entry, complete_entry).
has 'agent_name' => ( 
                      is => 'ro',
                      isa => 'Str',
                      required => 1

# Required.  Worksheet title passed to google_db->worksheet in get_entry.
has 'page_name' => (
                    is => 'ro',
                    required => 1

# Optional.  Row fields that must each == 1 before run_entry lets the agent run.
has 'prerequisites' => (
                        is => 'ro',
                        isa => 'ArrayRef'

# Optional.  When true, the run_my wrapper calls run_my directly instead of
# capturing its output and mailing it on failure.
has 'debug' => (
                is => 'ro',
                isa => 'Bool'

# Optional.  Limit consulted by is_subsumed for processes matching agent_name.
has 'max_selves' => (
                     is => 'ro',
                     isa => 'Int'

# Optional.  Maps a process-name pattern to a count; consulted by is_subsumed.
has 'subsumed_by' => (
                      is => 'ro',
                      isa => 'HashRef'

# Optional.  Path of the YAML config file; _build_config falls back to a
# default location under $FindBin::Bin when it is not set.
has 'config_file' => (
                      is => 'ro',
                      isa => 'Str',

# Configuration data, produced by _build_config unless passed to the constructor.
# NOTE(review): the builder reads config_file but this attribute is not
# declared lazy here - confirm that initialization order is safe.
has 'config' => (
                 is => 'ro',
                 builder => '_build_config'

# Spreadsheet handle produced by _build_google_db on first use.
has 'google_db' => (
                    is => 'ro',
                    builder => '_build_google_db',
                    lazy => 1, # depends on config
                    init_arg => undef # google_db cannot be overridden


# Builder for the config attribute.
# Uses config_file when it is set, otherwise a default path below
# $FindBin::Bin/../config/ (the rest of that path literal is cut off in this
# listing).  Croaks when the chosen file does not exist; otherwise returns
# whatever YAML::Any::LoadFile produces for it.
# NOTE(review): lines 2 and 3 of the body are truncated and the closing brace
# is missing in this listing.
sub _build_config {
    my $self = shift;
    my $config_file = $self->config_file || $FindBin::Bin.'/../config/
    croak "Config ${config_file} not found!\n" unless (-e $config_file
    return YAML::Any::LoadFile($config_file);

# Builder for the google_db attribute.
# Creates a Net::Google::Spreadsheets service object with a username and
# password (both argument expressions are cut off in this listing after
# "$self->co"), then returns the spreadsheet whose title equals the config's
# spreadsheet_name entry.
# NOTE(review): the closing ");" / "});" / "}" lines are missing in this listing.
sub _build_google_db {
    my $self = shift;
    my $service = Net::Google::Spreadsheets->new(
                                                 username => $self->co
                                                 password => $self->co
    return $service->spreadsheet({
        title => $self->config->{spreadsheet_name}


# Wrapper around run_my.
# With debug set, the original run_my is called directly and its result is
# returned.  Otherwise the call runs inside IO::CaptureOutput's capture with
# STDOUT and STDERR both directed into $capture_output; an exception raised by
# run_my is caught by eval and printed to STDERR (and therefore captured).
# When the value assigned to $no_problems is false, the captured text is
# handed to mail_error.  Returns $no_problems.
# NOTE(review): relies on capture() handing back the block's return value -
# confirm against the installed IO::CaptureOutput version.
# NOTE(review): the closing braces of the if/else, eval and sub are missing
# in this listing.
around 'run_my' => sub {
    my ($orig, $self, @args) = @_;

    if ($self->debug) {
        # debug mode: no capture, no mail
        return $self->$orig(@args);
    else {
        my $capture_output;
        my $no_problems = capture {
            my $ret;
            eval {
                $ret = $self->$orig(@args);
            if ($@) {
                # surface the exception text in the captured output
                print STDERR $@;
            return $ret;
        } \$capture_output, \$capture_output;
        $self->mail_error($capture_output) unless ($no_problems);
        return $no_problems;

# Runs $agent_code for this agent's row when the row is runnable.
#   $agent_code - coderef called with the row content; its return value is
#                 unpacked as ($success, $update_entry).
# Returns 1 when the agent is subsumed, when run_entry reports the row as
# not_runnable, or when $agent_code reports success; returns nothing when
# run_entry yields no entry.
# NOTE(review): in this listing the success branch shows only "return 1", the
# else branch is empty and $update_entry is never used, so the statements that
# record completion or failure appear to be missing.
sub run_my {
    my ($self, $agent_code) = @_;
    return 1 if ($self->is_subsumed);
    my $entry = $self->run_entry();
    return unless ($entry);
    return 1 if ($entry->{'not_runnable'}); # this is one that is not 
+ready, already running, or already run

    my ($success, $update_entry) = $agent_code->($entry->content);
    if ($success) {
        return 1;
    else {

# Decides whether this agent should stand down because of what is already
# running on the local machine.
# Returns nothing when neither max_selves nor subsumed_by is set, returns 1
# when the process listing cannot be opened ("subsume to be safe"), and
# otherwise returns $subsumed, which is set to 1 when a max_selves or
# subsumed_by limit is reached while scanning the output of ps.
# Lines matching emacs|vi|screen|SCREEN, lines matching this process id, and
# lines containing square brackets are skipped.
# NOTE(review): m/\s*$$/ matches the pid digits anywhere in the line, and
# m/...|vi|.../ matches "vi" inside any word, so unrelated processes may be
# skipped as well - confirm this is acceptable.
# NOTE(review): agent_name and the subsumed_by keys are interpolated into
# regexes unescaped; names containing regex metacharacters would need \Q...\E.
# NOTE(review): no statement that increments %running_subsumers is visible in
# this listing (the ps argument list is truncated and several lines and
# closing braces appear to be missing), so the == comparisons cannot be
# verified from here.
sub is_subsumed {
    my $self = shift;

    return unless ($self->max_selves || $self->subsumed_by); # nothing
+ to subsume here

    my $subsumed;
    my %running_subsumers;

    my $subsume_opened = open (my $subsuming_in, '-|', 'ps', '-eo', 'p
    unless ($subsume_opened) {
        print STDERR "Couldnt check subsumption $!\n";
        return 1; # subsume to be safe

    SUBIN: while (my $in = <$subsuming_in>) {
        next if ($in =~ m/emacs|vi|screen|SCREEN/); # skip editing and
+ screen
        next if ($in =~ m/\s*$$/); # skip this agent
        next if ($in =~ m/(\[|\])/); # skip daemons

        my $self_name = $self->agent_name;
        if ($self->max_selves 
            && $in =~ m/$self_name/) {
            if ($running_subsumers{$self->agent_name} == $self->max_se
+lves) {
                print STDERR "max_selves limit reached\n";
                $subsumed = 1;
                last SUBIN;

        if ($self->subsumed_by) {
            foreach my $subsumer (keys %{$self->subsumed_by}) {
                if ($in =~ m/$subsumer/) {
                    if ($running_subsumers{$subsumer} == $self->subsum
+ed_by->{$subsumer}) {
                        print STDERR "subsumed by ${subsumer}\n";
                        $subsumed = 1;
                        last SUBIN;
    close $subsuming_in;

    return $subsumed;

# Finds the spreadsheet row for this agent's bound key fields.
# Looks up the worksheet titled page_name in google_db and, when one is found,
# scans every row: a row is rejected (next ROW) as soon as a checked key
# field's content does not match, and the first row that survives all key
# fields is returned.  Returns undef when there is no worksheet or no match.
# NOTE(review): the first operand of the "next ARG if ( ... && ... )"
# condition and the tail of the "next ROW unless" line are cut off in this
# listing, and the closing braces are missing.
sub get_entry {
    my $self = shift;
    my $entry;

    my $worksheet = $self->google_db->worksheet({
        title => $self->page_name

    # note, the Google Spreadsheet Data API does supply an sq query op
    # which could be used here, but, as of 0.04 of Net::Google::Spread
    # this did not prove to be reliable during the tests.  This may be
    # a limitation of the Google API rather than Net::Google::Spreadsh
    # as it appeared that Net::Google::Spreadsheets was submitting val
    # url encoded queries that the Google system rejected. Instead thi
+s software
    # conducts a full table scan to ensure the correct row is returned
    if ($worksheet) {
        my @rows = $worksheet->rows();
        ROW: foreach my $row (@rows) {
            ARG: foreach my $arg (keys %{$self->config->{key_fields}})
+ {
                next ARG if (
                             && !($self->bind_key_fields->{$arg})
                             ); # skip args that are not required and 
+not bound
                next ROW unless ($row->content->{$arg} eq $self->bind_
            $entry = $row;
            last ROW;

    return $entry;

# this call initiates a race resistant attempt to make sure that there
+ is only 1 clear 'winner' among N potential
# agents attempting to run the same goal on the same spreadsheet agent
+'s cell
#
# Returns the row object from get_entry when this agent has claimed the row,
# or the hashref {'not_runnable' => 1} when the row must not be run now.
# Not runnable when: the 'ready' field is false; the agent's own field already
# holds status 'r' (running), 1 (already run) or 'F' (failed); a prerequisite
# field is not == 1; the claim write or the re-read raised an exception; or
# the re-read claim names a different host.
# Claim protocol: store 'r:'.hostname in the agent's field, sleep 3 seconds,
# re-read the row with get_entry and compare the stored hostname with our own.
# NOTE(review): "$status == 1" compares numerically against values such as
# 'F', which warns when warnings are enabled - confirm whether eq was meant.
# NOTE(review): the "unless ($entry)" branch shows no return in this listing,
# the body of the first eval is missing, many lines are truncated and the
# closing braces are absent.
sub run_entry {
    my $self = shift;

    my $entry = $self->get_entry();

    my $output = '';
    foreach my $bound_arg (keys %{$self->bind_key_fields}) {
        next if (!($self->config->{key_fields}->{$bound_arg}) && !($se
        $output .= join(' ', $bound_arg, $self->bind_key_fields->{$bou
+nd_arg})." ";

    unless ($entry) {
        print STDERR $output." is not supported on ".$self->page_name.

    unless ($entry->content->{ready}) {
        print STDERR $output." is not ready to run ".$self->agent_name
        return {'not_runnable' => 1};

    if ($entry->content->{$self->agent_name}) {
        my ($status, $running_hostname) = split /\:/, $entry->content-
        if ($status eq 'r') {
            print STDERR $output." is already running ".$self->agent_n
+ame." on ${running_hostname}\n";
            return {'not_runnable' => 1};
        if ($status == 1) {
            print STDERR $output." has already run ".$self->agent_name
            return {'not_runnable' => 1};

        if ($status eq 'F') {
            print STDERR $output." has already Failed ".$self->agent_n
+ame." on a previous run and must be investigated on ${running_hostnam
            return {'not_runnable' => 1};

    if ($self->prerequisites) {
        foreach my $prereq_field (@{$self->prerequisites}) {
            unless ($entry->content->{$prereq_field} == 1) {
                print STDERR $output." has not finished ${prereq_field
                return {'not_runnable' => 1};

    my $content = $entry->content;

    # first attempt to set the hostname of the machine as the value of
+ the agent
    my $hostname = Sys::Hostname::hostname;
    $content->{$self->agent_name} = 'r:'.$hostname;
    eval { 
    if ($@) {
        # this is a collision, which is to be treated as if it is not 
        print STDERR $output." lost ".$self->agent_name." on ${hostnam
        return {'not_runnable' => 1};

    sleep 3;
    my $nentry;
    eval {
        $nentry = $self->get_entry();
    if ($@) {
        # this is a collision, which is to be treated as if it is not 
        print STDERR $output." lost ".$self->agent_name." on ${hostnam
        return {'not_runnable' => 1};

    my $check = $nentry->content->{$self->agent_name};
    my ($status, $running_hostname) = split /\:/, $check;
    return $nentry if ($hostname eq $running_hostname);
    print STDERR $output." lost ".$self->agent_name." on ${hostname}\n
    return {'not_runnable' => 1};

# Marks this agent's row as failed.
#   $update_entry - optional hashref; each key/value pair is copied into the
#                   row content before the status is set.
# Re-fetches the row with get_entry, applies $update_entry, then sets the
# agent_name field of the content to 'F:'.hostname.
# NOTE(review): no statement that writes $content back to the spreadsheet is
# visible in this listing; the closing lines of the sub appear to be missing.
# NOTE(review): $entry is dereferenced without checking that get_entry
# returned a row.
sub fail_entry {
    my $self = shift;
    my $update_entry = shift;

    my $entry = $self->get_entry();
    my $hostname = Sys::Hostname::hostname;
    my $content = $entry->content;
    if ($update_entry) {
        print STDERR "Updating entry\n";
        foreach my $key (keys %{$update_entry}) {
            $content->{$key} = $update_entry->{$key};

    $content->{$self->agent_name} = 'F:'.$hostname;

# Marks this agent's row as completed.
#   $update_entry - optional hashref; each key/value pair is copied into the
#                   row content before the status is set.
# Prints "All Complete" to STDERR, re-fetches the row with get_entry, applies
# $update_entry, then sets the agent_name field of the content to 1.
# NOTE(review): no statement that writes $content back to the spreadsheet is
# visible in this listing; the closing lines of the sub appear to be missing.
# NOTE(review): $entry is dereferenced without checking that get_entry
# returned a row.
sub complete_entry {
    my $self = shift;
    my $update_entry = shift;

    print STDERR "All Complete\n";
    my $entry = $self->get_entry();
    my $content = $entry->content;
    if ($update_entry) {
        print STDERR "Updating entry\n";
        foreach my $key (keys %{$update_entry}) {
            $content->{$key} = $update_entry->{$key};
    $content->{$self->agent_name} = 1;

# Sends $error by mail through Net::SMTP::TLS on port 587.
#   $error - text to send (the run_my wrapper passes the captured output).
# The message body is a prefix line followed by $error; the prefix joins the
# local hostname, the "name value" pairs from bind_key_fields and a further
# value whose expression is cut off in this listing after "$self->ag"
# (presumably agent_name - confirm).  The SMTP work runs inside an eval.
# NOTE(review): the Hello/User/Password arguments are truncated, and the lines
# between the constructor call and datasend, the handling of $@ after the
# eval, and the closing braces are not visible in this listing.
# NOTE(review): "new Net::SMTP::TLS(...)" is indirect object syntax;
# Net::SMTP::TLS->new(...) is the unambiguous form.
sub mail_error {
    my ($self, $error) = @_;

    my $output = '';
    foreach my $bound_arg (keys %{$self->bind_key_fields}) {
        $output .= join(' ', $bound_arg, $self->bind_key_fields->{$bou
+nd_arg})." ";

    my $prefix = join(' ', Sys::Hostname::hostname, $output, $self->ag
    eval {
        my $mailer = new Net::SMTP::TLS(  
                                          Hello   =>      '
                                          Port    =>      587,  
                                          User    =>      $self->confi
                                          Password =>      $self->conf
        $mailer->datasend(join("\n", $prefix,$error));

1;  # End of IGSP::GoogleAgent

=head1 NAME

IGSP::GoogleAgent - A Distributed Agent System using Google Spreadshee

=head1 VERSION

Version 0.01


  use IGSP::GoogleAgent;

  my $google_agent = IGSP::GoogleAgent->new(
                                          agent_name => $goal,
                                          page_name => $google_page,
                                          debug => $debug,
                                          max_selves => $max, 
                                          bind_key_fields => {
                                               'foo' => 'this_particul
                                          prerequisites => [ 'isitdone
+', 'isthisone' ],
                                          subsumed_by => {
                                                           'someother_' => 3,
+process' => 1

  $google_agent->run_my(sub {
                               print STDERR "THIS ONE PASSES!!!";
                               return 1;

  $google_agent->run_my(sub {
                               print STDERR "THIS ONE FAILS AND EITHER
+ EMAILS OR PRINTS THIS ERROR TO STDERR (depending on debug)!!!";

  $google_agent->run_my(sub {
                               print STDERR "THIS ONE PASSES AND UPDAT
+ES THE 'cool' field in the spreadsheet!!!";
                               return (1, {'cool' => 'really cool'});


  IGSP::GoogleAgent is a framework for creating massively distributed 
  across many different servers, each using the same google spreadshee
+t as a
  control panel.  It is extensible, and flexible.  It doesn't specify w
  goals any pipeline should be working towards, or which goals are pre
  for other goals, but it does provide logic for easily defining these
+ relationships
  based on your own needs.  It does this by providing a subsumption ar
  whereby many small, highly focused agents are written to perform spe
+cific goals,
  and also know what resources they require to perform them.  In addit
+ion, it is
  designed from the beginning to support the creation of simple human-


  Scripts which use IGSP::GoogleAgents must run in the 'agent_bin' dir
  within the same agent root as the 'config' directory where the agent
  file is contained.  See that file for more details on what is config

=head1 METHODS

=head2 new

 This method constructs a new instance of an IGSP::GoogleAgent.  An in
+stance must
 specify its name, the name of the Worksheet within the spreadsheet th
+at it is
 working off, and values for the required key_field(s) within the conf
 which will result in a single row being returned from the given sprea
 Optionally, you can specify an ArrayRef of prerequisite fields in the
+ spreadsheet
 which must be true before the agent can run, whether to print out deb
+ug information
 to the terminal, or email the errors using the configured email only 
+on errors (default),
 the maximum number of agents of this name to allow to run on the give
+n machine,
 and a HashRef of processes which, if a certain number are already run
+ning on the machine,
 should cause the agent to exit without running.

  agent_name => Str
  page_name => Str
  bind_key_fields => HashRef { key_field_name => bound_value, ... }

  prerequisites => []
  debug => Bool
  max_selves => Int
  subsumed_by => { process_name => max_allowed, ... }

  This method will throw an exception if bind_key_fields are
  not supplied for required key_fields, as specified in the

  Also, there must be a field in the spreadsheet name for the agent_na
  This field will be filled in with the status of the agent for a part
  row, e.g. 1 for finished, r:hostname for running, or F:hostname for 

=head2 run_my

  This method takes a subroutine codeRef as an argument.  It then chec
+ks to determine
  if the agent needs to run for the given bind_key_field(s) specified 
+row (it must
  have a 1 in the 'ready' field for the row, and the agent_name field 
+must be empty),
  whether any prerequisite fields are true, whether the agent is subsu
+med by something
  else running on the machine, and whether there are not already max_s
+elves other
  instances of the agent running on the machine.  If all of these are 
+true, it then
  attempts to fill its hostname into the field for the agent_name.  If
+ it succeeds,
  it will then run the code_ref.  If it does not succeed (such as if a
+n instance 
  running on another server already chose that job and won the field) 
+it exits.

  The coderef can do almost anything it wants to do, but it must retur
+n one of the following:

=over 3

=item return true

  This instructs IGSP::GoogleAgent to place a 1 (true) value in the fi
+eld for the agent on
  the spreadsheet, signifying that it has been completed.

=item return false

  This instructs IGSP::GoogleAgent to place F:hostname into the field 
+for the agent on the
  spreadsheet, signifying that it has failed.  It will not run again f
+or this job until the
  failure is cleared from the spreadsheet (by any other agent).

=item return (true|false, HashRef)

  This does what returning true or false does, as well as allowing spe
+cific fields in the 
  spreadsheet to also be modified by the calling code.  The HashRef sh
+ould contain keys
  only for those fields to be updated (it should not attempt to update
+ the field for the
  agent_name itself, as this will be ignored).


  In addition, the coderef can print to STDOUT and STDERR.  If the age
+nt was instantiated in
  debug mode (true), it will print these to their normal destination. 
+ If the agent was
  instantiated without debug mode (the default), STDOUT and STDERR are
+ captured, and, if
  the codeRef returned false, emailed to the address specified in the 
+configuration using the
  same google account that configures access to the google spreadsheet

  One thing the agent must try at all costs to avoid is dying during t
+he subref (e.g. use
  eval for anything that you don't have control over).  It should alway
+s try to return one
  of the valid return states so that the spreadsheet status can be upd
+ated correctly.

=head2 agent_name

 This returns the name of the agent, in case it is needed by the calli
+ng code for other reasons.

=head2 debug

 This returns the debug state specified in the constructor.

=head2 google_db

 This returns the actual Net::Google::Spreadsheet object used
 by the agent, in case other types of queries,  or modifications
 need to be made that do not fit within this system.

=head1 AUTHOR

Darin London, C<< < at> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-igsp-googleagent a
+t>, or through
the web interface at L<
+IGSP-GoogleAgent>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc IGSP::GoogleAgent

You can also look for information at:

=over 4

=item * RT: CPAN's request tracker


=item * AnnoCPAN: Annotated CPAN documentation



=head1 SEE ALSO



Copyright 2009 Darin London.

This program is free software; you can redistribute it and/or modify i
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See for more information.

=head1 PerlMonks Nodes

RFC: 798154
Code: 798311


Comment on Google Spreadsheet Distributed Agent System
Download Code
Replies are listed 'Best First'.
Re: Google Spreadsheet Distributed Agent System
by dmlond (Acolyte) on Oct 13, 2009 at 19:23 UTC

    updated to allow config to be overridden by constructor parameters.

Back to Code Catacombs

Log In?

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://798311]
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others chilling in the Monastery: (12)
As of 2015-12-01 19:01 GMT
Find Nodes?
    Voting Booth?

    My keyboard shows this many letters:

    Results (25 votes), past polls