Beefy Boxes and Bandwidth Generously Provided by pair Networks
Do you know where your variables are?
 
PerlMonks  

Parallel Unique Firefox Sessions

by ground0 (Novice)
on Apr 04, 2013 at 16:07 UTC ( #1027008=CUFP: print w/ replies, xml ) Need Help??

My first CUFP, hope you like it!

The following is an example of nested forks with Parallel::ForkManager, forked DBI calls with DBIx::Connector, and forked WWW::Mechanize::Firefox. This combination allows for easy concurrent unique HTTP sessions with WWW::Mechanize::Firefox on one URL.

MySQL is used here for housing subscription/login data, and a table to store Firefox profile names. The latter is used to avoid a race condition on selecting the Firefox profile to use when constructing $mech objects. Each Firefox profile has been pre-created, and configured with the Mozrepl plugin on a unique TCP/IP port.

The accompanying Perl module contains an example country subroutine which alludes to the not-shown encapsulation of the WWW::Mechanize::Firefox and PDF::API2 calls.

Special thanks to Corion, and perlmonks.org chat boxers :)

fork_dbi_mech.pl
#!/usr/bin/perl
#
# fork_dbi_mech.pl
#
# Nested Parallel::ForkManager forks driving per-fork DBIx::Connector
# handles and per-country WWW::Mechanize::Firefox sessions (the latter
# live in fork_dbi_mech.pm).  Outer fork level: one child per active
# country subscription; inner level: one child per case.
#
# Fixes over the posted version:
#   - all variables declared with `my` (the original used package
#     globals under `use strict` and would not compile)
#   - `$counties`/`$countries` typo corrected (the country loop
#     iterated a hash that was never assigned)
#   - customer_id bound via placeholder instead of SQL interpolation

use strict;
use warnings;

use DBIx::Connector;
use Parallel::ForkManager;
use lib './';
use fork_dbi_mech;

# Cap on concurrent children at EACH fork level (nested: up to 4*4 total).
use constant MAXPROCS => 4;

# Parent connection.  DBIx::Connector->dbh gives back a handle that is
# safe to reuse in the parent; children build their own below.
my $conn = DBIx::Connector->new(
    'DBI:mysql:MyDatabase;host=localhost', 'login', 'password',
) or die $DBI::errstr;

my $fork_countries = Parallel::ForkManager->new(MAXPROCS);
my $fork_cases     = Parallel::ForkManager->new(MAXPROCS);

# Customers holding at least one active subscription.
my $subscription = $conn->dbh->selectall_hashref(
    'SELECT customer_id FROM subscriptions WHERE active = 1 GROUP BY customer_id',
    'customer_id',
);

CUSTOMER:
foreach my $subscription_key ( keys %$subscription ) {

    my $sth = $conn->dbh->prepare('SELECT login FROM customer_info WHERE id = ?');
    $sth->execute($subscription_key);
    my $login = $sth->fetchrow_hashref;

    # Active country subscriptions for this customer.
    # Bind value passed through selectall_hashref's @bind_values slot
    # rather than interpolated into the statement text.
    my $countries = $conn->dbh->selectall_hashref(
        'SELECT country_id FROM subscription_info WHERE customer_id = ? AND active = 1',
        'country_id',
        undef,
        $subscription_key,
    );

    # First fork level: one child per country subscription.
    COUNTRY:
    foreach my $country_key ( keys %$countries ) {
        $fork_countries->start and next COUNTRY;

        $sth = $conn->dbh->prepare(
            'SELECT country FROM country_info WHERE id = ? AND active = 1'
        );
        $sth->execute($country_key);
        my $results = $sth->fetchrow_hashref;

        if ( $results->{'country'} ) {
            my $country_name = $results->{'country'};

            $sth = $conn->dbh->prepare(
                'SELECT filename, case_no FROM batch_info WHERE country LIKE ? AND customer_id = ?'
            );
            $sth->execute( '%' . $country_name . '%', $subscription_key );
            my $case = $sth->fetchall_hashref('filename');

            # Second fork level: one child per case, each dispatching to
            # the subroutine named after the country (USA, UK, JAPAN, ...).
            CASE:
            foreach my $case_no ( keys %$case ) {
                $fork_cases->start and next CASE;

                # Strip hyphens/whitespace so the country name is a legal
                # sub name ("United Kingdom" -> "UnitedKingdom").  Copy
                # first so the display name is left intact.
                ( my $sub_name = $country_name ) =~ s/[-\s]//g;

                # DBI handles must not be shared across a fork boundary:
                # every child opens its own connection.
                my $fork_conn = DBIx::Connector->new(
                    'DBI:mysql:MyDatabase;host=localhost', 'login', 'password',
                ) or die $DBI::errstr;

                # Serialize Firefox-profile checkout with a table lock so
                # two children can never grab the same profile.
                $fork_conn->dbh->do('LOCK TABLE profile_info WRITE');

                # Pick one free profile id at random, then remove it from
                # the pool.  (Replaces the original id=>id shuffle hash.)
                my $profiles = $fork_conn->dbh->selectall_hashref(
                    'SELECT * FROM profile_info', 'id',
                );
                my @available  = keys %$profiles;
                my $ff_profile = $available[ rand @available ];

                $sth = $fork_conn->dbh->prepare('DELETE FROM profile_info WHERE id = ?');
                $sth->execute($ff_profile);
                $fork_conn->dbh->do('UNLOCK TABLES');

                # Dispatch to the per-country sub exported by fork_dbi_mech.
                # Symbolic call is deliberate; scope the strict exemption
                # as tightly as possible.
                {
                    no strict 'refs';
                    &{$sub_name}(
                        $case->{$case_no}->{'case_no'},
                        $case_no,
                        $login->{'login'},
                        $ff_profile,
                    );
                }

                # Return the profile to the pool under the same lock.
                $fork_conn->dbh->do('LOCK TABLE profile_info WRITE');
                $sth = $fork_conn->dbh->prepare('INSERT INTO profile_info (id) VALUES (?)');
                $sth->execute($ff_profile);
                $fork_conn->dbh->do('UNLOCK TABLES');
                $sth->finish();

                $fork_cases->finish;
            }
            $fork_cases->wait_all_children;
        }
        $fork_countries->finish;
    }
    $fork_countries->wait_all_children;
}
fork_dbi_mech.pm
#!/usr/bin/perl
#
# fork_dbi_mech.pm
#
# Per-country browser-automation subs for fork_dbi_mech.pl.  Each sub
# (USA shown; UK/JAPAN analogous) launches its own Firefox instance on a
# dedicated profile/Mozrepl port, walks the country's case-lookup site,
# and hands off to the not-shown tryMech()/makePDF() helpers.

package fork_dbi_mech;

use strict;
use warnings;

use DBIx::Connector;
use Error qw(:try);
use PDF::API2;
use String::Random;
use Switch;    # NOTE(review): Switch.pm is deprecated; kept only because
               # not-shown parts of this module may still use it.
use WWW::Mechanize::Firefox;

require Exporter;
our @ISA    = qw(Exporter);
our @EXPORT = qw( USA UK JAPAN );

# Shared state with the not-shown tryMech()/makePDF() helpers, which the
# original code accessed as package globals:
#   $m           - the current WWW::Mechanize::Firefox object
#   $mech_status - presumably set to 1 by tryMech() on success — TODO confirm
our ( $m, $mech_status );

# Used to pad cache filenames for $mech->content, etc.
my $rand = String::Random->new;

# USA($case_no, $filename, $login, $ff_profile)
#   $case_no    - case number to search for
#   $filename   - batch filename (passed through to tryMech/makePDF)
#   $login      - customer login name
#   $ff_profile - numeric Firefox profile id checked out by the caller;
#                 also selects the Mozrepl port
sub USA {
    my ( $args_case, $args_filename, $args_login, $ff_profile ) = @_;

    # Mozrepl ports are based off the default 4242 * 10: each port is
    # 42420 plus the profile number, so every profile is unique.
    my $ff_port = 42420 + $ff_profile;

    # Unique $mech object on this profile/port.  -no-remote keeps the
    # launch from attaching to an already-running Firefox.
    $m = WWW::Mechanize::Firefox->new(
        launch => [
            'firefox', '-P', $ff_profile, '-no-remote',
            '-width', '1024', '-height', '768',
        ],
        repl      => "localhost:$ff_port",
        bufsize   => 10_000_000,
        tab       => 'current',
        autoclose => 1,
    );

    my $url = 'http://usa.example.com';

    # Each tryMech step is attempted only if the previous one succeeded.
    tryMech( $args_filename, $args_login, $args_case, 'URL', undef, $url );
    if ( $mech_status == 1 ) {
        tryMech( $args_filename, $args_login, $args_case,
                 'field', 'CaseField', $args_case );
        if ( $mech_status == 1 ) {
            tryMech( $args_filename, $args_login, $args_case,
                     'click', 'Search' );
            if ( $mech_status == 1 ) {
                makePDF( $args_filename, $args_login, $args_case );
            }
        }
    }

    # Drop the mech object (autoclose shuts the tab) and give Firefox a
    # moment before the profile is returned to the pool.
    undef $m;
    sleep 1;
    return;
}

1;

Comment on Parallel Unique Firefox Sessions
Select or Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: CUFP [id://1027008]
Approved by ahmad
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others surveying the Monastery: (4)
As of 2014-07-26 14:48 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    My favorite superfluous repetitious redundant duplicative phrase is:









    Results (178 votes), past polls