Beefy Boxes and Bandwidth Generously Provided by pair Networks
Problems? Is your data what you think it is?

Parallel Unique Firefox Sessions

by ground0 (Novice)
on Apr 04, 2013 at 16:07 UTC ( #1027008=CUFP: print w/replies, xml ) Need Help??

My first CUFP, hope you like it!

The following is an example of nested forks with Parallel::ForkManager, forked DBI calls with DBIx::Connector, and forked WWW::Mechanize::Firefox. This combination allows for easy concurrent unique HTTP sessions with WWW::Mechanize::Firefox on one URL.

MySQL is used here for housing subscription/login data, and a table to store Firefox profile names. The latter is used to avoid a race condition on selecting the Firefox profile to use when constructing $mech objects. Each Firefox profile has been pre-created, and configured with the Mozrepl plugin on a unique TCP/IP port.

The subsequent Perl module contains an example country subroutine which eludes to not-shown encapsulation of the WWW::Mechanize::Firefox and PDF::API2 calls.

Special thanks to Corion, and chat boxers :)
#!/usr/bin/perl use DBIx::Connector; use Parallel::ForkManager; use lib './'; use fork_dbi_mech; use strict; # I use constant here for limiting nested forks (4*4) use constant MAXPROCS => 4; # DBI object constructor with dsn my $conn = DBIx::Connector->new( 'DBI:mysql:MyDatabase;host=localhost', 'login', 'password') or die $DBI::errstr; # fork object constructors my $fork_cases = new Parallel::ForkManager(MAXPROCS); my $fork_countries = new Parallel::ForkManager(MAXPROCS); # example SQL query to suss subs to process my $subscription = $conn->dbh->selectall_hashref('SELECT customer_id FROM subscriptions WHERE active = 1 GROUP BY customer_id', 'customer_id' ); # Now get each customer login/id, and suss # their active country subs foreach $subscription_key (keys %$subscription) { $sth = $conn->dbh->prepare('SELECT login FROM customer_info WHERE id = ?' ); $sth->execute($subscription_key); $login = $sth->fetchrow_hashref; $counties = $conn->dbh->selectall_hashref("SELECT country_id FROM subscription_info WHERE customer_id = $subscription_key AND active = 1", 'country_id' ); # First fork by customer's country subs # Then suss each country's cases foreach $country_key (keys %$countries) { $fork_countries->start and next; $sth = $conn->dbh->prepare('SELECT country FROM country_info WHERE id = ? AND active = 1' ); $sth->execute($country_key); $results = $sth->fetchrow_hashref; if ($results->{'country'}) { $country_name = $results->{'country'}; $sth = $conn->dbh->prepare('SELECT filename, case_no FROM batch_info WHERE country LIKE ? AND customer_id = ?' ); $sth->execute('%' . $country_name . '%', $subscription_key); $case = $sth->fetchall_hashref('filename'); # Second fork by customer's cases # Then call subroutine named after country foreach $case_no (keys %$case) { $fork_cases->start and next; $country_name =~ s/[-|\s]//g; # Create fork's own DBI object $conn = DBIx::Connector->new('DBI:mysql:MyDatabase;host=localhost' +, 'login', 'password') or die $DBI::errstr; # Lock the table to avoid race condition $sth = $conn->dbh->prepare('LOCK TABLE profile_info WRITE'); $sth->execute(); # Suss list of available Firefox profiles $sth = $conn->dbh->prepare('SELECT * FROM profile_info'); $sth->execute(); # Pick a random hash value # Delete that value from db $ff_profile_hash = $sth->fetchall_hashref('id'); delete $_->{id} for values %$ff_profile_hash; foreach $ff_profile_temp (keys %$ff_profile_hash) { $ff_profile_hash->{$ff_profile_temp} = $ff_profile_temp; } $ff_profile = $ff_profile_hash->{(keys %$ff_profile_hash)[rand key +s %$ff_profile_hash]}; $sth = $conn->dbh->prepare('DELETE FROM profile_info WHERE id = ?' ); $sth->execute($ff_profile); # Unlock table for next fork $sth = $conn->dbh->prepare('UNLOCK TABLES'); $sth->execute(); $sth->finish(); # Call country subroutine { no strict 'refs'; &$country_name($case->{$case_no}->{'case_no'}, $case_no, $login->{'login'}, $ff_profile ); } # Lock table again and replace Firefox profile $sth = $conn->dbh->prepare('LOCK TABLE profile_info WRITE'); $sth->execute(); $sth = $conn->dbh->prepare('INSERT INTO profile_info (id) VALUES (?)' ); $sth->execute($ff_profile); $sth = $conn->dbh->prepare('UNLOCK TABLES'); $sth->execute(); $sth->finish(); $fork_cases->finish; } $fork_cases->wait_all_children; } $fork_countries->finish; } $fork_countries->wait_all_children; } 1;
#!/usr/bin/perl package fork_dbi_mech; require Exporter; use DBIx::Connector; use Error qw(:try); use PDF::API2; use String::Random; use Switch; use WWW::Mechanize::Firefox; use strict; our @ISA = qw(Exporter); our @EXPORT = qw( USA UK JAPAN ); # I use $rand to pad cache files for $mech->content, etc. my $rand = new String::Random; sub USA { $args_case = $_[0]; $args_filename = $_[1]; $args_login = $_[2]; $ff_profile = $_[3]; # I based my Mozrepl ports off the default 4242 * 10 # Ports are the sum of of this and the Firefox profile $ff_port = 42420 + $ff_profile; # Now construct the unique $mech object $m = WWW::Mechanize::Firefox->new( launch => ['firefox', '-P', $ff_profile, '-no-remote', '-width', '1024', '-height', '768'], repl => "localhost:$ff_port", bufsize => 10_000_000, tab => 'current', autoclose => 1 ); $url = ''; # Try a $mech->get($url); &tryMech($args_filename, $args_login, $args_case, 'URL', undef, $url ); if ($mech_status == 1) { &tryMech($args_filename, $args_login, $args_case, 'field', 'CaseField', $args_case ); if ($mech_status == 1) { &tryMech($args_filename, $args_login, $args_case, 'click', 'Search' ); if ($mech_status == 1) { &makePDF($args_filename, $args_login, $args_case ); } } } undef $m; sleep(1); } 1;

Log In?

What's my password?
Create A New User
Node Status?
node history
Node Type: CUFP [id://1027008]
Approved by ahmad
[virtualsue]: weird. my p6 simple web spider fails to extract anchor tags from
[holli]: Nice. Turns out I don't have to remove Sweet home Alabama from my playlist ;)
[Corion]: virtualsue: We have some links with a newline between <a and the attributes, maybe your extractor fails to handle that?
[virtualsue]: It's not getting any at all, which seems quite broken

How do I use this? | Other CB clients
Other Users?
Others musing on the Monastery: (6)
As of 2017-12-13 11:52 GMT
Find Nodes?
    Voting Booth?
    What programming language do you hate the most?

    Results (360 votes). Check out past polls.