#! /usr/bin/perl
use HTML::TreeBuilder::XPath;
use WWW::Mechanize;
use warnings;
use diagnostics;
use constant MAX_CHILDREN => 3;
open(INPUT,"<",$input) || die("Couldn't read from the file, $input with error: $!\n");
open(OUTPUT, ">>", $output) || die("Couldn't open the file, $output with error: $!\n");
$pm = Parallel::ForkManager->new(MAX_CHILDREN);
$mech=WWW::Mechanize->new();
$mech->stack_depth(0);
while()
{
chomp $_;
$url=$_;
$pm->start() and next;
$mech->get($url);
if($mech->success)
{
$tree=HTML::TreeBuilder::XPath->new();
$tree->parse($mech->content);
# do some processing here on the content and print the results to OUTPUT file
# once done then delete the root node
$tree->delete();
}
$pm->finish();
print "Child Processing finished\n"; # it never reaches this point!
}
$pm->wait_all_children;