The role
1.12
Please note that this version contains some annoying errors mistakes. Use 1.17 instead. See the explanations from marioroy below in this thread.
package MyRole;
# $Id: MyRole.pm,v 1.12 2017/06/17 14:00:17 karl Exp karl $
use Role::Tiny;
use threads;
use MCE::Loop;
use MCE::Shared;
use MCE::Mutex;
use WWW::Curl::Easy;
use Config::Tiny;
my $cfg = Config::Tiny->read(q(MyRole.cfg));
MCE::Loop::init {
max_workers => $cfg->{params}->{workers},
chunk_size => 1,
interval => $cfg->{params}->{interval},
};
my $fetch = sub {
my $curl = WWW::Curl::Easy->new;
my ( $header, $body );
$curl->setopt( CURLOPT_URL, shift );
$curl->setopt( CURLOPT_WRITEHEADER, \$header );
$curl->setopt( CURLOPT_WRITEDATA, \$body );
$curl->setopt( CURLOPT_FOLLOWLOCATION, $cfg->{params}->{followloca
+tion} );
$curl->setopt( CURLOPT_TIMEOUT, $cfg->{params}->{timeout} )
+;
$curl->perform;
{
header => $header,
body => $body,
info => $curl->getinfo(CURLINFO_HTTP_CODE),
error => $curl->errbuf,
};
};
sub uagent {
my $urls = $_[1];
my $shared = MCE::Shared->hash;
my $mutex = MCE::Mutex->new;
mce_loop {
MCE->yield;
$mutex->enter( $shared->set( $_ => $fetch->($_) ) );
}
$urls;
my $iter = $shared->iterator();
my $result;
while ( my ( $url, $data ) = $iter->() ) {
$result->{$url} = $data;
}
$result;
}
1;
__END__
package MyRole;
# $Id: MyRole.pm,v 1.17 2017/06/18 08:45:19 karl Exp karl $
use Role::Tiny;
use threads;
use MCE::Loop;
use MCE::Shared;
use WWW::Curl::Easy;
use Config::Tiny;
my $cfg = Config::Tiny->read(q(MyRole.cfg));
MCE::Loop::init {
max_workers => $cfg->{params}->{workers},
chunk_size => 1,
interval => $cfg->{params}->{interval},
};
my $fetch = sub {
my $curl = WWW::Curl::Easy->new;
my ( $header, $body );
$curl->setopt( CURLOPT_URL, shift );
$curl->setopt( CURLOPT_WRITEHEADER, \$header );
$curl->setopt( CURLOPT_WRITEDATA, \$body );
$curl->setopt( CURLOPT_FOLLOWLOCATION, $cfg->{params}->{followloca
+tion} );
$curl->setopt( CURLOPT_TIMEOUT, $cfg->{params}->{timeout} )
+;
$curl->perform;
{
header => $header,
body => $body,
info => $curl->getinfo(CURLINFO_HTTP_CODE),
error => $curl->errbuf,
};
};
sub uagent {
my $urls = $_[1];
my $shared = MCE::Shared->hash;
mce_loop {
MCE->yield;
$shared->set( $_ => $fetch->($_) );
}
$urls;
$shared->export;
}
1;
__END__
The config file
# $Id: MyRole.cfg,v 1.4 2017/06/17 13:48:19 karl Exp karl $
[params]
timeout=10
followlocation=1
interval=0.005
workers=auto
The class
# $Id: MyClass.pm,v 1.5 2017/06/16 15:35:32 karl Exp karl $
package MyClass;
use Class::Tiny;
use Role::Tiny::With;
with qw(MyRole);
1;
__END__
The app
#!/usr/bin/env perl
# $Id: run.pl,v 1.14 2017/06/17 14:43:57 karl Exp karl $
use strict;
use warnings;
use MyClass;
use Data::Dump;
use HTML::Strip::Whitespace qw(html_strip_whitespace);
use feature qw(say);
my @urls = grep { $_ ne "" } <DATA>;
chomp @urls;
my $object = MyClass->new;
my $result = $object->uagent( \@urls );
# dd $result;
while ( my ( $url, $data ) = each %$result ) {
say qq($url);
say $data->{header};
# my $html;
# html_strip_whitespace(
# 'source' => \$data->{body},
# 'out' => \$html
# );
# say $html;
}
__DATA__
http://fantasy.xecu.net
http://perlmonks.org
http://stackoverflow.com
http://www.trumptowerny.com
http://www.maralagoclub.com
http://www.sundialservices.com
Update: Fixed mistakes. Thank you marioroy.
Update2: Deleted unused module.
Best regards, Karl
«The Crux of the Biscuit is the Apostrophe»
Furthermore I consider that Donald Trump must be impeached as soon as possible