#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dumper;
use Web::Scraper;
# Sample page fed to both scrapers defined further down.
#
# NOTE(review): the previous fixture held only the rendered cell text
# ("key1 | val1 | ..."), without any tags, so neither the table XPath nor the
# 'div p' selector could match anything.  The markup below is reconstructed
# from those selectors and from the expected output recorded after __END__:
# an 11-row two-column table with class "someclass", and a <div> holding two
# paragraphs that each carry one link and one image.  Confirm against the
# real page if one exists.
my $html_content = <<'__HTML__';
<html>
<body>
<table class="someclass">
<tr><td>key1</td><td>val1</td></tr>
<tr><td>key2</td><td>val2</td></tr>
<tr><td>key3</td><td>val3</td></tr>
<tr><td>key4</td><td>val4</td></tr>
<tr><td>key5</td><td>val5</td></tr>
<tr><td>key6</td><td>val6</td></tr>
<tr><td>key7</td><td>val7</td></tr>
<tr><td>key8</td><td>val8</td></tr>
<tr><td>key9</td><td>val9</td></tr>
<tr><td>key10</td><td>val10</td></tr>
<tr><td>key11</td><td>val11</td></tr>
</table>
<div>
<p><a href="http://example.com/">example</a><img src="http://example.com/example.png"></p>
<p><a href="http://example.com/01">example 01</a><img src="http://example.com/01/example.png"></p>
</div>
</body>
</html>
__HTML__
# Scraper 1: selects the odd-positioned rows (1st, 3rd, 5th, ...) of the table
# with class "someclass".  Each selected row goes through the nested scraper,
# which stores the text of its first cell as 'name' and of its second cell as
# 'attr'; the per-row results are collected as a list under the key 'table'.
my $table_scraper = scraper {
    process '//*/table[@class="someclass"]//tr[position() mod 2 = 1]',
        'table[]' => scraper {
            process '//tr/td[1]', 'name' => 'TEXT';
            process '//tr/td[2]', 'attr' => 'TEXT';
        };
};

# Scraper 2: selects every <p> inside a <div>.  The nested scraper stores the
# href of an <a> as 'link' and the src of an <img> as 'image'; the
# per-paragraph results are collected as a list under the key 'divp'.
#
# An XPath alternative to the 'div p' selector that was left disabled:
##   process '//div/p/*[@href or @src]',
my $divp_scraper = scraper {
    process 'div p',
        'divp[]' => scraper {
            process 'a',   'link'  => '@href';
            process 'img', 'image' => '@src';
        };
};

# Scrapers are run in this order, each against the same sample document.
my @pdata = ( $table_scraper, $divp_scraper );

for my $pagedata (@pdata) {
    # The content is passed by reference so it is treated as HTML text.
    # $! is intentionally not part of the error message: scrape() is not a
    # system call, so errno would only contain unrelated, stale text.
    my $res = $pagedata->scrape( \$html_content )
        or die "Scraper returned no result for the sample HTML content";

    # Dump the extracted structure, separated from the next one by a blank line.
    print Dumper($res), "\n\n";
}
__END__
$VAR1 = {
'table' => [
{
'name' => 'key1',
'attr' => 'val1'
},
{
'name' => 'key3',
'attr' => 'val3'
},
{
'name' => 'key5',
'attr' => 'val5'
},
{
'name' => 'key7',
'attr' => 'val7'
},
{
'name' => 'key9',
'attr' => 'val9'
},
{
'name' => 'key11',
'attr' => 'val11'
}
]
};
$VAR1 = {
'divp' => [
{
'link' => 'http://example.com/',
'image' => 'http://example.com/example.png'
},
{
'link' => 'http://example.com/01',
'image' => 'http://example.com/01/example.png'
}
]
};