#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dumper;
use Web::Scraper;
# Sample markup that every scraper variant in this script is run against.
#
# The XPath rules all look for a table with class "someclass" and read the
# first and second cell of each of its rows, so the fixture has to be real
# table markup; plain "key | val" text gives the rules nothing to match.
#
# NOTE(review): the tags were rebuilt from the selectors used in this file and
# from the dumps recorded after __END__ (eleven key/value rows, with the
# classed table nested inside a single cell of an outer table). Confirm
# against the original document if it is still available.
my $html_content = <<'__HTML__';
<table><tr><td><table class="someclass">
<tr><td>key1</td><td>val1</td></tr>
<tr><td>key2</td><td>val2</td></tr>
<tr><td>key3</td><td>val3</td></tr>
<tr><td>key4</td><td>val4</td></tr>
<tr><td>key5</td><td>val5</td></tr>
<tr><td>key6</td><td>val6</td></tr>
<tr><td>key7</td><td>val7</td></tr>
<tr><td>key8</td><td>val8</td></tr>
<tr><td>key9</td><td>val9</td></tr>
<tr><td>key10</td><td>val10</td></tr>
<tr><td>key11</td><td>val11</td></tr>
</table></td></tr></table>
__HTML__
# Six Web::Scraper rule sets, stored in the order they are run. They differ
# in what the outer rule selects (the whole classed table, or each row under
# it) and in whether the inner keys are scalars ('name') or lists ('name[]').
# The dumps recorded after __END__ appear to be their results, in this order.
my @pdata = (

    # 1: outer rule selects the table; inner keys are scalars.
    scraper {
        process(
            '//*/table[@class="someclass"]',
            'table[]' => scraper {
                process( '//tr/td[1]', 'name' => 'TEXT' );
                process( '//tr/td[2]', 'attr' => 'TEXT' );
            }
        );
    },

    # 2: outer rule selects the table; inner keys are lists.
    scraper {
        process(
            '//*/table[@class="someclass"]',
            'table[]' => scraper {
                process( '//tr/td[1]', 'name[]' => 'TEXT' );
                process( '//tr/td[2]', 'attr[]' => 'TEXT' );
            }
        );
    },

    # 3: outer rule selects each row, but the inner rules sit inside a plain
    #    hash constructor instead of a nested scraper block, so both inner
    #    process() calls run as arguments of the outer rule are being built.
    scraper {
        process(
            '//*/table[@class="someclass"]//tr',
            'table[]' => {
                process('//tr/td[1]', 'name[]' => 'TEXT'),
                process('//tr/td[2]', 'attr[]' => 'TEXT'),
            }
        );
    },

    # 4: outer rule selects each row; inner keys are scalars.
    scraper {
        process(
            '//*/table[@class="someclass"]//tr',
            'table[]' => scraper {
                process( '//tr/td[1]', 'name' => 'TEXT' );
                process( '//tr/td[2]', 'attr' => 'TEXT' );
            }
        );
    },

    # 5: same rules as 4, followed by result 'table'.
    scraper {
        process(
            '//*/table[@class="someclass"]//tr',
            'table[]' => scraper {
                process( '//tr/td[1]', 'name' => 'TEXT' );
                process( '//tr/td[2]', 'attr' => 'TEXT' );
            }
        );
        result('table');
    },

    # 6: outer rule selects each row; inner keys are lists.
    scraper {
        process(
            '//*/table[@class="someclass"]//tr',
            'table[]' => scraper {
                process( '//tr/td[1]', 'name[]' => 'TEXT' );
                process( '//tr/td[2]', 'attr[]' => 'TEXT' );
            }
        );
    },
);
# Run each scraper variant against the same markup and print what it returns,
# leaving a blank line between successive dumps.
foreach my $variant (@pdata) {
    my $res = $variant->scrape( \$html_content );

    # A false result from scrape() is treated as fatal.
    die "Can't define content to parser $!" unless $res;

    print Dumper($res), "\n\n";
}
__END__
$VAR1 = {
'table' => [
{
'name' => 'key1',
'attr' => 'val1'
}
]
};
$VAR1 = {
'table' => [
{
'name' => [
'key1',
'key2',
'key3',
'key4',
'key5',
'key6',
'key7',
'key8',
'key9',
'key10',
'key11'
],
'attr' => [
'val1',
'val2',
'val3',
'val4',
'val5',
'val6',
'val7',
'val8',
'val9',
'val10',
'val11'
]
}
]
};
$VAR1 = {
'table' => [
undef,
undef,
undef,
undef,
undef,
undef,
undef,
undef,
undef,
undef,
undef
],
'name' => [
'key1val1key2val2key3val3key4val4key5val5key6val6key7val7key8val8key9val9key10val10key11val11',
'key1',
'key2',
'key3',
'key4',
'key5',
'key6',
'key7',
'key8',
'key9',
'key10',
'key11'
],
'attr' => [
'val1',
'val2',
'val3',
'val4',
'val5',
'val6',
'val7',
'val8',
'val9',
'val10',
'val11'
]
};
$VAR1 = {
'table' => [
{
'name' => 'key1',
'attr' => 'val1'
},
{
'name' => 'key2',
'attr' => 'val2'
},
{
'name' => 'key3',
'attr' => 'val3'
},
{
'name' => 'key4',
'attr' => 'val4'
},
{
'name' => 'key5',
'attr' => 'val5'
},
{
'name' => 'key6',
'attr' => 'val6'
},
{
'name' => 'key7',
'attr' => 'val7'
},
{
'name' => 'key8',
'attr' => 'val8'
},
{
'name' => 'key9',
'attr' => 'val9'
},
{
'name' => 'key10',
'attr' => 'val10'
},
{
'name' => 'key11',
'attr' => 'val11'
}
]
};
$VAR1 = [
{
'name' => 'key1',
'attr' => 'val1'
},
{
'name' => 'key2',
'attr' => 'val2'
},
{
'name' => 'key3',
'attr' => 'val3'
},
{
'name' => 'key4',
'attr' => 'val4'
},
{
'name' => 'key5',
'attr' => 'val5'
},
{
'name' => 'key6',
'attr' => 'val6'
},
{
'name' => 'key7',
'attr' => 'val7'
},
{
'name' => 'key8',
'attr' => 'val8'
},
{
'name' => 'key9',
'attr' => 'val9'
},
{
'name' => 'key10',
'attr' => 'val10'
},
{
'name' => 'key11',
'attr' => 'val11'
}
];
$VAR1 = {
'table' => [
{
'name' => [
'key1'
],
'attr' => [
'val1'
]
},
{
'name' => [
'key2'
],
'attr' => [
'val2'
]
},
{
'name' => [
'key3'
],
'attr' => [
'val3'
]
},
{
'name' => [
'key4'
],
'attr' => [
'val4'
]
},
{
'name' => [
'key5'
],
'attr' => [
'val5'
]
},
{
'name' => [
'key6'
],
'attr' => [
'val6'
]
},
{
'name' => [
'key7'
],
'attr' => [
'val7'
]
},
{
'name' => [
'key8'
],
'attr' => [
'val8'
]
},
{
'name' => [
'key9'
],
'attr' => [
'val9'
]
},
{
'name' => [
'key10'
],
'attr' => [
'val10'
]
},
{
'name' => [
'key11'
],
'attr' => [
'val11'
]
}
]
};