in reply to
Re: Extracting data-structure from HTML using Web::Scraper
in thread Extracting data-structure from HTML using Web::Scraper
And here is an XML::Twig version, since the logic is the same:
#!/usr/bin/perl --
use strict; use warnings;
use Data::Dump;
use XML::Twig;
# Sample document: a flat <body> in which every <h4> heading is followed
# by the <p> elements that belong to it.
my $sample = q{
<html><body>
<h4 class="bla">July 12</h4>
<p>Tim</p>
<p>Jon</p>
<h4 class="bla">July 13</h4>
<p>James</p>
<p>Eric</p>
<p>Jerry</p>
<p>Susie</p>
<h4 class="date">July 14</h4>
<p>Kami</p>
<p>Darryl</p>
</body></html>
};

# Result: one single-key hash per <h4>, in document order, of the form
#   { heading text => [ text of each following <p> ] }
my @root;

# Paragraph list of the most recently seen <h4>; undef until the first
# heading.  Keeping it in its own lexical (instead of parking the current
# key as an extra element of @root) means @root only ever holds finished
# records, so no cleanup is needed after parsing.
my $current;

my $xml = XML::Twig->new(
    twig_handlers => {

        # A heading opens a new group.  The group is recorded right away,
        # so a heading with no paragraphs still appears, with an empty list.
        '//body/h4' => sub {
            dd $_->path;
            my $heading = $_->text;
            $current = [];
            push @root, { $heading => $current };
            return 1;
        },

        # A paragraph is appended to the group opened by the last heading.
        '//body/p' => sub {
            dd $_->path;
            if ( !$current ) {

                # No heading seen yet: there is no group to attach to.
                # Report and skip rather than die on an empty @root.
                warn 'Skipping <p> before first <h4>: ', $_->text, "\n";
                return 1;
            }
            push @{$current}, $_->text;
            return 1;
        },
    },
);

$xml->xparse($sample);
dd \@root;
__END__
"/html/body/h4"
"/html/body/p"
"/html/body/p"
"/html/body/h4"
"/html/body/p"
"/html/body/p"
"/html/body/p"
"/html/body/p"
"/html/body/h4"
"/html/body/p"
"/html/body/p"
[
{ "July 12" => ["Tim", "Jon"] },
{ "July 13" => ["James", "Eric", "Jerry", "Susie"] },
{ "July 14" => ["Kami", "Darryl"] },
]