use Modern::Perl;
use HTML::PullParser;
use Data::Dump qw/dump/;
# Collect the text of every list item found inside <ul> elements of an HTML
# file and dump the resulting list.
my $document_to_parse = 'html_1.html';

# Each token is an array ref shaped by the argspecs below:
#   start tag -> [ "S", tagname, text ]
#   end tag   -> [ "E", tagname, text ]
#   text      -> [ "T", text ]
my $p = HTML::PullParser->new(
    file  => $document_to_parse,
    start => '"S", tagname, text',
    end   => '"E", tagname, text',
    text  => '"T", text',
) or die "Error: $!";

my @data;             # finished list items, in document order
my $text     = '';    # text accumulated for the item currently being read
my $ul_depth = 0;     # number of <ul> elements currently open

TOKEN:
while ( my $token = $p->get_token ) {
    my $type = $token->[0];
    my $tag  = $type eq 'T' ? '' : $token->[1];

    my $opens_ul  = $type eq 'S' && $tag eq 'ul';
    my $closes_ul = $type eq 'E' && $tag eq 'ul';

    # A depth counter (rather than a flip-flop range) keeps collecting after
    # a nested </ul>, so items following a sub-list are not lost.
    $ul_depth++ if $opens_ul;

    # Ignore everything outside of a list, including a stray </ul>.
    next TOKEN unless $ul_depth;

    if ( $type eq 'T' ) {
        my $chunk = $token->[1];
        $chunk =~ s/\s+\z//;

        # Test length, not truth: the text "0" is a legitimate item.
        $text .= "$chunk " if length $chunk;
    }
    elsif ( $closes_ul or ( $type eq 'S' and $tag eq 'li' ) ) {

        # A new <li> or the end of a list terminates the pending item.
        $text =~ s/\s+\z//;
        push @data, $text if length $text;
        $text = '';
    }

    $ul_depth-- if $closes_ul;
}

say dump(@data);