#!/usr/bin/env perl
use strict;
use warnings;
use HTML::TreeBuilder;
use Data::Dumper; $Data::Dumper::Sortkeys = 1;
# Parse the embedded sample document into an HTML::TreeBuilder tree.
# NOTE(review): as it appears here the sample holds no <span> or <li>
# markup, so the span/li lookup further down would match nothing --
# confirm the fixture has not lost its tags.
my $sample_html = <<'END_OF_HTML';
Author_name
__filler__
- book 1 by Author_name
- book 2 by Author_name
New_Author
__filler__
END_OF_HTML
my $tree = HTML::TreeBuilder->new;
$tree->parse($sample_html);
$tree->eof;    # tell the parser that no further input is coming
# Uncomment to show that as_HTML is a bad fit for this task.
# open my $fh , '<', \( $tree->as_HTML('', ' ') ) or die;
# print $_ while <$fh>;
# exit;
# Collect every <span> and <li> element under the tree and walk them in the
# order find_by_tag_name returns them (the loop relies on that being document
# order). A <span> sets the author for the list items that follow it; each
# <li> that sits directly inside a <ul> is recorded as one of that author's
# books. Both lookup tables are dumped at the end.
my @tags = $tree->find_by_tag_name( qw( span li ) );
my $current_author;      # text of the most recently seen <span>, if any
my %book_author;         # book title => author
my %author_books_HoA;    # author     => [ book titles, in encounter order ]
for my $t (@tags) {
    my $tag_name = $t->tag;
    if ( $tag_name eq 'span' ) {
        $current_author = $t->as_trimmed_text;
    }
    elsif ( $tag_name eq 'li' ) {
        # Only list items whose direct parent is a <ul> count as books.
        my $parent = $t->parent;
        next unless defined $parent and $parent->tag eq 'ul';

        my $book_title = $t->as_trimmed_text;

        # A book that appears before any author cannot be attributed; say so
        # explicitly instead of filing it under an undefined hash key.
        if ( !defined $current_author ) {
            warn "Skipping book '$book_title': no author seen before it";
            next;
        }

        # A repeated title still overwrites the earlier author and is pushed
        # again (as before), but the warning now says what was duplicated.
        if ( exists $book_author{$book_title} ) {
            warn "Duplicate book title '$book_title': author "
                . "'$book_author{$book_title}' replaced by '$current_author'";
        }

        $book_author{$book_title} = $current_author;
        push @{ $author_books_HoA{$current_author} }, $book_title;
    }
    else {
        # find_by_tag_name was asked for span and li only, so anything else
        # indicates a logic error.
        die "Unexpected tag $tag_name";
    }
}
print Dumper \%book_author, \%author_books_HoA;