#!/usr/bin/env perl
# Build two lookup tables from an HTML document in which author names are
# read from <span> elements and book titles from <li> elements whose parent
# is a <ul>:
#
#   %book_author      - book title  => author name
#   %author_books_HoA - author name => [ book titles ]
#
# Both tables are dumped to STDOUT with Data::Dumper.
use strict;
use warnings;
use HTML::TreeBuilder;
use Data::Dumper;

$Data::Dumper::Sortkeys = 1;    # stable key order in the dump

my $tree = HTML::TreeBuilder->new;

# NOTE(review): as it appears here the sample document contains no
# <span>/<li> markup, so the loop below finds nothing and both tables stay
# empty. Presumably the tags were lost when this file was pasted; restore
# the original markup before relying on the output.
$tree->parse( <<'END_OF_HTML' );
Author_name __filler__ New_Author __filler__
END_OF_HTML
$tree->eof;

# Uncomment to show that as_HTML is a bad fit for this task.
# open my $fh , '<', \( $tree->as_HTML('', ' ') ) or die;
# print $_ while <$fh>;
# exit;

# The loop assumes find_by_tag_name hands back elements in document order,
# so that each <li> is attributed to the most recently seen <span>.
my @tags = $tree->find_by_tag_name( qw( span li ) );

my $current_author;      # text of the last <span> seen; undef until the first
my %book_author;         # title  => author
my %author_books_HoA;    # author => [ titles ]

for my $t (@tags) {
    my $tag_name = $t->tag;

    if ( $tag_name eq 'span' ) {
        $current_author = $t->as_trimmed_text;
    }
    elsif ( $tag_name eq 'li' ) {

        # Only items of unordered lists count as books.
        next unless $t->parent->tag eq 'ul';

        my $book_title = $t->as_trimmed_text;

        # A book listed before any author cannot be attributed; skip it
        # rather than filing it under an undefined (empty-string) key.
        if ( !defined $current_author ) {
            warn "Skipping book '$book_title': no author <span> seen yet\n";
            next;
        }

        # A repeated title overwrites the earlier author in %book_author
        # (it is still appended to the new author's list), so say which
        # title is affected instead of emitting a bare warning.
        warn "Duplicate book title '$book_title': author "
            . "'$book_author{$book_title}' replaced by '$current_author'\n"
            if exists $book_author{$book_title};

        $book_author{$book_title} = $current_author;
        push @{ $author_books_HoA{$current_author} }, $book_title;
    }
    else {
        # Defensive: only span and li were requested above.
        die "Unexpected tag $tag_name";
    }
}

# Everything needed has been copied out as plain strings; release the tree.
$tree->delete;

print Dumper \%book_author, \%author_books_HoA;