# Convert the article body held in $html into LaTeX written to STDOUT.
#
# Every <p> and <table> beneath the '.article-bodycopy' element is visited in
# document order:
#   * class "SubHead"  -> \subsection{...}
#   * table            -> Figure environment built from its first <img> and
#                         its first '.Figure1' element (used as the caption)
#   * anything else    -> printed via filter(), after direct <b> children have
#                         been rewritten in place to {\bf ...}
# Unhandled inline markup and malformed figure tables are reported on STDERR.
#
# NOTE(review): filter() is defined outside this section; it is presumably a
# node-to-LaTeX-text converter -- confirm its contract.
# NOTE(review): newer Mojo::DOM releases appear to return the node kind from
# ->type and the element name from ->tag. If this script runs against such a
# release, the 'table' and 'b' comparisons below need ->tag -- confirm against
# the installed Mojolicious version.
my $dom  = Mojo::DOM->new($html);
my $body = $dom->at('.article-bodycopy')
    or die "No .article-bodycopy element found in input\n";

$body->find('p, table')->each(sub {
    my $node = shift;

    # Elements without a class attribute yield undef; normalise it so the
    # string comparison below does not emit an "uninitialized" warning.
    my $class = defined $node->{class} ? $node->{class} : '';

    if ($class eq 'SubHead') {
        # NOTE(review): the heading text bypasses filter(), unlike the other
        # branches -- verify that this is intentional.
        print '\subsection{' . $node->text . "}";
        return;
    }

    if ($node->type eq "table") {
        # at() returns undef when nothing matches, instead of autovivifying
        # an entry in the result collection the way find(...)->[0]->{src} does.
        my $img_node = $node->at('img');
        my $img      = defined $img_node ? $img_node->{src} : undef;
        if (!defined $img) {
            print STDERR "SKIPPING TABLE WITHOUT IMAGE SOURCE\n";
            return;
        }

        # Only rewrite a trailing .gif extension (optionally followed by a
        # query string or fragment), not the first ".gif" anywhere in the path.
        $img =~ s/\.gif(?=\z|[?#])/.png/;

        my $cap_node = $node->at('.Figure1');
        my $cap      = '';
        if (defined $cap_node) {
            $cap = filter($cap_node);
        }
        else {
            print STDERR "FIGURE WITHOUT CAPTION: $img\n";
        }

        print join("\n",
            '\begin{Figure}',
            '\centering',
            '\includegraphics[width=0.65\linewidth,'
                . 'height=0.85\textheight,keepaspectratio]{' . $img . '}',
            '\captionof{figure}{' . $cap . '}',
            '\end{Figure}');
        return;
    }

    # Rewrite supported inline markup in place before the paragraph is handed
    # to filter(). A node without children simply skips this loop.
    $node->children->each(sub {
        my $child = shift;
        my $tag   = $child->type;
        if ($tag eq 'b') {
            $child->replace('{\bf ' . $child->text . '}');
        }
        else {
            print STDERR "UNHANDLED MARKUP TYPE: " . $tag . "\n";
        }
    });

    print filter($node);
});