# Walk the paragraphs and tables inside the '.article-bodycopy' element of
# $html and print a LaTeX rendering of each to STDOUT:
#   * <p class="SubHead">  -> \subsection{...}
#   * <table>              -> a Figure environment built from its first <img>
#                             and its first '.Figure1' element (the caption)
#   * anything else        -> the node passed through filter(), with direct
#                             <b> children rewritten to {\bf ...} first
# Markup that is not handled is reported on STDERR.
my $dom  = Mojo::DOM->new($html);
my $body = $dom->at('.article-bodycopy')
    or die "No .article-bodycopy element found in input\n";

# Return the element's tag name. Mojolicious 6.0 moved the tag name from
# `type` to `tag` (where `type` now reports the node type, e.g. "tag"), so
# prefer `tag` when the installed Mojo::DOM provides it.
my $tag_name = sub {
    my $el = shift;
    return $el->can('tag') ? $el->tag : $el->type;
};

$body->find('p, table')->each(sub {
    my $node = shift;

    # Most elements carry no class attribute; default to '' so that the
    # string comparison does not warn about an uninitialized value.
    my $class = $node->{class} // '';

    if ($class eq 'SubHead') {
        print '\subsection{' . $node->text . "}";
        return;
    }

    if ($tag_name->($node) eq 'table') {
        my $img_node = $node->find('img')->first;
        my $img      = $img_node ? $img_node->{src} : undef;
        if (!defined $img) {
            # A table without an image source cannot be rendered as a figure.
            print STDERR "TABLE WITHOUT IMAGE SKIPPED\n";
            return;
        }

        my $cap_node = $node->find('.Figure1')->first;
        my $cap      = $cap_node ? filter($cap_node) : '';

        # Only rewrite the file extension itself (end of the path, or just
        # before a query string / fragment), not a ".gif" inside a directory.
        $img =~ s{\.gif(?=\z|[?\#])}{.png};

        print join("\n",
            '\begin{Figure}',
            '\centering',
            '\includegraphics[width=0.65\linewidth,'
                . 'height=0.85\textheight,keepaspectratio]{' . $img . '}',
            '\captionof{figure}{' . $cap . '}',
            '\end{Figure}');
        return;
    }

    # Rewrite inline markup in place before filtering. The collection is
    # empty for a node without sub-tags, so the loop is then a no-op.
    $node->children->each(sub {
        my $child = shift;
        my $tag   = $tag_name->($child);
        if ($tag eq 'b') {
            $child->replace('{\bf ' . $child->text . '}');
        }
        else {
            print STDERR "UNHANDLED MARKUP TYPE: " . $tag . "\n";
        }
    });
    print filter($node);
});