I was able to use the HTML::TreeBuilder module to give the output some structure.
# Parse all of the contents of $file. parse_file() returns undef when the
# file cannot be opened (with the reason in $!), so stop with that error
# instead of carrying on silently.
my $parser = HTML::TreeBuilder->new();
$parser->parse_file($file)
    or die "Cannot parse '$file': $!\n";
# Now display the contents of $parser, starting at indentation level 0.
recurse($parser, 0);
exit;
# Print $node and everything beneath it, one item per line. Each line
# is preceded by $depth spaces; children are printed with $depth + 1.
sub recurse {
    my ($node, $depth) = @_;

    # Indent in proportion to the current level of recursion.
    my $indent = " " x $depth;
    print $indent;

    if (!ref $node) {
        # A non-reference is just a piece of text from the HTML file.
        print $node, "\n";
    }
    else {
        # A reference is an HTML::Element: show its tag (for example
        # "html" or "li"), then descend into each of its child nodes
        # as returned by content_list().
        print $node->tag(), "\n";
        for my $kid ($node->content_list()) {
            recurse($kid, $depth + 1);
        }
    }
}
How can I extract the text data from the following tags (shown here as printed by the code above)?
div
div
FillDB File Size Limit:
div
0.0% ( 0 / 3145728 Bytes )
div
div
FillDB File Count Limit:
div
0.0% ( 0 / 10000 Files )