use HTML::TokeParser::Simple;
use strict;
use warnings;

# Fetch a page and print the text found inside every <div id="content">
# element, including text inside nested <div>s. Markup is dropped; only
# text tokens are printed, exactly as they appear in the source.
my $url = 'http://domain.com/?xxxxxxx';

# Guard against a false return so that later method calls do not fail with
# an obscure "can't call method on undefined value" error.
my $p = HTML::TokeParser::Simple->new(url => $url)
    or die "Could not create an HTML parser for $url\n";

DIV:
while (my $tag = $p->get_tag('div')) {
    my $id = $tag->get_attr('id');
    next DIV unless defined $id and $id eq 'content';

    # Nesting depth of <div> elements relative to the matched one. It is
    # reset for every match so one unbalanced region cannot skew the next.
    my $depth = 1;

    TOKEN:
    while (my $token = $p->get_token) {
        if ($token->is_start_tag('div')) {
            $depth++;
        }
        elsif ($token->is_end_tag('div')) {
            $depth--;

            # Stop as soon as the matched <div> is closed. This check has to
            # happen on the end tag itself: deferring it until the next text
            # token would print text that lies outside the content <div>.
            last TOKEN if $depth == 0;
        }
        elsif ($token->is_text) {
            # TODO: an earlier, disabled attempt collapsed runs of whitespace
            # here; if that is wanted, apply s/\s{2,}/ /g to a copy of the
            # text before printing.
            print $token->as_is;
        }
    }
}