# Parse the HTML body of a Google Alerts e-mail and collect its entries.
#
# Arguments:
#   $html    - HTML source of the alert message.
#   $results - hash reference to fill in.  Every blog entry found is pushed
#              onto @{ $results->{blog} } as a hash with the keys link, title,
#              source, sourceurl and excerpt; a value is undef when the
#              paragraph does not supply it.
#
# Returns the same $results reference.
#
# The <p> elements are visited in document order.  A paragraph whose text
# contains "blogs alert" or "news alert" selects the current section, and one
# containing "create another alert" clears it; every other paragraph is
# handled as an entry of the current section.
sub parsegooglealertbody {
    my ($html,$results) = @_;
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @paragraphs = $tree->look_down("_tag","p");

    # Start with an empty section name so the comparisons below never see an
    # undefined value for paragraphs that come before the first heading.
    my $type = '';

    # The index-based loop and the names $i, $p, $ptext and the five result
    # variables are kept unchanged: the news branch is not visible here and
    # may refer to any of them.
    for my $i (0..$#paragraphs) {
        my $p = $paragraphs[$i];
        my $ptext = $p->as_text;
        if ( $ptext =~ /blogs alert/i) {
            $type = 'blog';
            next;
        }
        elsif ($ptext =~ /news alert/i) {
            $type = 'news';
            next;
        }
        elsif ($ptext =~ /create another alert/i) {
            $type = '';
            next;
        }
        my ($link,$title,$source,$sourceurl,$excerpt);
        if ($type eq 'news') {
            # Handling of news-alert paragraphs is not visible in this view of
            # the file; the placeholder below is reproduced unchanged.
            [snip]
        }
        elsif ($type eq 'blog') {
            my @anchors = $p->look_down("_tag","a");
            if (@anchors) {
                # The first anchor supplies the entry's link and title.
                $link = $anchors[0]->attr('href');
                $link =~ s/\s+$// if defined $link;
                ($title = $anchors[0]->as_text()) =~ s/<.+?>//g ;
                $title =~ s/^\s+|\s+$//g;

                # The second anchor, when there is one, supplies sourceurl and
                # (from its text before the first " - ") source.  A paragraph
                # with a single anchor used to abort the whole parse here.
                if (@anchors > 1) {
                    $sourceurl = $anchors[1]->attr('href');
                    (my $temp = $anchors[1]->as_text()) =~ s/<.+?>//g ;
                    ($source) = split(/ \- /,$temp);
                    $source =~ s/^\s+|\s+$//g if defined $source;
                }

                my $snippet = $p->as_HTML;
                # NOTE(review): as written, the substitution replaces a newline
                # with a newline and the split is on newlines.  The byline test
                # below expects markup inside the segments, so these patterns
                # may originally have named an HTML line-break tag that was
                # lost -- confirm against the upstream source.  Both statements
                # are left exactly as found.
                $snippet =~ s/
/
/ig;
                my @segments = split(/
/i,$snippet);

                # A second segment carrying color="#666666" is treated as a
                # byline; the excerpt is then the segment after it, otherwise
                # it is the second segment itself.
                my $has_byline = defined $segments[1]
                    && $segments[1] =~ /color\=\"\#666666\"/i;
                my $raw_excerpt = $has_byline ? $segments[2] : $segments[1];
                if (defined $raw_excerpt) {
                    ($excerpt = $raw_excerpt) =~ s/<.+?>//g;
                }

                push(@{ $results->{blog} },{link => $link, title => $title, source=> $source, sourceurl => $sourceurl, excerpt => $excerpt});
            }
        }
    }

    # Only plain strings were copied into $results, so the parse tree can be
    # released explicitly before returning.
    $tree->delete;
    return $results;
}