# Parse the HTML body of a Google alert message (as the sub's name suggests)
# and collect the blog entries it contains.
#
# Arguments:
#   $html    - HTML source of the alert body.
#   $results - hash reference to add to. Every blog entry found is pushed
#              onto @{ $results->{blog} } as a hash with the keys
#              link, title, source, sourceurl and excerpt.
#
# Returns $results.
#
# The document is walked one <p> element at a time. A paragraph whose text
# matches "blogs alert" or "news alert" switches the current section and
# "create another alert" ends it; any other paragraph is treated as an entry
# of the current section. Fields that cannot be found are left undefined.
sub parsegooglealertbody {
    my ($html, $results) = @_;

    my $tree       = HTML::TreeBuilder->new_from_content($html);
    my @paragraphs = $tree->look_down("_tag", "p");

    # Start with an empty section so that paragraphs preceding the first
    # section header are skipped without "uninitialized value" warnings.
    my $type = '';

    for my $p (@paragraphs) {
        my $ptext = $p->as_text;

        # Section markers only change state; they are never entries.
        if ($ptext =~ /blogs alert/i) {
            $type = 'blog';
            next;
        }
        elsif ($ptext =~ /news alert/i) {
            $type = 'news';
            next;
        }
        elsif ($ptext =~ /create another alert/i) {
            $type = '';
            next;
        }

        if ($type eq 'news') {
            # The news-alert handling was elided ("[snip]") in the source
            # under review; it belongs here.
        }
        elsif ($type eq 'blog') {
            my @anchors = $p->look_down("_tag", "a");
            next unless @anchors;

            # A debugging dump of @anchors to "anchors.sto" (via nstore) used
            # to happen here; it wrote a file on every entry and was dropped.

            # The first anchor carries the entry's URL and title.
            my $link = $anchors[0]->attr('href');
            $link =~ s/\s+$// if defined $link;

            (my $title = $anchors[0]->as_text()) =~ s/<.+?>//g;
            $title =~ s/^\s+|\s+$//g;

            # The second anchor, when present, carries the source. Guarding
            # on its existence avoids calling a method on an undefined value
            # for paragraphs that contain a single link.
            my ($source, $sourceurl);
            if (@anchors > 1) {
                $sourceurl = $anchors[1]->attr('href');
                (my $temp = $anchors[1]->as_text()) =~ s/<.+?>//g;

                # Keep only the part of the link text before the first " - ".
                ($source) = split(/ \- /, $temp);
                $source =~ s/^\s+|\s+$//g if defined $source;
            }

            # Cut the paragraph's markup at its line breaks. The tag may be
            # serialized as <br> or <br />, so both spellings are accepted.
            # NOTE(review): the reviewed source showed literal line breaks in
            # these patterns, presumably <br> tags lost when the code was
            # pasted through an HTML renderer -- confirm against the original.
            my @segments = split(m{<br\s*/?>}i, $p->as_HTML);

            # A segment containing color="#666666" is taken to be a byline;
            # when it is present the excerpt is the segment after it.
            my $has_byline = defined $segments[1]
                && $segments[1] =~ /color\=\"\#666666\"/i;
            my $excerpt = $has_byline ? $segments[2] : $segments[1];
            $excerpt =~ s/<.+?>//g if defined $excerpt;

            push(
                @{ $results->{blog} },
                {
                    link      => $link,
                    title     => $title,
                    source    => $source,
                    sourceurl => $sourceurl,
                    excerpt   => $excerpt,
                }
            );
        }
    }

    # Release the parse tree explicitly; older HTML::Element releases hold
    # circular parent/child references that only delete() breaks.
    $tree->delete;

    return $results;
}