#!/usr/bin/perl -w
use HTML::TreeBuilder;
use Storable qw(nstore retrieve);
use Data::Dumper::HTML qw(dumper_html);
# Build one blog-result record from a single alert paragraph element.
#
# Parameters:
#   $p - element object for one <p> of the alert; it must support
#        look_down(), as_HTML() and hand back anchors that support
#        attr() and as_text().
#
# Returns a hash reference with the keys 'link', 'title', 'source',
# 'sourceurl' and 'excerpt', or an empty list when the paragraph has no
# anchor carrying an href attribute (nothing to record).
sub _google_alert_blog_entry {
    use strict;
    my ($p) = @_;

    # look_down() yields element objects, so take the first <a> as an
    # object rather than treating the returned list as an attribute hash.
    my ($anchor) = $p->look_down( '_tag', 'a' );
    return if not defined $anchor;

    my $href = $anchor->attr('href');
    return if not defined $href;

    # 'link' is the href without trailing whitespace; 'sourceurl' keeps the
    # attribute value exactly as found.
    ( my $link = $href ) =~ s/\s+$//;

    # The anchor text doubles as the title; its part before the first
    # " - " is used as the source name.
    ( my $label = $anchor->as_text ) =~ s/<.+?>//g;
    ( my $title = $label ) =~ s/^\s+|\s+$//g;

    my ($source) = split( / \- /, $label, 2 );
    $source =~ s/^\s+|\s+$//g if defined $source;

    # NOTE(review): the separator below is a newline because that is what
    # the original pattern contained; it may be a mangled "<br>" tag.
    # Confirm against real alert markup before relying on 'excerpt'.
    my @segments = split( /\n/, $p->as_HTML );

    # The excerpt is the second segment, unless that segment is the grey
    # (#666666) line, in which case the third segment is used instead.
    my $excerpt = $segments[1];
    if ( defined $excerpt and $excerpt =~ /color="\#666666"/i ) {
        $excerpt = $segments[2];
    }
    $excerpt =~ s/<.+?>//g if defined $excerpt;

    return {
        'link',      $link,
        'title',     $title,
        'source',    $source,
        'sourceurl', $href,
        'excerpt',   $excerpt,
    };
}

# Parse the HTML body of an alert e-mail and collect its blog entries.
#
# Parameters:
#   $html    - HTML text handed to HTML::TreeBuilder->new_from_content.
#   $results - hash reference; one record per blog entry is pushed onto
#              the array at $results->{'blog'} (created on demand).
#
# The paragraphs are walked in document order. A paragraph whose text
# matches "blogs alert" or "news alert" switches the current section, and
# "create another alert" ends it. Paragraphs outside a section are ignored.
#
# Returns $results. When a paragraph inside a news section is reached,
# parsing stops and the function returns nothing (empty list / undef);
# blog entries collected before that point remain in $results.
# NOTE(review): that early return is kept from the original behaviour;
# confirm whether news paragraphs should merely be skipped instead.
sub google_alert {
    use strict;
    my ( $html, $results ) = @_;

    my $tree       = HTML::TreeBuilder->new_from_content($html);
    my @paragraphs = $tree->look_down( '_tag', 'p' );

    # Start outside any section so the comparisons below never see undef.
    my $type          = '';
    my $hit_news_item = 0;

    PARAGRAPH:
    for my $p (@paragraphs) {
        my $ptext = $p->as_text;

        # Section markers only switch state; they carry no entry themselves.
        if ( $ptext =~ /blogs alert/i ) {
            $type = 'blog';
            next PARAGRAPH;
        }
        if ( $ptext =~ /news alert/i ) {
            $type = 'news';
            next PARAGRAPH;
        }
        if ( $ptext =~ /create another alert/i ) {
            $type = '';
            next PARAGRAPH;
        }

        if ( $type eq 'news' ) {
            $hit_news_item = 1;
            last PARAGRAPH;
        }
        next PARAGRAPH if $type ne 'blog';

        my $entry = _google_alert_blog_entry($p);
        push @{ $results->{'blog'} }, $entry if $entry;
    }

    # Only plain strings were copied out of the tree, so it can be
    # released explicitly on every path.
    $tree->delete;

    return if $hit_news_item;
    return $results;
}