#!/usr/bin/perl
use File::Path; use Data::Dumper;
use LWP::UserAgent;
use XML::RSS::LibXML;
use POSIX qw(strftime);
use Time::HiRes qw(gettimeofday tv_interval);
# Poll a single RSS feed forever and mirror every newly listed article
# into /tmp.  Each pass:
#   1. downloads the feed to $feed_file and parses it,
#   2. collects the link of every item in the feed,
#   3. downloads each link that was not listed on the previous pass (on the
#      first pass nothing is known yet, so every link is downloaded),
#   4. rewrites $state_file with the current links, one per line.
my $client       = LWP::UserAgent->new();
my $rss          = XML::RSS::LibXML->new;
my $website_name = "usnews";
my $url          = "http://www.usnews.com/rss/health-news/index.rss";

# strict and warnings are lexically scoped; keeping them inside this bare
# block limits their effect to the polling code below.
{
    use strict;
    use warnings;

    # The name is part of every output path and never changes while running,
    # so an empty value is a configuration error rather than something to
    # spin on.
    die "website_name must not be empty\n" if $website_name eq "";

    my $feed_file     = '/tmp/.rss_download_file';
    my $state_file    = "/tmp/.$website_name";
    my $poll_interval = 1;    # seconds to wait between polls
    my @prev_links;           # links seen on the previous successful pass

    POLL:
    while (1) {
        print "polling: $website_name url: $url\n";

        # get() always returns a response object, so a failure has to be
        # detected through the response itself, not the truth of the return
        # value.  Skip this pass instead of parsing a stale or missing file.
        my $feed_resp = $client->get($url, ':content_file' => $feed_file);
        if (!$feed_resp->is_success) {
            warn "could not fetch $url: " . $feed_resp->status_line . "\n";
            sleep $poll_interval;
            next POLL;
        }

        $rss->parsefile($feed_file);
        print "channel: $rss->{channel}->{title}\n";

        # Links of all items currently in the feed; items without a link
        # are ignored.
        my $items     = $rss->{items} || [];
        my @cur_links = grep { defined } map { $_->{link} } @{$items};

        # Build the lookup table once, then keep only the links that were
        # not present on the previous pass.
        my %was_listed = map { $_ => 1 } @prev_links;
        my @new_links  = grep { !$was_listed{$_} } @cur_links;

        for my $link (@new_links) {
            my $file_name = "/tmp/.${website_name}_" . getFileName();
            print "Getting $link -> $file_name\n";

            # A single failed article must not stop the poller.
            my $page_resp = $client->get($link, ':content_file' => $file_name);
            warn "could not fetch $link: " . $page_resp->status_line . "\n"
                unless $page_resp->is_success;

            # TODO: Pull out the current Item tag ( - ..... )
        }

        @prev_links = @cur_links;

        # Persist the current listing, one link per line.
        open my $out, '>', $state_file
            or die "could not open file $!";
        print {$out} map { "$_\n" } @prev_links;
        close $out
            or die "could not write $state_file: $!";

        sleep $poll_interval;
    }
}
# getFileName()
#
# Builds a timestamp-based file name for a downloaded page.
#
# Takes no arguments.
#
# Returns a string of the form "YYYYMMDDHHMMSSuuuuuu.html": the local date
# and time followed by the zero-padded microseconds of the same clock
# reading.  The microseconds keep names distinct when several pages are
# saved within one second.
sub getFileName
{
    # One clock reading supplies both the seconds and the microseconds, so
    # the two parts of the name cannot disagree across a second boundary.
    my ($seconds, $microseconds) = gettimeofday();

    my $stamp = strftime('%Y%m%d%H%M%S', localtime($seconds));

    return sprintf('%s%06d.html', $stamp, $microseconds);
}