#!/usr/bin/perl
#
# Poll an RSS feed in an endless loop and download every article link that
# was not present in the previous poll. Each article is saved to its own
# timestamped file under /tmp, and the most recent list of links is written
# to /tmp/.<website_name> after every successful poll.

use strict;
use warnings;

use File::Path;
use Data::Dumper;
use LWP::UserAgent;
use XML::RSS::LibXML;
use POSIX qw(strftime);
use Time::HiRes qw(gettimeofday tv_interval);

my $website_name  = "usnews";
my $url           = "http://www.usnews.com/rss/health-news/index.rss";
my $feed_file     = '/tmp/.rss_download_file';    # raw copy of the feed
my $poll_interval = 1;                            # seconds between polls

# Validate once up front; checking inside the loop and skipping the
# iteration would spin forever without ever sleeping.
die "no website name configured\n" if $website_name eq "";

my $client = LWP::UserAgent->new();

# Links seen in the previous successful poll.
my @prev_links;

POLL:
while (1) {
    print "polling: $website_name url: $url\n";

    # A failed fetch or parse leaves @prev_links untouched, so the next
    # successful poll is still compared against the last good listing.
    my $cur_links = fetch_feed_links( $client, $url, $feed_file );
    next POLL if !defined $cur_links;

    # New links are those in the current listing and not in the previous
    # one. With an empty previous listing (first poll, or after an empty
    # feed) every current link counts as new.
    my %was_listed = map { $_ => 1 } @prev_links;
    my @new_links  = grep { !$was_listed{$_} } @{$cur_links};

    for my $link (@new_links) {
        download_link( $client, $link, $website_name );
    }

    @prev_links = @{$cur_links};
    save_links( "/tmp/.$website_name", @prev_links );
}
continue {
    # Runs after every iteration, including those ended early by "next".
    sleep $poll_interval;
}

# Download the feed to $feed_path, parse it, and collect the item links.
#
# Parameters: $ua        - LWP::UserAgent used for the request
#             $feed_url  - URL of the RSS feed
#             $feed_path - file the feed body is written to
# Returns:    an array reference of link strings (possibly empty), or
#             undef when the feed could not be fetched or parsed.
sub fetch_feed_links {
    my ( $ua, $feed_url, $feed_path ) = @_;

    # get() returns a response object even on failure, so success has to
    # be tested explicitly.
    my $response = $ua->get( $feed_url, ':content_file' => $feed_path );
    if ( !$response->is_success ) {
        warn "could not fetch $feed_url: ", $response->status_line, "\n";
        return;
    }

    # A fresh parser per poll, so nothing from an earlier parse is reused.
    my $rss = XML::RSS::LibXML->new;
    if ( !eval { $rss->parsefile($feed_path); 1 } ) {
        warn "could not parse $feed_path: $@";
        return;
    }

    my $title = $rss->{channel}->{title};
    $title = '' if !defined $title;
    print "channel: $title\n";

    my $items = $rss->{items} || [];
    return [ grep { defined } map { $_->{link} } @{$items} ];
}

# Download one article into /tmp/.<site>_<timestamp>.html.
#
# Parameters: $ua   - LWP::UserAgent used for the request
#             $link - URL of the article
#             $site - website name used as the file name prefix
# Returns:    true when the download succeeded, false otherwise.
sub download_link {
    my ( $ua, $link, $site ) = @_;

    my $path = "/tmp/.${site}_" . getFileName();
    print "Getting $link -> $path\n";

    my $response = $ua->get( $link, ':content_file' => $path );
    if ( !$response->is_success ) {
        warn "could not fetch $link: ", $response->status_line, "\n";
    }

    # TODO: Pull out the current Item tag ( ..... )
    return $response->is_success;
}

# Write the given links to $path, one per line, replacing the old content.
# Dies when the file cannot be opened or closed.
sub save_links {
    my ( $path, @links ) = @_;

    open my $out, '>', $path or die "could not open file $path: $!";
    print {$out} map { "$_\n" } @links;
    close $out or die "could not close file $path: $!";

    return;
}

# Build a file name from the current local time:
# YYYYMMDDHHMMSS followed by six digits of microseconds and ".html".
# The microseconds keep names distinct when several downloads happen
# within the same second.
sub getFileName {
    my ( $seconds, $microseconds ) = gettimeofday();

    # Format the same instant gettimeofday() reported so the date part and
    # the microsecond part always belong together.
    my $stamp = strftime( '%Y%m%d%H%M%S', localtime $seconds );

    return sprintf '%s%06d.html', $stamp, $microseconds;
}