Install the required modules for this RSS aggregator, then customize the MySQL database table it uses to look like this, otherwise you won't be able to actually store the RSS feeds you poll, and the code will fail:
+---------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+---------+--------------+------+-----+---------+-------+
| feedurl | varchar(255) | | PRI | | |
| nextup | int(11) | YES | | NULL | |
| lastmod | varchar(40) | YES | | NULL | |
| etag | varchar(250) | YES | | NULL | |
| content | longtext | YES | | NULL | |
+---------+--------------+------+-----+---------+-------+
Then set up the code to run as a cron job every hour.
#!/usr/bin/perl -w
use strict; use warnings;
# list your feeds below in the format shown; leave the rest of the fil
+e alone
my(@feeds) = (
# feedurl # forced refresh i
+n seconds
['http://rss.news.yahoo.com/rss/world', 60 * 60], # h
+ourly
['http://www.microsite.reuters.com/rss/topNews', 60 * 60], # h
+ourly
['http://feeds.feedburner.com/TommysNewsAndWorldReport', 60 * 60],
+# hourly
['http://perlmonks.org/index.pl?node_id=30175&xmlstyle=rss', 60 * 6
+0], # hourly
['http://www.wordsmith.org/awad/rss1.xml', 60 * 60 * 24], # d
+aily
['http://xml.education.yahoo.com/rss/wotd/', 60 * 60 * 24], # d
+aily
['http://netrn.net/spywareblog/feed/rss2/', 60 * 60 * 24], # d
+aily
);
# globals
use vars qw( $dbh );
# libraries
use XML::RSS::TimingBotDBI;
use DBI;
# connect to DB
$dbh = DBI->connect(
q[DBI:mysql:] .
qq[database=myrssfeeds;] .
qq[host=localhost;] .
qq[port=3306],
'[PUT YOUR USERNAME HERE]', # MySQL DB username
'[PUT YOUR PASSWORD HERE]', # ...and password
{ 'RaiseError' => 0, 'AutoCommit' => 1 }
) or die qq[Aborting! Failed to connect to database: $DBI::errstr];
foreach (@feeds) {
my($feed) = $_;
# check for an entry in the db corresponding to this feed
my($row) = ( $dbh->selectrow_array(<<__SQL__, undef, $feed->[0]) )[
+0];
SELECT feedurl FROM feeds WHERE feedurl = ?
__SQL__
unless ($row) { # auto-create db entry for this feed if it doesn't
+exist
$dbh->do(q[INSERT INTO feeds SET feedurl = ?], undef, $feed->[0]
+)
}
# grab the feed and thbbbtave it
getfeed(@$_);
}
sub getfeed {
my($rssurl,$maxage) = @_;
# initialize the RSS bot!
my($rssbot) = XML::RSS::TimingBotDBI->new;
$rssbot->rssagent_dbh($dbh);
$rssbot->rssagent_table('feeds');
$rssbot->maxAge($maxage) if $maxage;
$rssbot->maxAge($maxage) if $maxage;
# grab the RSS feed
my($response) = $rssbot->get($rssurl);
# check response code
if ($response->code == 200) {
# save RSS feed content if it was successfully retrieved
my($sth) =
$dbh->prepare(q[UPDATE feeds SET content = ? WHERE feedurl =
+?])
or die q[RSSBOT: Aborting! Problem encountered with MySQL: ]
. $DBI::errstr;
$sth->execute($response->content, $rssurl)
or die q[RSSBOT: Aborting! Problem encountered with MySQL: ]
. $DBI::errstr;
$sth->finish();
print qq[RSSBOT: RSS feed "$rssurl" freshly retrieved to databas
+e\n]
}
elsif ($response->code == 304) {
print qq[RSSBOT: feed "$rssurl" already up to date. No need to
+refresh\n]
}
else {
# report the error and abort if there was a problem getting the
+feed
die qq[RSSBOT: Aborting! Problem accessing feed "$rssurl": ]
. $response->status_line
}
# have the rss bot save it's RSS lookup history...
# $rssbot->commit; #<-- only necessary if MySQL auto-commit is off
# ...or die trying
die q[RSSBOT: Aborting! Problem encountered while working with MyS
+QL: ]
. $DBI::errstr if $DBI::errstr;
# update OK
print qq[RSSBOT: update OK at ${\ scalar localtime }\n];
}
# scram
exit;
# disconnect if not already disconnected
END { $dbh->disconnect() if defined $dbh }
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.