#!c:\perl\bin\perl
use LWP::UserAgent;
use XML::RSS;
# We're running this off of a Windows machine, connecting to a M$SQL server,
# although any old SQL server would do (e.g. MySQL).
use Win32::ODBC;
# Name of the ODBC data source that holds the news tables.
my $DSN = "TESTSERVER";

# Create a new UserAgent to pull the XML data down; our own product token is
# put in front of the library's default agent string.
my $ua = LWP::UserAgent->new;
$ua->agent("HeadlineAgent/0.1 ".$ua->agent);

# Connect via ODBC to the SQL server. Everything below needs the connection,
# so on failure report the error and stop with a non-zero exit status.
my $db = Win32::ODBC->new($DSN);
if (!$db) {
    print "Error connecting to $DSN\n";
    print "Error: " . Win32::ODBC::Error() . "\n";
    exit 1;
}
# We'll be pulling in RSS files from various sources; their URLs are stored
# in the SQL database. %sources maps each ExternalNewsSourceID to its Source
# value, which is later used as the URL to fetch.
my %sources;
if ($db->Sql("SELECT * FROM ExternalNewsSources")) {
    # A true return from Sql() is treated as failure: report it, release the
    # connection and stop with a non-zero exit status.
    print "SQL failed.\n";
    print "Error: " . $db->Error() . "\n";
    $db->Close();
    exit 1;
}
while ($db->FetchRow()) {
    my %data = $db->DataHash();

    # A row without an id or a URL cannot be fetched or stored against.
    next unless defined $data{'ExternalNewsSourceID'}
             && defined $data{'Source'};

    $sources{ $data{'ExternalNewsSourceID'} } = $data{'Source'};
}
# Create the RSS object to parse the RSS files retrieved...
# (direct method call rather than the ambiguous indirect-object "new Class")
my $rss = XML::RSS->new;
# Build the posting timestamp once, in local time, as "MM/DD/YYYY HH:MM:SS":
# a preformatted string compatible with SQLServer's timestamp field.
my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);

# localtime() returns a 0-based month and a year counted from 1900.
my $nowstring = sprintf("%02i/%02i/%i %02i:%02i:%02i",
    $mon + 1, $mday, $year + 1900, $hour, $min, $sec);
# Escape a value for embedding inside a single-quoted SQL string literal by
# doubling every single quote. An undefined value becomes the empty string.
# Titles, links and descriptions come from remote feeds, so they must not be
# spliced into a statement unescaped.
sub _sql_quote {
    my ($value) = @_;
    return '' unless defined $value;
    $value =~ s/'/''/g;
    return $value;
}

# Walk through each of the XML sources
foreach my $sourceid (keys %sources) {
    my $url = $sources{$sourceid};

    # fetch RSS file from the source's URL
    my $request = HTTP::Request->new(GET => $url);
    my $result  = $ua->request($request);
    if (!$result->is_success) {
        # Report the status line of the failed response; $! is the last
        # OS-level error and does not describe an HTTP failure.
        print "Doh! couldnt get ".$url.": ".$result->status_line."\n";
        next;
    }

    # grok the RSS file retrieved. A fresh parser is used for every feed:
    # the original author observed that a reused parser object appeared to
    # carry item data over from one parse to the next.
    # The eval keeps a parse failure on one feed from aborting the whole run.
    my $feed   = XML::RSS->new;
    my $parsed = eval { $feed->parse($result->content); 1 };
    if (!$parsed) {
        my $why = $@ || "unknown error\n";
        chomp $why;
        print "Doh! couldnt parse ".$url.": ".$why."\n";
        next;
    }

    # Step through all the links in the RSS
    for my $item (@{ $feed->{items} }) {
        my $title = $item->{'title'};

        # Sometimes the RSS mis-parses and gives us an empty item; skip it
        # before spending a database query on it.
        next unless defined $title && length $title;

        my $quoted_link = _sql_quote($item->{'link'});

        # Check to see if we've already seen this link from this source before...
        my $lookup = "SELECT * FROM ExternalNews WHERE SourceID=".$sourceid
                   . " AND Link = '".$quoted_link."'";
        if ($db->Sql($lookup)) {
            # Without a reliable answer we cannot rule out a duplicate, so
            # report the failure and leave this item alone.
            print "SQL failed.\n";
            print "Error: " . $db->Error() . "\n";
            next;
        }
        next if $db->FetchRow();    # skip it - it's here already...

        # Plunk it into the database
        my $insert = sprintf(
            "INSERT INTO ExternalNews (SourceID,PostDate,Title,Link,Description)"
          . " VALUES (%s,'%s','%s','%s','%s')",
            $sourceid,
            $nowstring,
            _sql_quote($title),
            $quoted_link,
            _sql_quote($item->{'description'}),
        );
        if ($db->Sql($insert)) {
            print "SQL failed.\n";
            print "Error: " . $db->Error() . "\n";
        }
    }
}
# All sources have been processed; release the ODBC connection.
$db->Close;
Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
Read "Where should I post X?" if you're not absolutely sure you're posting in the right place.
Please read these before you post! —
Posts may use any of the Perl Monks Approved HTML tags:
- a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
Outside of code tags, you may need to use entities for some characters:
| For: | Use: |
| & | &amp; |
| < | &lt; |
| > | &gt; |
| [ | &#91; |
| ] | &#93; |
Link using PerlMonks shortcuts! What shortcuts can I use for linking?
See Writeup Formatting Tips and other pages linked from there for more info.
|
|