Beefy Boxes and Bandwidth Generously Provided by pair Networks
Think about Loose Coupling
 
PerlMonks  

download mp3s listed in RSS feed

by blahblahblah (Priest)
on Jan 27, 2007 at 02:36 UTC ( #596817=sourcecode: print w/replies, xml ) Need Help??
Category: web stuff
Author/Contact Info Joe Cullin blahblahblah
Description: Scans WFMU's MP3 archive RSS Feed for certain show titles, and then downloads those shows.

There's no particular reason to use POE::Component::RSSAggregator rather than XML::RSS::Feed, other than the fact that I heard about the POE version first and was interested in trying something in POE. (Thanks again, everyone, for helping me get around the problems caused by my out-of-date POE in the thread "POE::Component::RSSAggregator breaks LWP::Simple::get".)

Also, I heartily recommend this station to everyone!

use strict;
use warnings;
use POE qw(Component::RSSAggregator);

#################################################################

# Alternation of show-title fragments.  handle_feed matches it
# case-insensitively against the show name parsed from each headline, and
# downloads the shows that match.
my $wantedShowsPattern = join ('|',
                   'sinner',
                   'soulville',
                   'Laura Cantrell',
                   'mister c',
                   'billy jam', # will get "unshackled..." too?
                   'coffee',    # does this feed include coffee2go also?
                   'Dave Emory',

                   # 7 Second Delay?
                   # Ken's show?
                   # Pseu Braun?
                   # Irwin (calypso 2-3PM)?
                  );

# Directory the MP3 files are saved into.  The trailing slash matters:
# processUrl appends the file name to this string directly.
my $downloadDir = 'E:/wfmu/';

# Feed descriptions posted to the aggregator's 'add_feed' event.
# NOTE(review): delay is presumably the polling interval in seconds --
# confirm against the POE::Component::RSSAggregator documentation.
my @feeds = (
         {
          url   => "http://wfmu.org/archivefeed/mp3.xml",
          name  => "wfmu_mp3",
          delay => 3600,
         },
        );

#################################################################

# Refuse to start unless the download directory already exists and is
# writable; this script never creates it.
-d $downloadDir
  or die("download dir $downloadDir must be created.\n");
-w $downloadDir
  or die("download dir $downloadDir must be writable.\n");

# A single session: _start sets up the aggregator and registers the feeds,
# handle_feed is the state the aggregator's callback posts back to.
POE::Session->create(
             inline_states => {
                       _start      => \&init_session,
                       handle_feed => \&handle_feed,
                      },
            );

# Hand control to the POE event loop.
$poe_kernel->run();

# _start handler of the main session.
#
# Builds the POE::Component::RSSAggregator object, stores it in the session
# heap under 'rssagg', points its callback at this session's "handle_feed"
# state, and then posts one 'add_feed' event per entry of @feeds.
sub init_session
{
  my $kernel  = $_[KERNEL];
  my $heap    = $_[HEAP];
  my $session = $_[SESSION];

  # Kept as an ordered list so the constructor receives the arguments in
  # the same order as before.
  my @aggregatorArgs = (
        alias    => 'rssagg',
        debug    => 1,
        callback => $session->postback("handle_feed"),
        tmpdir   => 'f:/cgi/wfmu/',
    );
  $heap->{rssagg} = POE::Component::RSSAggregator->new(@aggregatorArgs);

  foreach my $feedSpec (@feeds)
  {
    $kernel->post( 'rssagg', 'add_feed', $feedSpec );
  }
}

# Postback target: called with a feed object each time the aggregator
# reports on a feed.
#
# This state is reached through $session->postback("handle_feed"), so the
# feed object is read from the first element of the array reference in
# ARG1.
#
# Prints a timestamped banner and every headline returned by the feed's
# late_breaking_news method.  When the show name parsed from a headline
# matches $wantedShowsPattern (case-insensitively), the headline's URL is
# passed to processUrl().
sub handle_feed
{
    my $feed = $_[ARG1]->[0];
    printf "\n========= %s ===============\n", scalar(localtime);
    for my $headline ( $feed->late_breaking_news )
    {
      my $title = $headline->headline();
      print $title . "\n";

      # parseHeadline() returns nothing for a headline it cannot parse;
      # dereferencing that result directly would die under "use strict",
      # so unparsable headlines are skipped explicitly.
      my $parsed = parseHeadline($title);
      next unless $parsed;
      next unless $parsed->{'show'} =~ m/$wantedShowsPattern/i;

      print "\n----- DOWNLOADING ... ---------------------\n";
      print "     url:  " . $headline->url() . "\n";

      processUrl($headline->url());

      print "\n";
    }
}

# Download the MP3 that a playlist URL points at.
#
# $url must look like the address of an .m3u playlist; the content fetched
# from it is treated as the URL of the MP3 itself.  The MP3 is written to
# $downloadDir under the base name of that URL's path, unless a file with
# that name already exists.  Progress and problems are printed to STDOUT.
#
# Returns a true value after a successful download and nothing otherwise.
sub processUrl
{
  # "use" takes effect at compile time wherever it is written; the modules
  # are grouped here so the sub's dependencies can be seen in one place.
  use LWP::Simple;
  use URI;
  use File::Basename;

  my $url = shift;
  if (!defined $url || $url !~ /\.m3u/i) {
    print "Invalid playlist url?\n";
    return;
  }

  print "retrieving m3u file...\n";
  my $mp3Url = LWP::Simple::get($url);

  # get() returns undef on failure; test that before using the value in a
  # string or a match.
  if (!defined $mp3Url) {
    print "Either the get failed or the content is unusable?\n";
    return;
  }

  # Strip surrounding whitespace so that a trailing "\r\n" in the playlist
  # does not defeat the extension check below.
  $mp3Url =~ s/\A\s+//;
  $mp3Url =~ s/\s+\z//;

  print "mp3 url:  $mp3Url\n";
  if ($mp3Url !~ /mp3$/s) {
    print "Either the get failed or the content is unusable?\n";
    return;
  }

  # example url:
  # http://archive.wfmu.org:5555/archive/BJ/bj070119.mp3

  my $uriObj = URI->new($mp3Url);
  my $uriPath = $uriObj->path();
  my $baseFileName = basename($uriPath);

  if ($baseFileName eq '') {
    print "Botched processing of filename?\n";
    return;
  }

  my $mp3File = $downloadDir . $baseFileName;
  print "file: $mp3File\n";

  if (-e $mp3File) {
    print "File already exists!\n";
    return;
  }

  print "SAVING MP3 TO FILE...\n";
  my $responseCode = getstore($mp3Url, $mp3File);

  # getstore() returns the HTTP status code; only a success status means
  # the file was saved.
  if (!is_success($responseCode)) {
    print "Download failed (HTTP status $responseCode).\n";

    # The file did not exist before this attempt, so anything present now
    # was left by the failed transfer.  Remove it, otherwise the "already
    # exists" check above would block every later retry.
    unlink $mp3File if -e $mp3File;
    return;
  }

  print "done saving.\n";
  return 1;
}


# Split a feed headline into its show name and air date.
#
# Expected form (the "WFMU MP3 Archive:" prefix is optional):
#   WFMU MP3 Archive: <show name> from <Mon> <day>, <year>
#
# Returns a hash reference with the keys 'show', 'mon', 'mday' and 'year'
# on success.  Returns nothing (undef in scalar context) for an undefined
# or empty headline, and -- after printing a diagnostic -- for a headline
# that does not end in the expected date.
sub parseHeadline
{
  my $original = shift;
  return if !defined $original || $original eq '';

  # Work on a copy so the untouched headline is still available for the
  # diagnostic below.
  my $headline = $original;
  $headline =~ s/^WFMU\sMP3\sArchive:\s+//;
  if ($headline =~ s{
                      \sfrom
                      \s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
                      \s(\d+),
                      \s(\d{4})
                      $
                     }{}six)
  {
    # Copy the captures immediately, before any other match can reset them.
    # The caller in this file reads only 'show'; the date parts are
    # returned as well so they are available when they are needed.
    my ($mon, $mday, $year) = ($1, $2, $3);
    return {
            'show' => $headline,
            'mon'  => $mon,
            'mday' => $mday,
            'year' => $year,
           };
  }

  # Report $original rather than $_[0]: shift has already removed the
  # headline from @_, so $_[0] no longer refers to it.
  print "parse error on headline?\n  ( $original )\n";
  return;
}
Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://596817]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others studying the Monastery: (4)
As of 2019-12-15 05:08 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found

    Notices?