Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??
package WWW::CAKEmusic::News;

use strict;

# Debug
use warnings;
# use diagnostics;
# use LWP::Debug qw( + );
# use Data::Dumper;

use Carp;
use WWW::Mechanize;
use HTML::TokeParser::Simple;

use constant URL        => 'http://www.cakemusic.com/news.html';
use constant FEED_TITLE => 'CAKE News';
use constant FEED_DESC  => 'Recent news from the band, CAKE.';

# Could've used a Date module...
# Maps a lower-cased three-letter month abbreviation to its two-digit number.
my %months = (
    jan => '01', feb => '02', mar => '03', apr => '04',
    may => '05', jun => '06', jul => '07', aug => '08',
    sep => '09', oct => '10', nov => '11', dec => '12',
);

# Constructor. Takes no arguments; fetches and parses the news page
# immediately, so it croaks (via fetch) when the page cannot be retrieved.
sub new {
    my $class = shift;

    my $self = bless {}, $class;
    $self->fetch;

    return $self;
}

# Fetch the news page, store the raw HTML in $self->{ _raw } and parse it.
# Croaks with the HTTP status line when the request fails.
# Returns whatever parse() returns.
sub fetch {
    my $self = shift;

    # autocheck is switched off explicitly so that a failed request reaches
    # the croak below instead of being reported by WWW::Mechanize itself.
    my $agent = WWW::Mechanize->new( autocheck => 0 );
    $agent->get( URL );

    croak 'Error fetching ' . URL . ': ' . $agent->response->status_line
        unless $agent->success;

    $self->{ _raw } = $agent->content;

    return $self->parse;
}

# Turn the text that follows "Newsflash for" in a heading into 'YYYY-MM-DD'.
# Returns undef when no month name, day and four-digit year can be found.
#
# NOTE(review): the pattern in the original post was split by a line break,
# so its exact original form is uncertain; this version accepts headings
# such as "January 5, 2004" and "Week of January 5th, 2004" - confirm
# against the live page.
sub _parse_date {
    my $heading = shift;

    ( my $date_text = $heading ) =~ s/\A\s*Newsflash for (?:Week of)?\s*//i;

    my ( $month, $day, $year ) = $date_text =~ m{
        \A
        ( [A-Za-z]{3} ) \S*     # month name; only the first 3 letters matter
        \s+
        ( \d+ )                 # day of the month
        .*?
        ( \d{4} )               # year: the last run of four digits
        \D*
        \z
    }x;

    return unless defined $month;

    my $month_no = $months{ lc $month };
    return unless defined $month_no;

    return join( '-', $year, $month_no, sprintf( '%02d', $day ) );
}

# Parse the raw HTML in $self->{ _raw } into $self->{ items }: an array
# reference of hashes with the keys
#   title        - the full "Newsflash for ..." heading text
#   date         - 'YYYY-MM-DD', or undef when the heading has no usable date
#   descriptions - array reference with the text of each <li> news entry
# Returns that array reference.
sub parse {
    my $self = shift;

    my $raw     = defined $self->{ _raw } ? $self->{ _raw } : '';
    my $capture = 0;
    my $content = '';

    # remove all irrelevant parts of the content: keep everything from the
    # first "Newsflash for" line up to (not including) the first <DL> line
    for my $line ( split /\n/, $raw ) {
        $capture = 1 if !$capture and $line =~ /Newsflash for/;
        last if $line =~ /<DL>/;
        next if !$capture or $line =~ /^$/;

        $line =~ s/\s+/ /g;
        $content .= $line;
    }

    my $parser = HTML::TokeParser::Simple->new( \$content );

    # this will make sure text tokens won't be split
    $parser->unbroken_text( 1 );

    my @items;
    while ( my $token = $parser->get_token ) {
        my $text = $token->as_is;
        next unless $text =~ /\S/;

        if ( $token->is_text ) {

            # new day of news
            if ( $text =~ /Newsflash for/ ) {
                push @items, {
                    title        => $text,
                    descriptions => [],
                    date         => scalar _parse_date( $text ),
                };
            }

            # just plain text; it only belongs to an entry once a heading
            # and an <li> have been seen, anything earlier is ignored
            elsif ( @items and @{ $items[ -1 ]->{ descriptions } } ) {
                $items[ -1 ]->{ descriptions }->[ -1 ] .= $text;
            }
        }

        # each news item is a list-item
        elsif ( $token->is_start_tag( 'li' ) ) {
            push @{ $items[ -1 ]->{ descriptions } }, '' if @items;
        }
    }

    return $self->{ items } = \@items;
}

# use XML::RSS to make an RSS feed
# Returns the feed as a string, one RSS item per news entry. Each link is
# URL plus a '#<date>-<n>' fragment, n being the 1-based position of the
# entry within its day; undated days use 'item<k>' in place of the date.
sub as_rss {
    my $self = shift;

    require XML::RSS;

    my $feed = XML::RSS->new;
    $feed->channel(
        title       => FEED_TITLE,
        link        => URL,
        description => FEED_DESC,
    );

    my $item_no = 0;
    for my $item ( @{ $self->{ items } } ) {
        $item_no++;

        my $date   = $item->{ date };
        my $anchor = defined $date ? $date : "item$item_no";

        my $index = 0;
        for my $description ( @{ $item->{ descriptions } } ) {
            $index++;

            my %entry = (
                title       => $item->{ title },
                link        => URL . '#' . $anchor . '-' . $index,
                description => $description,
            );
            $entry{ dc } = { date => $date } if defined $date;

            $feed->add_item( %entry );
        }
    }

    return $feed->as_string;
}

# use XML::Atom to make an Atom feed
# Returns the feed as XML, one Atom entry per news entry, titled with the
# heading of the day it belongs to.
sub as_atom {
    my $self = shift;

    require XML::Atom::Feed;
    require XML::Atom::Entry;

    my $feed = XML::Atom::Feed->new;
    $feed->title( FEED_TITLE );

    for my $item ( @{ $self->{ items } } ) {
        for my $desc ( @{ $item->{ descriptions } } ) {
            my $entry = XML::Atom::Entry->new;
            $entry->title( $item->{ title } );
            $entry->content( $desc );
            $feed->add_entry( $entry );
        }
    }

    return $feed->as_xml;
}

package main;

# Only these output formats are supported; validating the argument before
# anything is fetched keeps arbitrary method names off the command line.
my %method_for = (
    rss  => 'as_rss',
    atom => 'as_atom',
);

my $format = defined $ARGV[ 0 ] ? lc $ARGV[ 0 ] : '';
my $output = $method_for{ $format }
    or die "Usage: $0 rss|atom\n";

my $news = WWW::CAKEmusic::News->new;
print $news->$output;

=head1 NAME

cakenews - grab the latest CAKE news in Atom or RSS format

=head1 SYNOPSIS

    cakenews.pl rss > cake.rss
    cakenews.pl atom > cake.atom

=head1 DESCRIPTION

This script grabs the contents of CAKE's news page, parses the entries
and outputs it as either an RSS feed or Atom feed.

=head1 NOTICE

Please do not abuse CAKE's server with this script. Consider using
WWW::Mechanize::Cached if you want to use this on a regular basis.

=cut

In reply to cakenews.pl - Syndicate CAKE news by LTjake

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Last hour | Other CB clients
Other Users?
Others making s'mores by the fire in the courtyard of the Monastery: (3)
As of 2024-04-24 21:37 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found