This small script generates an HTML page with all of my favorite online comics. Since it can take some time to surf from one site to another each day, I whipped up this script to do it for me each morning, so that I can enjoy my breakfast without having to frantically visit each individual page.
Update: added a list of the comics for quick access and slightly changed the %cartoons hash.
#!/usr/bin/perl -w
use strict;
use LWP::Simple qw(get);
use HTML::Parser;
use URI;
use CGI qw(:html);
# Name of the HTML page that the script (re)writes on every run.
my $file = 'daily_comic.html';

# Today's day of the week as reported by localtime (0 = Sunday .. 6 = Saturday).
my $wday = (localtime)[6];

# One entry per comic, keyed by the title displayed on the generated page:
#   url  - URL of the page with the picture
#   src  - regex to find the image on the page (matched against the src
#          attribute of each <img> tag)
#   days - when the comic is published, as localtime weekday numbers
#
# The dot in front of every file extension is escaped so that it only matches
# a literal '.', not an arbitrary character.
my %cartoons = (
    'The New Bobbins Show' => {
        url  => 'http://www.bobbins.org',
        src  => qr/\d{8}\.png$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Diesel Sweeties' => {
        url  => 'http://www.dieselsweeties.com',
        src  => qr/sw\d+\.png$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'RPG World' => {
        url  => 'http://www.rpgworldcomic.com',
        src  => qr/\d{8}\w?\.jpg$/,
        days => [ 0, 3, 5, ],
    },
    'Gene Catlow' => {
        url  => 'http://www.genecatlow.com',
        src  => qr/\d{8}\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'User Friendly' => {
        url  => 'http://www.userfriendly.org/static',
        src  => qr/uf\d+\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
    'Goats' => {
        url  => 'http://www.goats.com',
        src  => qr/goats\d+\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'Penny Arcade' => {
        url  => 'http://www.penny-arcade.com/view.php3',
        src  => qr/\d{8}\w?\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'Angst Technologies' => {
        url  => 'http://www.inktank.com/AT/index.cfm',
        src  => qr/\d\d-\d\d-\d\d\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Indy Rock Pete' => {
        url  => 'http://www.indierockpete.com/',
        src  => qr/p\d+f\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Sinfest' => {
        url  => 'http://www.sinfest.net/',
        src  => qr/sf\d{8}\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
    'Dilbert' => {
        url  => 'http://www.dilbert.com/',
        src  => qr/dilbert\d+\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
);
# Write the HTML page.
# Three-argument open with a lexical handle: the mode can never be taken from
# the file name, and the handle is not a package global.
open( my $comic_fh, '>', $file )
    or die "Cannot create the file '$file': $!\n";

# Every plain print below goes to the page. Remember the previously selected
# handle so that it can be restored once the page is complete.
my $previous_fh = select $comic_fh;

# Page header, title and a quick-access list linking to each comic's anchor.
print start_html(
        -head => meta(
            {
                -http_equiv => 'Content-Type',
                -content    => 'text/html; charset=utf-8'
            }
        ),
        -title => "Briac's Daily Cartoon Delivery",
    ),
    h1("Briac's Daily Cartoon Delivery"),
    ul(
        li( [ map { a( { -href => "#$_" }, $_ ) } sort keys %cartoons ] )
    );

# Grab the different pictures
SITE:
foreach my $site ( sort keys %cartoons ) {
    my $comic = $cartoons{$site};

    # Get the comic only if it is published this day.
    next SITE unless grep { $_ == $wday } @{ $comic->{'days'} };

    # Fetch the page with LWP::Simple. get() returns undef on failure, in
    # which case there is nothing to parse: report it and move on.
    my $page = get( $comic->{'url'} );
    unless ( defined $page ) {
        warn "Could not get '$site'\n";
        next SITE;
    }

    # HTML Parser fetching the image matching the pattern defined
    # in the %cartoons hash. When several images match, the last one wins.
    my $img_src;
    my $parser = HTML::Parser->new(
        start_h => [
            sub {
                my $attr      = shift;
                my $candidate = $attr->{'src'};

                # An <img> without a src attribute cannot be the comic.
                return unless defined $candidate;
                return unless $candidate =~ $comic->{'src'};
                $img_src = $candidate;
            },
            "attr"
        ],
        report_tags => qw(img),
    );
    $parser->parse($page);
    $parser->eof();

    # No matching picture (layout change, comic not posted yet, ...):
    # skip the entry rather than emit an <img> with a meaningless source.
    unless ( defined $img_src ) {
        warn "Could not find the picture for '$site'\n";
        next SITE;
    }

    # Print the comic picture in the HTML page.
    # Check to see if the URI of the picture is relative or not; a relative
    # one is resolved against the URL of the page it was found on.
    my $uri = URI->new($img_src);
    my $src = $uri->scheme() ? $img_src : $uri->abs( $comic->{'url'} );

    print a( { -name => $site } ),
        h2( a( { -href => $comic->{'url'} }, $site ) ),
        img( { -src => $src, -alt => "$site comic" } );
}

print end_html();

# Restore the default output handle, then close the page explicitly so that
# buffered write errors (e.g. a full disk) are reported instead of lost.
select $previous_fh;
close $comic_fh
    or die "Cannot finish writing the file '$file': $!\n";
<kbd>--
my $
OeufMayo = new PerlMonger::Paris({http => '
paris.mongueurs.net'});</kbd>