#!/usr/bin/perl
#
# Builds a single static HTML page (daily_comic.html) that shows today's
# strip of each configured web comic.
#
# For every comic scheduled for the current weekday the script downloads
# the comic's page, looks for an <img> tag whose src matches the comic's
# pattern, and writes a heading plus the image into the output page.
# Comics that cannot be fetched, or whose page has no matching image,
# are reported on STDERR and left out of the page body.

use strict;
use warnings;

use LWP::Simple qw(get);
use HTML::Parser;
use URI;
use CGI qw(:html);

# Output file, created (or overwritten) in the current directory.
my $file = 'daily_comic.html';

# Current day of the week as reported by localtime (0 = Sunday).
my $wday = (localtime)[6];

# Comic configuration, keyed by the comic's display name:
#   url  - URL of the page with the picture
#   src  - regex used to find the image on the page (matched against
#          the src attribute of each <img> tag)
#   days - weekday numbers (compared with $wday) on which to fetch it
my %cartoons = (
    'The New Bobbins Show' => {
        url  => 'http://www.bobbins.org',
        src  => qr/\d{8}\.png$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Diesel Sweeties' => {
        url  => 'http://www.dieselsweeties.com',
        src  => qr/sw\d+\.png$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'RPG World' => {
        url  => 'http://www.rpgworldcomic.com',
        src  => qr/\d{8}\w?\.jpg$/,
        days => [ 0, 3, 5, ],
    },
    'Gene Catlow' => {
        url  => 'http://www.genecatlow.com',
        src  => qr/\d{8}\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'User Friendly' => {
        url  => 'http://www.userfriendly.org/static',
        src  => qr/uf\d+\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
    'Goats' => {
        url  => 'http://www.goats.com',
        src  => qr/goats\d+\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'Penny Arcade' => {
        url  => 'http://www.penny-arcade.com/view.php3',
        src  => qr/\d{8}\w?\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'Angst Technologies' => {
        url  => 'http://www.inktank.com/AT/index.cfm',
        src  => qr/\d\d-\d\d-\d\d\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Indy Rock Pete' => {
        url  => 'http://www.indierockpete.com/',
        src  => qr/p\d+f\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Sinfest' => {
        url  => 'http://www.sinfest.net/',
        src  => qr/sf\d{8}\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
    'Dilbert' => {
        url  => 'http://www.dilbert.com/',
        src  => qr/dilbert\d+\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
);

# Write the HTML page
open( my $out, '>', $file )
    or die "Cannot create the file '$file': $!\n";

# Page header followed by an index linking to every configured comic.
# NOTE: the index lists all comics, including those skipped today.
print {$out}
    start_html(
        -head => meta(
            {
                -http_equiv => 'Content-Type',
                -content    => 'text/html; charset=utf-8'
            }
        ),
        -title => "Briac's Daily Cartoon Delivery",
    ),
    h1("Briac's Daily Cartoon Delivery"),
    ul( li( [ map { a( { -href => "#$_" }, $_ ) } sort keys %cartoons ] ) );

# Grab the different pictures
SITE:
foreach my $site ( sort keys %cartoons ) {
    my $comic = $cartoons{$site};

    # Get the comic only if it is published this day.
    next SITE unless is_published_today( $comic->{'days'}, $wday );

    # Fetch the page with LWP::Simple; skip this comic on failure rather
    # than parsing an undefined page.
    my $page = get( $comic->{'url'} );
    unless ( defined $page && length $page ) {
        warn "Could not get '$site'\n";
        next SITE;
    }

    # Locate the image matching the pattern defined in the %cartoons hash.
    my $img = find_image_src( $page, $comic->{'src'} );
    unless ( defined $img ) {
        warn "No image matching the pattern found for '$site'\n";
        next SITE;
    }

    # Print the comic picture in the HTML page
    my $src = absolute_image_url( $img, $comic->{'url'} );
    print {$out}
        a( { -name => $site } ),
        h2( a( { -href => $comic->{'url'} }, $site ) ),
        img( { -src => $src, -alt => "$site comic" } );
}

print {$out} end_html();

# Buffered write errors only surface when the handle is closed.
close($out)
    or die "Cannot close the file '$file': $!\n";

# Returns true when $today appears in the list of weekday numbers
# referenced by $days.
sub is_published_today {
    my ( $days, $today ) = @_;
    return scalar grep { $_ == $today } @{$days};
}

# Parses $html and returns the src attribute of the last <img> tag whose
# src matches $pattern, or nothing when no tag matches.
sub find_image_src {
    my ( $html, $pattern ) = @_;

    my $found;
    my $parser = HTML::Parser->new(
        start_h => [
            sub {
                my ($attr) = @_;
                my $candidate = $attr->{'src'};

                # <img> tags without a src attribute cannot match.
                return unless defined $candidate;
                return unless $candidate =~ $pattern;
                $found = $candidate;
                return;
            },
            "attr"
        ],

        # Only <img> tags are reported to the start handler.
        report_tags => 'img',
    );
    $parser->parse($html);
    $parser->eof();

    return $found;
}

# Returns $img unchanged when it already carries a URI scheme; otherwise
# resolves it against $base and returns the resulting URI object.
sub absolute_image_url {
    my ( $img, $base ) = @_;

    my $uri = URI->new($img);
    return $uri->scheme() ? $img : $uri->abs($base);
}