#!/usr/bin/perl -w
use strict;
use LWP::Simple qw(get);
use HTML::Parser;
use URI;
use CGI qw(:html);

# Name of the HTML page this script generates (relative to the current
# working directory).
my $file = 'daily_comic.html';

# Index of the current weekday as reported by localtime:
# 0 is Sunday, 6 is Saturday.
my $wday = ( localtime(time) )[6];

# Catalogue of the comics to collect, keyed by the title shown on the page.
# Each entry holds:
#   url  - address of the web page that embeds the current strip
#   src  - regex applied to the "src" attribute of the <img> tags found on
#          that page; it is anchored at the end of the value and the dot
#          before the file extension is escaped so it only matches a real dot
#   days - weekday numbers (as returned by localtime: 0 = Sunday .. 6 =
#          Saturday) on which the comic is collected
my %cartoons = (
    'The New Bobbins Show' => {
        url  => 'http://www.bobbins.org',
        src  => qr/\d{8}\.png$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Diesel Sweeties' => {
        url  => 'http://www.dieselsweeties.com',
        src  => qr/sw\d+\.png$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'RPG World' => {
        url  => 'http://www.rpgworldcomic.com',
        src  => qr/\d{8}\w?\.jpg$/,
        days => [ 0, 3, 5, ],
    },
    'Gene Catlow' => {
        url  => 'http://www.genecatlow.com',
        src  => qr/\d{8}\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'User Friendly' => {
        url  => 'http://www.userfriendly.org/static',
        src  => qr/uf\d+\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
    'Goats' => {
        url  => 'http://www.goats.com',
        src  => qr/goats\d+\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'Penny Arcade' => {
        url  => 'http://www.penny-arcade.com/view.php3',
        src  => qr/\d{8}\w?\.gif$/,
        days => [ 0, 1, 3, 5, ],
    },
    'Angst Technologies' => {
        url  => 'http://www.inktank.com/AT/index.cfm',
        src  => qr/\d\d-\d\d-\d\d\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Indy Rock Pete' => {
        url  => 'http://www.indierockpete.com/',
        src  => qr/p\d+f\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, ],
    },
    'Sinfest' => {
        url  => 'http://www.sinfest.net/',
        src  => qr/sf\d{8}\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
    'Dilbert' => {
        url  => 'http://www.dilbert.com/',
        src  => qr/dilbert\d+\.gif$/,
        days => [ 0, 1, 2, 3, 4, 5, 6 ],
    },
);

# Write the HTML page

# Create the output page with a three-argument open and a lexical handle,
# then make it the default output handle: every bare print from here on
# writes into this file.
open( my $comic_fh, '>', $file )
  or die "Cannot create the file '$file': $!\n";
select $comic_fh;

# The index only lists the comics published today ($wday), because those are
# the only ones that get a matching named anchor further down the page;
# linking to the others would produce dead in-page links.
my @todays_comics = grep {
    my $name = $_;
    grep { $_ == $wday } @{ $cartoons{$name}->{'days'} }
} sort keys %cartoons;

# Page prologue: document head, main title and the index of in-page links.
print start_html(
    -head => meta(
        {
            -http_equiv => 'Content-Type',
            -content    => 'text/html; charset=utf-8'
        }
    ),
    -title => "Briac's Daily Cartoon Delivery",
  ),
  h1("Briac's Daily Cartoon Delivery"),
  ul(
    li( [ map { a( { -href => "#$_" }, $_ ) } @todays_comics ] )
  );

# Grab the different pictures
foreach my $site ( sort keys %cartoons ) {
    my $comic = $cartoons{$site};

    # Get only the comics if it is published this day.
    next unless grep { $_ == $wday } @{ $comic->{'days'} };

    # Every comic due today gets its anchor and heading, even when the strip
    # itself cannot be retrieved, so in-page links to it always resolve.
    print a( { -name => $site } ),
      h2( a( { -href => $comic->{'url'} }, $site ) );

    # Fetch the page with LWP::Simple; get() returns undef on failure, in
    # which case there is nothing to parse for this comic.
    my $page = get( $comic->{'url'} );
    if ( !defined $page ) {
        warn "Could not get '$site'\n";
        print p('Comic not available.');
        next;
    }

    # HTML Parser fetching the image matching the pattern defined
    # in the %cartoons hash. Only <img> start tags are reported; the handler
    # receives the tag's attribute hash. When several images match, the last
    # one on the page is kept.
    my $parser = HTML::Parser->new(
        start_h => [
            sub {
                my $attr      = shift;
                my $candidate = $attr->{'src'};

                # An <img> without a src attribute cannot be the strip.
                return unless defined $candidate;
                return if $candidate !~ $comic->{'src'};
                $comic->{'img'} = $candidate;
            },
            "attr"
        ],
        report_tags => 'img',
    );
    $parser->parse($page);
    $parser->eof();

    # No image matched the pattern: report it rather than emitting an <img>
    # whose source is the site's home page.
    my $img = $comic->{'img'};
    if ( !defined $img ) {
        warn "No image matching the pattern found for '$site'\n";
        print p('Comic not available.');
        next;
    }

    # Print the comic picture in the HTML page
    # Check to see if the URI of the picture is relative or not; a relative
    # one is resolved against the URL of the page it was found on.
    my $uri = URI->new($img);
    my $src = $uri->scheme() ? $img : $uri->abs( $comic->{'url'} );

    print img( { -src => $src, -alt => "$site comic" } );
}

# Close the HTML document.
print end_html();