# gimme_guten_tables($decoded, $maximum)
#
# Runs a series of substitutions over $decoded, then, for each matched entry,
# builds a %gutentypes hash describing three mirror URLs (plucker, html,
# text), probes each mirror with test_head(), stores a per-type `link` string
# depending on whether the probe returned status 200, and appends one record
# to $guten_tables. Returns $guten_tables after two final substitutions.
#
# NOTE(review): the body below is reproduced byte-for-byte and appears to be
# damaged -- presumably markup inside the patterns was stripped at some
# point; confirm against a pristine copy before relying on it:
#   * several s,,, patterns are empty or contain only whitespace/bullets;
#   * one statement fuses a substitution with what looks like the tail of a
#     match condition (`... <\/a>/) {`), so the enclosing loop/if is missing;
#   * $count and $guten_tables are used without a visible declaration;
#   * $maximum is unpacked but never referenced in the visible code;
#   * $1 is interpolated into the mirror URLs after `$title =~ s,...,` has
#     run, which can overwrite $1 when that substitution matches.
sub gimme_guten_tables { my ($decoded, $maximum) = @_; $decoded =~ s,
  • \n(.*?)\n
  • ,$1,g; $decoded =~ s,(.*?)
    .*?,$1,g; $decoded =~ s,,$1,g; $decoded =~ s,
  • (.*?)
  • ,$1,g; $decoded =~ s,<\/?ol>,,g; $decoded =~ s,\n.*,,; $decoded =~ s,^\n(.*?)(?: \((\d+)\))?<\/a>/) { my $splitguten = join('/', split(/ */, $1)); my $clipguten = substr($splitguten, -2, 2, ''); my $readmarks = $3 ? $3 : $1; my $title = $2; $title =~ s,by (.*?), by $1,g; my %gutentypes = ( plucker => { 'mirror' => "http://www.gutenberg.org/cache/plucker/$1/$1", 'content-type' => 'application/prs.plucker', 'string' => 'Plucker', 'format' => 'pdb' }, html => { 'mirror' => "http://www.gutenberg.org/dirs/$splitguten/$1/$1-h/$1-h.htm", 'content-type' => 'text/html', 'string' => 'Marked-up HTML', 'format' => 'html' }, text => { 'mirror' => "http://sailor.gutenberg.lib.md.us/$splitguten/$1/$1.txt", 'content-type' => 'text/plain', 'string' => 'Plain text', 'format' => 'txt' }, ); for my $types ( sort keys %gutentypes ) { my ($status, $type) = test_head($gutentypes{$types}{mirror}); if ($status == 200) { $gutentypes{$types}{link} = qq{$gutentypes{$types}{format}\n}; } else { $gutentypes{$types}{link} = qq{$gutentypes{$types}{format}}; } } $guten_tables .= qq{ $count $readmarks $title $gutentypes{plucker}{link} $gutentypes{html}{link} $gutentypes{text}{link} \n}; $count++; } } $guten_tables =~ s,\&,\&,g; $guten_tables =~ s,>\n\s+<,><,g; return $guten_tables; }

# test_head($url)
#
# Sends an HTTP HEAD request for $url using a browser-like User-Agent string.
#
# Returns a two-element list:
#   1. the numeric HTTP status code of the response;
#   2. the value of the response's Content-Type header, as returned by
#      $response->header('Content-Type').
sub test_head {
    my ($url) = @_;

    my $ua = LWP::UserAgent->new();
    $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1;) Firefox/2.0.0.6');

    my $response = $ua->request(HTTP::Request->new(HEAD => $url));

    # Read the status code from the response object rather than regex-parsing
    # status_line and returning $1: after an unchecked match, $1 could hold a
    # stale capture from an earlier match, and callers compare the result
    # numerically (== 200).
    my $status = $response->code;
    my $type   = $response->header('Content-Type');

    return ($status, $type);
}