#!/usr/bin/perl -w use Time::Local; use strict; my $mbox = shift || "/home/lukas/mbox"; my $outputdir = shift || "/home/lukas/tmp/test"; my %monthmap = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6, Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 ); my %mails; &read_mbox(); &print_html(); &generate_index(); #### # Generate a HTML file for each email. sub print_html { my $count = 1; for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mails) { # warn "WARNING: overwriting $outputdir/mail$count.html\n" #if (-f "$outputdir/mail$count.html"); open HTML, "> $outputdir/mail$count.html" or die "Couldn't open $outputdir/mail$count.html: $!\n"; my $html = qq( $mails{$_}->{subject}

$mails{$_}->{subject}

[ thread | date | subject | author ]


); my $date = scalar localtime($mails{$_}->{date}); my $from = &html_escape($mails{$_}->{from}); my $body = &html_escape($mails{$_}->{body}); $html .= qq( From: $from
Subject: $mails{$_}->{subject}
Date: $date

$body

[ thread | date | subject | author ]

); print HTML $html; close HTML; $mails{$_}->{html} = "mail$count.html"; $count++; } } #### # Generate the index files. sub generate_index { my $countmsg = scalar keys %mails; # Sorted by thread. open INDEX, "> $outputdir/index.html" or die "Couldn't open $outputdir/index.html: $!\n"; my $html = qq( Mailbox

Mailbox

$countmsg messages.
Ordered by thread.
Order by [ date | subject | author ].



); print INDEX $html; close INDEX; # Sorted by date. open DATE, "> $outputdir/date.html" or die "Couldn't open $outputdir/date.html: $!\n"; $html = qq( Mailbox

Mailbox

$countmsg messages.
Ordered by date.
Order by [ thread | subject | author ].



); print DATE $html; close DATE; # Sorted by subject. open SUBJECT, "> $outputdir/subject.html" or die "Couldn't open $outputdir/subject.html: $!\n"; $html = qq( Mailbox

Mailbox

$countmsg messages.
Ordered by subject.
Order by [ thread | date | author ].



); print SUBJECT $html; close SUBJECT; # Sorted by author open AUTHOR, "> $outputdir/author.html" or die "Couldn't open $outputdir/author.html: $!\n"; $html = qq( Mailbox

Mailbox

$countmsg messages.
Ordered by author.
Order by [ thread | date | subject ].



); } #### # Recursive subroutine the check for replies. sub check_replies { my $id = shift; my $html; $html = "\n"; return $html eq "\n" ? '' : $html; } #### # Beautify the output, create links of appropriate tags. sub html_escape { my $thing = shift; $thing =~ s//>/g; $thing =~ s/"/"/g; $thing =~ s/\n/
/g; $thing =~ s!\b([-\w+.]+\@[-\w+.]+)\b!$1!g; $thing =~ s!\b(https?://[-\w?&/.+]+)\b!!g; return $thing; } #### # Strip the email address sub strip_email { my $original = shift; my ($email) = $original =~ m/\b<[-\w.]+\@[-\w.]+>\b/; $original =~ s/<.*>//; return $original ? $original : $email; } #### # Read in the mailbox file and generate the data structure. sub read_mbox { # This will be our message container. my $current; # This indicates, that the last line was blank, initially set to true, # so we can parse the first mail correctly. my $blank = 1; open MBOX, $mbox or die "Couldn't open mailbox $mbox: $!\n"; while () { # There was a blank line before, and this line looks like the beginning # of a new mail, so we need to take some action. if ($blank && /^From .*\d{4}$/) { # Save the message that we've parsed before (if there was one). $mails{$current->{message_id}} = $current if scalar keys %{$current}; # Create a new container for this message. $current = {}; # Set the blank line to zero. $blank = 0; # We're still in the header part, so we save some. } elsif (!$blank && /^From: (.*)/i) { my $from = $1; $from =~ s/"//g; $current->{from} = $from; } elsif (!$blank && /^Subject: (.*)/i) { $current->{subject} = $1; my $clean_subject = $1; $clean_subject =~ s/Re: (.*)/$1/i; $current->{clean_subject} = $clean_subject; } elsif (!$blank && /^Message-Id: (.*)/i) { $current->{message_id} = $1; } elsif (!$blank && /^Date: (.*)/) { $current->{date} = parsedate($1); warn "Could parse date: $!\n" unless $current->{date}; } elsif (!$blank && /^(?:References|In-Reply-To): (<.+>)/i) { $current->{refs} = @{ [ split(/ /, $1) ] }[-1]; # There was a blank line before, but it wasn't catched by the if- # statement above, so it must be the message body. } elsif ($blank) { $current->{body} .= $_; } # Aha, we have a blank line. This could've been the end of the header. $blank = 1 if /^$/; } close MBOX; } sub parsedate { my $date = shift; # print $date, "\n"; my ($wday, $mday, $mon, $year, $time, $hrs, $min, $sec); if ($date =~ /^\d\d?\s/) { ($mday, $mon, $year, $time) = split(/ /, $date); } elsif ($date =~ /^\w{3},\s/) { ($wday, $mday, $mon, $year, $time) = split(/,?\s+/, $date); } ($hrs, $min, $sec) = split(/:/, $time); $mon = $monthmap{$mon}; $mon--; $year -= 1900; return timelocal($sec, $min, $hrs, $mday, $mon, $year); }