#!/usr/bin/perl -w
use Time::Local;
use strict;
my $mbox = shift || "/home/lukas/mbox";
my $outputdir = shift || "/home/lukas/tmp/test";
my %monthmap = (
Jan => 1,
Feb => 2,
Mar => 3,
Apr => 4,
May => 5,
Jun => 6,
Jul => 7,
Aug => 8,
Sep => 9,
Oct => 10,
Nov => 11,
Dec => 12
);
my %mails;
&read_mbox();
&print_html();
&generate_index();
####
# Generate a HTML file for each email.
sub print_html {
my $count = 1;
for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mails) {
# warn "WARNING: overwriting $outputdir/mail$count.html\n"
#if (-f "$outputdir/mail$count.html");
open HTML, "> $outputdir/mail$count.html"
or die "Couldn't open $outputdir/mail$count.html: $!\n";
my $html = qq(
$mails{$_}->{subject}
$mails{$_}->{subject}
[ thread | date | subject | author ]
);
my $date = scalar localtime($mails{$_}->{date});
my $from = &html_escape($mails{$_}->{from});
my $body = &html_escape($mails{$_}->{body});
$html .= qq(
From: $from
Subject: $mails{$_}->{subject}
Date: $date
$body
[ thread | date | subject | author ]
);
print HTML $html;
close HTML;
$mails{$_}->{html} = "mail$count.html";
$count++;
}
}
####
# Generate the index files.
sub generate_index {
my $countmsg = scalar keys %mails;
# Sorted by thread.
open INDEX, "> $outputdir/index.html" or die "Couldn't open $outputdir/index.html: $!\n";
my $html = qq(
Mailbox
Mailbox
$countmsg messages.
Ordered by thread.
Order by [ date | subject | author ].
);
for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mails) {
next if $mails{$_}->{seen_thread};
my $date = scalar localtime($mails{$_}->{date});
my $from = &strip_email($mails{$_}->{from});
my $anchor = $mails{$_}->{'html'};
$anchor =~ s/mail(\d+)\.html/$1/;
$html .= qq|
- $mails{$_}->{'subject'} $from ($date)
|;
$mails{$_}->{seen_thread}++;
$html .= &check_replies($_);
}
$html .= qq(
);
print INDEX $html;
close INDEX;
# Sorted by date.
open DATE, "> $outputdir/date.html"
or die "Couldn't open $outputdir/date.html: $!\n";
$html = qq(
Mailbox
Mailbox
$countmsg messages.
Ordered by date.
Order by [ thread | subject | author ].
);
for (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mails) {
my $date = scalar localtime($mails{$_}->{date});
my $from = &strip_email($mails{$_}->{from});
my $anchor = $mails{$_}->{'html'};
$anchor =~ s/mail(\d+)\.html/$1/;
$html .= qq|
- $mails{$_}->{'subject'} $from ($date)
|;
}
$html .= qq(
);
print DATE $html;
close DATE;
# Sorted by subject.
open SUBJECT, "> $outputdir/subject.html"
or die "Couldn't open $outputdir/subject.html: $!\n";
$html = qq(
Mailbox
Mailbox
$countmsg messages.
Ordered by subject.
Order by [ thread | date | author ].
);
foreach my $mail (sort { lc $mails{$a}->{clean_subject} cmp lc $mails{$b}->{clean_subject} } keys %mails) {
next if $mails{$mail}->{seen_subject};
$html .= qq(
- $mails{$mail}->{clean_subject}
);
foreach (keys %mails) {
if ($mails{$_}->{clean_subject} eq $mails{$mail}->{clean_subject}) {
my $date = scalar localtime($mails{$_}->{date});
my $from = &strip_email($mails{$_}->{from});
my $anchor = $mails{$_}->{'html'};
$anchor =~ s/mail(\d+)\.html/$1/;
$html .= qq|
- $from ($date)
|;
$mails{$_}->{seen_subject}++;
}
}
$html .= "
\n";
}
$html .= qq(
);
print SUBJECT $html;
close SUBJECT;
# Sorted by author
open AUTHOR, "> $outputdir/author.html"
or die "Couldn't open $outputdir/author.html: $!\n";
$html = qq(
Mailbox
Mailbox
$countmsg messages.
Ordered by author.
Order by [ thread | date | subject ].
);
foreach my $mail (sort { lc $mails{$a}->{from} cmp lc $mails{$b}->{from} } keys %mails) {
next if $mails{$mail}->{seen_author};
my $from = &html_escape($mails{$mail}->{from});
$html .= qq(
- $from
);
foreach (keys %mails) {
if ($mails{$mail}->{from} eq $mails{$_}->{from}) {
my $date = scalar localtime($mails{$_}->{date});
my $anchor = $mails{$_}->{'html'};
$anchor =~ s/mail(\d+)\.html/$1/;
$html .= qq|
- $mails{$_}->{subject} ($date)
|;
$mails{$_}->{seen_author}++;
}
}
$html .= "
\n";
}
print AUTHOR $html;
close AUTHOR;
$html .= qq(
);
}
####
# Recursive subroutine the check for replies.
sub check_replies {
my $id = shift;
my $html;
$html = "\n";
foreach (sort { $mails{$a}->{date} <=> $mails{$b}->{date} } keys %mails) {
next if $_ eq $id;
next unless $mails{$_}->{refs};
next if $mails{$_}->{seen_thread};
if ($mails{$_}->{refs} eq $id) {
my $date = scalar localtime($mails{$_}->{date});
my $from = &strip_email($mails{$_}->{from});
my $anchor = $mails{$_}->{'html'};
$anchor =~ s/mail(\d+)\.html/$1/;
$html .= qq|
- $mails{$_}->{subject} $from ($date)
|;
$mails{$_}->{seen_thread}++;
$html .= &check_replies($_);
}
}
$html .= "
\n";
return $html eq "\n" ? '' : $html;
}
####
# Beautify the output, create links of appropriate tags.
sub html_escape {
my $thing = shift;
$thing =~ s/</g;
$thing =~ s/>/>/g;
$thing =~ s/"/"/g;
$thing =~ s/\n/
/g;
$thing =~ s!\b([-\w+.]+\@[-\w+.]+)\b!$1!g;
$thing =~ s!\b(https?://[-\w?&/.+]+)\b!!g;
return $thing;
}
####
# Strip the email address
sub strip_email {
my $original = shift;
my ($email) = $original =~ m/\b<[-\w.]+\@[-\w.]+>\b/;
$original =~ s/<.*>//;
return $original ? $original : $email;
}
####
# Read in the mailbox file and generate the data structure.
sub read_mbox {
# This will be our message container.
my $current;
# This indicates, that the last line was blank, initially set to true,
# so we can parse the first mail correctly.
my $blank = 1;
open MBOX, $mbox or die "Couldn't open mailbox $mbox: $!\n";
while () {
# There was a blank line before, and this line looks like the beginning
# of a new mail, so we need to take some action.
if ($blank && /^From .*\d{4}$/) {
# Save the message that we've parsed before (if there was one).
$mails{$current->{message_id}} = $current
if scalar keys %{$current};
# Create a new container for this message.
$current = {};
# Set the blank line to zero.
$blank = 0;
# We're still in the header part, so we save some.
} elsif (!$blank && /^From: (.*)/i) {
my $from = $1;
$from =~ s/"//g;
$current->{from} = $from;
} elsif (!$blank && /^Subject: (.*)/i) {
$current->{subject} = $1;
my $clean_subject = $1;
$clean_subject =~ s/Re: (.*)/$1/i;
$current->{clean_subject} = $clean_subject;
} elsif (!$blank && /^Message-Id: (.*)/i) {
$current->{message_id} = $1;
} elsif (!$blank && /^Date: (.*)/) {
$current->{date} = parsedate($1);
warn "Could parse date: $!\n" unless $current->{date};
} elsif (!$blank && /^(?:References|In-Reply-To): (<.+>)/i) {
$current->{refs} = @{ [ split(/ /, $1) ] }[-1];
# There was a blank line before, but it wasn't catched by the if-
# statement above, so it must be the message body.
} elsif ($blank) {
$current->{body} .= $_;
}
# Aha, we have a blank line. This could've been the end of the header.
$blank = 1 if /^$/;
}
close MBOX;
}
sub parsedate {
my $date = shift;
# print $date, "\n";
my ($wday, $mday, $mon, $year, $time, $hrs, $min, $sec);
if ($date =~ /^\d\d?\s/) {
($mday, $mon, $year, $time) = split(/ /, $date);
} elsif ($date =~ /^\w{3},\s/) {
($wday, $mday, $mon, $year, $time) = split(/,?\s+/, $date);
}
($hrs, $min, $sec) = split(/:/, $time);
$mon = $monthmap{$mon};
$mon--;
$year -= 1900;
return timelocal($sec, $min, $hrs, $mday, $mon, $year);
}