in reply to
Parsing mail(mail::message)
Outlook (like most email clients) sends your HTML/RTF email body along with a plain-text rendition of the same message. I'm actually surprised that Gmail isn't doing the same - you must have it configured to send plain text only.
I use the following code in a system that parses commands sent by email to an automated build and test system:
# Parse a raw email message into a flat hash of the fields of interest.
#
# Parameters:
#   $emailStr - the complete raw message (headers and body) as one string.
#
# Returns a hash (as a list) with these keys:
#   subject - Subject header with any leading "Re:" prefixes removed
#   from    - From header; a leading quoted display name loses its quotes
#   ccList  - Cc header
#   to      - To header
#   date    - Date header
#   body    - text of the message: the decoded body for a single-part
#             message, otherwise whatever _parseParts() selects
# A header that is absent yields ''.
#
# NOTE(review): header values are stored exactly as MIME::Head returns
# them; they presumably still carry their trailing newline - confirm
# before comparing them against literals.
sub ParseEmail {
    my ($emailStr) = @_;

    # Direct method call instead of the indirect-object form "new Class".
    my $parser = MIME::Parser->new();

    # Ask the parser to keep temporary data and decoded bodies in core.
    $parser->tmp_to_core(1);
    $parser->output_to_core(1);

    my $entity = $parser->parse_data($emailStr);
    my $head   = $entity->head();

    my %fields;

    $fields{subject} = $head->get('subject') // '';
    $fields{subject} =~ s/^\s*(re:\s*)+//i;    # drop "Re: Re: ..." prefixes

    $fields{from} = $head->get('from') // '';
    $fields{from} =~ s/^"([^"]+)"/$1/;         # "Name" <addr> -> Name <addr>

    $fields{ccList} = $head->get('Cc')   // '';
    $fields{to}     = $head->get('To')   // '';
    $fields{date}   = $head->get('Date') // '';

    my @parts = $entity->parts();
    if (@parts) {
        $fields{body} = _parseParts(@parts);
    }
    else {
        # Guard against an entity without a body handle rather than
        # dying on a method call on undef.
        my $bodyHandle = $entity->bodyhandle();
        $fields{body} = defined $bodyHandle ? $bodyHandle->as_string() : '';
    }

    return %fields;
}
# Pick the best text rendition out of a list of MIME parts.
#
# Parameters:
#   @_ - MIME::Entity-style parts; each must support effective_type(),
#        parts() and bodyhandle().
#
# Returns the decoded body of the first text/plain part (searching
# depth-first through multipart containers, in document order). If there
# is no plain-text part, returns the text content of the first text/html
# part with the markup stripped by HTML::TreeBuilder. Returns '' when
# neither kind of part exists.
sub _parseParts {
    my @parts = @_;

    my $plainPart = _findFirstPart('text/plain', @parts);
    return _decodedBody($plainPart) if defined $plainPart;

    my $htmlPart = _findFirstPart('text/html', @parts);
    return '' if !defined $htmlPart;

    my $tree = HTML::TreeBuilder->new_from_content(_decodedBody($htmlPart));
    my $text = $tree->as_text();

    # Release the parse tree explicitly once the text has been extracted.
    $tree->delete();

    return $text;
}

# Depth-first search for the first part whose effective type is exactly
# $wantedType; returns that part, or nothing when there is no match.
sub _findFirstPart {
    my ($wantedType, @parts) = @_;

    for my $part (@parts) {
        my $type = $part->effective_type();
        return $part if $type eq $wantedType;

        # Only multipart containers are descended into.
        next if index($type, 'multipart') < 0;

        my $found = _findFirstPart($wantedType, $part->parts());
        return $found if defined $found;
    }

    return;
}

# Return the decoded body of a part as a string, or '' when the part has
# no body handle. The body handle is used (rather than stringify_body)
# so the text comes back without its content-transfer-encoding applied.
sub _decodedBody {
    my ($part) = @_;

    my $bodyHandle = $part->bodyhandle();
    return defined $bodyHandle ? $bodyHandle->as_string() : '';
}
Note that the heavy lifting is done by MIME::Parser and HTML::TreeBuilder. _parseParts returns the first plain-text part it finds or, if there is none, the text of the first HTML part.
True laziness is hard work