"be consistent"

Re: Parsing mail(mail::message)

by GrandFather (Sage)
on Dec 05, 2012 at 23:20 UTC ( #1007425=note: print w/replies, xml ) Need Help??

in reply to Parsing mail(mail::message)

Outlook (like most email clients) is sending your HTML/RTF email body along with a plain text rendition of the email. I'm actually surprised that Gmail isn't - you must have it configured to send plain text only.

I use the following code in a system that parses commands sent by email to an automated build and test system:

# Parse a raw MIME email into a flat hash of the fields we care about.
#
# MIME::Parser (used here) and HTML::TreeBuilder (used by _findText) must
# already be loaded by the surrounding file.
#
# Parameters:
#   $emailStr - the complete email (headers and body) as one string.
#
# Returns a hash, as a list, with these keys:
#   subject - Subject header with any leading "Re:" prefixes removed
#   from    - From header with the quotes around a leading display name removed
#   ccList  - Cc header
#   to      - To header
#   date    - Date header
#   body    - best-effort plain text of the message (see _parseParts)
# Missing headers are returned as ''.
#
# NOTE(review): MIME::Head::get appears to return the header value with its
# trailing newline still attached; values are passed through unchanged here -
# confirm whether callers would prefer them chomped.
sub ParseEmail {
    my ($emailStr) = @_;

    # Direct method call rather than the ambiguous indirect-object form
    # "new MIME::Parser".
    my $parser = MIME::Parser->new();

    # Keep everything in memory; nothing is written to disk while parsing.
    $parser->tmp_to_core(1);
    $parser->output_to_core(1);

    my $entity = $parser->parse_data($emailStr);
    my @parts  = $entity->parts();
    my $head   = $entity->head();
    my %fields;

    $fields{subject} = $head->get('subject') // '';
    $fields{subject} =~ s/^\s*(?:re:\s*)+//i;

    $fields{from} = $head->get('from') // '';
    $fields{from} =~ s/^"([^"]+)"/$1/;

    $fields{ccList} = $head->get('Cc')   // '';
    $fields{to}     = $head->get('To')   // '';
    $fields{date}   = $head->get('Date') // '';

    # A single-part message carries its text directly on the top-level entity;
    # a multipart message needs its parts searched for something readable.
    $fields{body} = @parts ? _parseParts(@parts) : _bodyText($entity);

    return %fields;
}

# Return the most readable text found in a list of MIME parts.
#
# Parameters:
#   @_ - MIME::Entity-like objects (the parts of a multipart entity).
#
# Returns the body of the first text/plain part found (searching nested
# multiparts depth-first, in order).  If there is no plain text part, returns
# the rendered text of the first text/html part.  Returns '' if neither exists.
sub _parseParts {
    my ($text) = _findText(@_);

    return $text // '';
}

# Recursive worker for _parseParts.
#
# Returns a two-element list ($text, $isPlain).  $isPlain is true when $text
# came from a text/plain part, which lets a plain-text hit inside a nested
# multipart end the search immediately instead of being overwritten by a
# later sibling part.  $text is undef when nothing usable was found.
sub _findText {
    my @parts = @_;
    my $htmlText;

    for my $part (@parts) {
        my $type = $part->effective_type();

        if (-1 < index $type, 'multipart') {
            my ($nestedText, $nestedIsPlain) = _findText($part->parts());

            return ($nestedText, 1) if $nestedIsPlain;

            # Only the first HTML rendition is kept as the fallback.
            $htmlText //= $nestedText;
        }
        elsif ($type eq 'text/plain') {
            return (_bodyText($part), 1);
        }
        elsif ($type eq 'text/html' && !defined $htmlText) {
            my $tree = HTML::TreeBuilder->new_from_content(_bodyText($part));

            $htmlText = $tree->as_text();

            # Release the parse tree explicitly; older HTML::Tree versions
            # leak it otherwise.
            $tree->delete();
        }
    }

    return ($htmlText, 0);
}

# Return the body of a single (non-multipart) entity as a string.
#
# Uses the body handle rather than stringify_body() so that the content
# transfer encoding (base64, quoted-printable) has already been undone.
# Returns '' when the entity has no body handle.
sub _bodyText {
    my ($entity) = @_;
    my $body = $entity->bodyhandle();

    return defined $body ? $body->as_string() : '';
}

Note that the heavy lifting is done by MIME::Parser and HTML::TreeBuilder. _parseParts returns the first plain text part or the text of the first HTML part it finds.

True laziness is hard work

node history
Node Type: note [id://1007425]
and all is quiet...

Others examining the Monastery: (3)
As of 2018-06-20 02:52 GMT
