<?xml version="1.0" encoding="windows-1252"?>
<node id="1007425" title="Re: Parsing mail(mail::message)" created="2012-12-05 18:20:51" updated="2012-12-05 18:20:51">
<type id="11">
note</type>
<author id="461912">
GrandFather</author>
<data>
<field name="doctext">
&lt;p&gt;Outlook (like most email clients) is sending your HTML/RTF email body along with a plain text rendition of the email. I'm actually surprised that Gmail isn't - you must have it configured to send plain text only.&lt;/p&gt;
&lt;p&gt;I use the following code in a system that parses commands sent by email to an automated build and test system:&lt;/p&gt;
&lt;c&gt;
# Parse a raw email (headers plus body) into a hash of the fields of interest.
#
# Parameters:
#   $emailStr - the complete email message as a single string.
#
# Returns a flat key/value list (assign it to a hash) with the keys
# subject, from, ccList, to, date and body. A header that is absent yields ''.
# Header values are otherwise passed through as MIME::Head's get() returns
# them, apart from the two clean-ups noted below.
sub ParseEmail {
    my ($emailStr) = @_;
    # Direct class-method call; the indirect-object form "new MIME::Parser"
    # is ambiguous to the Perl parser and discouraged.
    my $parser = MIME::Parser-&gt;new();
    my %fields;

    # Ask the parser to keep temporary and decoded data in core (memory).
    $parser-&gt;tmp_to_core(1);
    $parser-&gt;output_to_core(1);

    my $entity = $parser-&gt;parse_data($emailStr);
    my @parts  = $entity-&gt;parts();
    my $head   = $entity-&gt;head();

    $fields{subject} = $head-&gt;get('subject') // '';
    # Strip any run of leading "Re:" reply markers (case-insensitive).
    $fields{subject} =~ s/^\s*(re:\s*)+//i;
    $fields{from} = $head-&gt;get('from') // '';
    # Drop the quotes around a leading quoted display name.
    $fields{from} =~ s/^"([^"]+)"/$1/;
    $fields{ccList} = $head-&gt;get('Cc')   // '';
    $fields{to}     = $head-&gt;get('To')   // '';
    $fields{date}   = $head-&gt;get('Date') // '';

    if (@parts) {
        # Multipart message: pick the most suitable text from the parts.
        $fields{body} = _parseParts(@parts);
    } else {
        # Single-part message. bodyhandle() may return undef when the entity
        # has no body, so guard the dereference instead of dying.
        my $bodyHandle = $entity-&gt;bodyhandle();
        $fields{body} = defined $bodyHandle ? $bodyHandle-&gt;as_string() : '';
    }

    return %fields;
}


# Pick the text to use as the message body from a list of MIME parts.
#
# Parameters:
#   @_ - the parts to examine (objects providing effective_type(), parts()
#        and stringify_body()).
#
# Returns the body of the first text/plain part found (searching nested
# multipart parts in order); if there is no plain text part, the text
# extracted from the first text/html part; otherwise ''.
sub _parseParts {
    my ($plainText, $htmlText) = _collectPartText(@_);

    return $plainText // $htmlText // '';
}

# Walk the parts depth-first and return a two element list:
# (first plain text body or undef, text of the first HTML part or undef).
# Tracking the two separately stops a later HTML part, or an empty nested
# multipart, from overwriting text that has already been found.
sub _collectPartText {
    my $htmlText;

    for my $part (@_) {
        my $type = $part-&gt;effective_type();

        if (-1 &lt; index $type, 'multipart') {
            my ($subPlain, $subHtml) = _collectPartText($part-&gt;parts());

            # Plain text always wins, so stop as soon as one turns up.
            return ($subPlain, $htmlText) if defined $subPlain;
            $htmlText //= $subHtml;

        } elsif ($type eq 'text/plain') {
            return ($part-&gt;stringify_body(), $htmlText);

        } elsif ($type eq 'text/html' &amp;&amp; !defined $htmlText) {
            my $tree = HTML::TreeBuilder-&gt;new_from_content($part-&gt;stringify_body());

            $htmlText = $tree-&gt;as_text();
            # Explicitly free the tree: releases of HTML::TreeBuilder without
            # weak references leak it otherwise (harmless on newer ones).
            $tree-&gt;delete();
        }
    }

    return (undef, $htmlText);
}
&lt;/c&gt;
&lt;p&gt;Note that the heavy lifting is done by [mod://MIME::Parser] and [mod://HTML::TreeBuilder]. _parseParts prefers plain text: it returns the first plain text part it finds and otherwise falls back to the text extracted from an HTML part.&lt;/p&gt;
&lt;div class="pmsig"&gt;&lt;div class="pmsig-461912"&gt;
True laziness is hard work
&lt;/div&gt;&lt;/div&gt;</field>
<field name="root_node">
1007419</field>
<field name="parent_node">
1007419</field>
</data>
</node>
