You get the complete message, but unparsed. You most likely want to parse that message,
A MIME mail message consists of multiple parts.
To store the interesting parts of an email, I use the following code:
...
body => $conn->get_bodystructure($uid),
envelope => $conn->get_envelope($uid),
You get at the part by using $client->bodypart_string( ... );.
| [reply] [d/l] [select] |
use Mail::IMAPClient;
use IO::Socket::SSL;
use Mail::IMAPClient::BodyStructure;
$sock = IO::Socket::SSL->new(
PeerAddr => "imap.gmail.com",
PeerPort => 993) or die $@;
$client = Mail::IMAPClient->new(
Socket => $sock,
User => 'abc@gmail.com',
Password => '*****');
$client->select("INBOX");
@messages = $client->messages;
$msg = pop @messages;
$obj = $client->get_bodystructure($msg);
$tekst = $client->bodypart_string($obj,0);
print $tekst;
$client->logout();
Any suggestions? | [reply] [d/l] |
I'm not really clear on where your problems are with incorporating the methods I posted and what part(s) don't work for you and how they fail to work for you.
If you tell me what you expect your code to do, what it does, and how it is problematic for you to incorporate the methods I showed you (and/or other parts of the Mail::IMAPClient documentation), maybe I can help you better.
For the time being, I paste some code from a larger application that extracts the mail body from an arbitrary mail using Mail::IMAPClient:
sub new {
my $class = shift;
bless {
text_formatter => Text::Markdown->new(),
@_,
} => $class;
};
sub from_imap_client {
my ($package, $conn, $uid, %info) = @_;
#weaken $conn;
my $dt = $conn->date( $uid );
my $timestamp;
if( $dt ) {
$dt =~ s!\s*\(([A-Z]+.*?)\)\s*$!!; # strip clear name of timez
+one at end
$dt =~ s!\s*$!!; # Strip whitespace at end
$dt =~ s!\bUTC$!GMT!; # "UTC" -> "GMT", ah, well...
# Let's be lenient about the time format:
my $format = '%d %b %Y %H:%M:%S';
if( $dt =~ /,/ ) {
# As good citizens, they added the weekday name:
$format = "%a, $format";
};
if( $dt =~ /[A-Z]+$/ ) {
# Instead of an offset, we have the name :-/
$format = $format . " %Z";
} else {
# The default, for well-formed servers, an TZ offset
$format .= " %z";
};
if( ! eval { $timestamp = Time::Piece->strptime( $dt, $format
+); 1 }) {
die "$@:\n$format - [$dt]\n";
};
} else {
$dt ||= '';
warn "No timestamp for $uid?! [$dt]";
};
my $self = $package->new(
%info,
uid => $uid,
date => $timestamp,
imap => $conn,
body => $conn->get_bodystructure($uid),
envelope => $conn->get_envelope($uid),
categories => [],
);
$self
};
sub imap() {
$_[0]->{imap}
};
=head2 C<< ->body( $pref ) >>
Finds the preferred body type or one of HTML, PLAIN
(in that order)
=cut
# XXX Check for inline images
# XXX Check for other linked media and inline it
sub best_alternative {
my( $self, $body ) = @_;
my $part;
($part)= grep { warn $_ . " (". $_->bodysubtype . ")";
'HTML' eq uc $_->bodysubtype
}
grep { 'HEAD' ne $_->bodytype }
$body->bodystructure;
if(! $part) {
($part) = grep { warn $_ . " (". $_->bodysubtype . ")";
'PLAIN' eq $_->bodysubtype
}
grep { 'HEAD' ne $_->bodytype }
$body->bodystructure;
};
return $part
}
sub body {
my $self = shift;
# We should query:
# If multipart/alternative:
# Choose text/html
# Choose text/plain
# else
# use whatever we got
# And then look for attached images
#warn Dumper [ $self->{body}->parts ];
#warn Dumper $self->{body}->bodystructure;
#warn $self->{body}->bodytype . "/" . $self->{body}->bodysubtype;
#for ($self->{body}->parts) {
# warn "Part: $_";
# warn Dumper $self->{imap}->bodypart_string( $self->{uid}, $_
+);
#};
# XXX Recursively enumerate the body parts
my %elements= (
text => [],
images => [],
);
my $body = $self->{body};
my @toplevel = grep { $_->id =~ /^\d+$/ } $body->bodystructure;
if( ! @toplevel ) {
@toplevel = $body;
};
for my $part (@toplevel) {
# This walks the whole mail structure
#warn sprintf "Part %s", $part->id;
my $mime= sprintf "%s/%s", lc $part->bodytype, lc $part->bodys
+ubtype;
#warn $mime;
if( 'multipart/alternative' eq $mime ) {
# -> subroutine: get_best_alternative()
warn "Have multipart bodytype, finding best alternative";
# find suitable part
# Assume it's about text anyway
# Cascade from HTML -> plaintext
$part = $self->best_alternative( $part );
push @{ $elements{ text } }, {
content => $self->imap()->bodypart_string( $self->
+{uid}, $part->id ),
mime => $mime,
part => $part
};
#warn Dumper $elements{ text }->[-1];
} elsif( 'multipart/mixed' eq $mime ) {
# Find the different types (image, text, sound?) and use
# one from each, resp concatenate
# -> subroutine: get_parts()
} else {
#warn sprintf "Have only one body type (%s)", $part->bodyt
+ype;
# take what we got
if( 'image' eq $part->bodytype ) {
my $ref = $part->bodyparms;
next if( ! ref $part->bodyparms );
my $name = $part->bodyparms->{name};
# Download the image so we can later serve it up again
+ under its name
#push @{ $elements{ images } }, $part->textlines;
$body = sprintf '<img src="/%s/%s">',
$self->permabase,
$name;
#warn "Image detected, serving up as '$body'";
@{ $elements{ text } } = { content => $body, mime => "
+text/html", part => undef };
} elsif( 'text/plain' eq $mime ) {
push @{ $elements{ text } }, {
content => $self->imap()->bodypart_string( $self->
+{uid}, $part->id ),
mime => $mime,
part => $part
};
# we should handle text/html more gracefully!
} else {
#my $body= $part->textlines;
#warn "Unknown / unhandled part $mime, hope it's text.
+";
#warn Dumper $part;
push @{ $elements{ text } }, {
content => $self->imap()->bodypart_string( $self->
+{uid}, $part->id ),
mime => $mime,
part => $part
};
};
};
};
if( ! @{$elements{ text }}) {
return 'No mail body';
};
$body= $elements{ text }->[0]->{content};
my $type= $elements{ text }->[0]->{mime};
my $part= $elements{ text }->[0]->{part};
# Decode the transport encoding
if( $part and 'base64' eq $part->bodyenc ) {
$body = decode_base64( $body );
} elsif( $part and 'quoted-printable' eq $part->bodyenc ) {
$body = decode_qp( $body );
};
# Decode to appropriate charset
if( $part and ref $part and my $enc = $part->bodyparms->{charset})
+ {
$body = decode( $enc, $body );
};
# Find out whether we have HTML or plaintext
if( 'text/plain' eq $type ) {
$part= $body;
# XXX Maybe this should go into a separate sub/module
# Strip mail footer
$body=~ s!\n--\s*\n.*!!sm;
# Strip obvious reply:
$body=~ s!----- Reply message -----.*!!sm;
# Strip quoted part in case we have a reply
# A reply counts as
# a line ending with ":" as the "$foo wrote on $date:"
# followed by more than three (consecutive!) quoted lines
$body=~ s!^[A-Z].*:(?:\s*^>.*$){3,}!!mg;
#or warn "No quoted part found";
# XXX Fix the hardcoded ->markdown method
# XXX Maybe this should go into a separate sub/module
$body= $self->{text_formatter}->markdown( $body );
} elsif( 'text/html' eq $type ) {
# XXX Strip JS and other stuff, resp. only allow good stuff
# Most likely Clinton's HTML::StripScripts is the module t
+o use
# XXX Maybe this should go into a separate sub/module
# XXX Also, we should use an HTML parser
$body=~ s!<br>\s*--\s*<br>.*!!ms; # Strip mail footer
};
$body
};
| [reply] [d/l] |
#!/usr/bin/perl
use strict;
use Mail::IMAPClient;
my $client = Mail::IMAPClient->new(
Server => 'imap.gmail.com',
User => 'abc@gmail.com',
Password => '**********',
Ssl => 1,
Uid => 1,
);
$client->select("INBOX");
my @messages = $client->messages;
my $msgid = pop @messages;
my $text = $client->bodypart_string($msgid,1)
or die "Could not get bodypart string: ", $client->LastError;
print $text;
$client->logout
or die "Logout error: ", $client->LastError, "\n";
poj | [reply] [d/l] [select] |