use strict; use warnings; use MIME::Lite; use HTML::Parser; use Text::Wrap; use File::Spec; use Getopt::Long; use Win32::OLE; BEGIN { Win32::OLE->Initialize(Win32::OLE::COINIT_OLEINITIALIZE); } # +MAPI bug use Win32::OLE::Const 'Microsoft Outlook'; use Win32::OLE::Const 'Microsoft CDO.*Library'; use Win32::OLE::Variant; $|++; my $ROOT_FOLDER = "Personal Folders"; my $TEXT_ONLY = 0; my $SAVEDIR = "C:\\mbox\\"; my $ATTACH_PATH = $SAVEDIR."attachments\\"; my $DEBUG = 0; my $DEFAULT_EMAIL= 'nobody@nowhere.com'; # common to use "-" when mis +sing my $NO_SUBJECT = 'No subject'; # common to use "-" when mis +sing my @LOGON_PARAMS = undef; # should work if not try unc +ommenting #my $SERVERNAME = "computername"; # local computer name #my $USERNAME = getlogin(); # should be able to find "us +ername" # @LOGON_PARAMS = ( {'ProfileInfo' => "$SERVERNAME\n$USERNAME" }, un +def ); my ($DUMP, $NO_FOLDERS, $NO_SPACES, $THUNDERBIRD, $HELP, $ITEMS); GetOptions( "root|r=s" => \$ROOT_FOLDER, "attach|a=s" => \$ATTACH_PATH, "savedir|s=s" => \$SAVEDIR, "dump" => \$DUMP, "nofolders|n" => \$NO_FOLDERS, "nospaces" => \$NO_SPACES, "text|t" => \$TEXT_ONLY, "thunderbird" => \$THUNDERBIRD, "debug|d=i" => \$DEBUG, "help|h|?" => \$HELP, ); if ( $HELP or $ARGV[0] and $ARGV[0] =~ m/[h\?]/ ) { print " Usage $0 --root=<str> # Root folder to extract. Defualt 'Personal Folder +s' (r) --savedir=<str> # Path to directory to save folders into. + (s) # Automagically created if it does not exist. # Defaults to $SAVEDIR --attach=<str> # Path to directory to save attachments into. + (a) # Automagically created if it does not exist. # Defaults to $ATTACH_PATH --nofolders # Flattens folder stucture, all files in root fold +er (n) # with names like root.folder.subfolder --nospaces # Remove spaces from filenames and paths --dump # Dumps entire parse to STDOUT for redirection --text # Override default MIME output. In text mode body +text (t) # and textified HTML are saved. Attachments extrac +ted # and saved in attach path. Links to files are als +o # added at the end of the email so you can find th +em. --thunderbird # RFC 1521 calls for multipart alternative email t +o be # sent with the text part before the RTF before th +e # HTML, however Thunderbird requires HTML first fo +r # it to behave as expected. Thunderbird avoids the + issue # when it imports email with text and HTML by simp +ly not # importing the text at all! --debug=N # Levels 0=off; 1=folders&count; 2=+From; 3=+heade +r (d) --help # Help but you must already know that + (h) /? # Windows style help request "; system('pause'); print qq! INSTRUCTIONS First you need to get a .pst file you can open in Outlook. Maybe it's already there, but if you have got one from Exchange you need to point Outlook at it. Make sure you have the folder view on View|Folder List. Right click Personal Folders and select Open Personal Folders File (*. +pst). Browse to the pst, select it and click OK Now run the script. Be aware this script is no speed demon and will ta +ke about 1 minute to process every 20 MB of pst (on my old laptop). Close + down your destination mail reader so it can find the new folders when it re +starts. Transfer everything in $SAVEDIR except for $ATTACH_PATH into (Win32): Evolution: C:\\Documents and Settings\\%USER%\\.evolution\\mail\\local\\ Thunderbird C:\\Documents and Settings\\%USER%\\Aplication Data\\Thunderbird \\Profiles\\<%SALT%.%USER%>\\Local Folders\\ When you open your mail agent the next time it will index all the new +mail. Note in the process of exporting all the attachments are saved to file +. These files are found in $ATTACH_PATH. PST files all have a root called "Personal Folders" that can't be chen +ged from within Outlook. To avoid name clashes the first one will be in 1. +sbd, the next in 2.sbd, etc. !; exit } print "Processing root folder: $ROOT_FOLDER\n" if $DEBUG; do { mkdir $SAVEDIR or die "Can't mkdir $SAVEDIR $!\n" } unless -d $SAVEDIR; do { mkdir $ATTACH_PATH or die "Can't mkdir $ATTACH_PATH $!\n" } unless -d $ATTACH_PATH; my $OUTLOOK = Win32::OLE->new('Outlook.Application') or die "Could not instatiate Outlook: " . Win32::OLE::LastError(); my $NS = $OUTLOOK->GetNamespace("MAPI") or die "Could not get namespace: " . Win32::OLE::LastError(); # Note that we are accessing Outlook via two similar but different OO +models # We have to use MAPI to get at the headers via the Fields method my $MAPI = Win32::OLE->new('MAPI.Session') or die "Could not instantiate MAPI Session: " . Win32::OLE::LastEr +ror(); $MAPI->Logon(@LOGON_PARAMS); # returns undef regardless of success die "Logon Failed: " . Win32::OLE::LastError() if Win32::OLE::LastErro +r(); for my $parent ( 1..$NS->Folders->Count ) { get_fh($parent); # required for mail programs to find parent.sbd. +normally # used to return a file handle but make an empty +file recurse_folders( $NS->Folders($parent), $parent ); } sub recurse_folders { my ($folder, $parent) = @_; return if $folder->DefaultItemType != olMailItem; # not a mail f +older my $fh = get_fh("$parent/".$folder->Name); if ( ("$parent/".$folder->Name) =~ m/$ROOT_FOLDER/i ) { printf "\n\nFolder: %s\nTotal entries: %s\n", "$parent/".$folder->Name, $folder->Items->Count if $DEBUG; my $headers = get_headers($folder); my $fake_header = 0; for my $i (1..$folder->Items->Count) { printf "Processing %d\r", ++$ITEMS unless $DEBUG; my $message = $folder->Items->Item($i); next unless $message->Class == olMail; # mail item my $header = $headers->{$message->EntryID}; do{$fake_header++} if $header =~ m/X-Fake-Header: True/; next if $DEBUG and $DEBUG < 4 ; (my $html = $message->HTMLBody || '' ) =~ s/\r//g; (my $body = $message->Body || '' ) =~ s/\r//g; my $attach = $message->Attachments ? get_attachments($mess +age) : ''; my $str = mbox_format($header,$body,$html,$attach); $str =~ tr/\11\12\40-\176//cd; # ascify aka remove all cr +ap chars print $fh $str; } print "Fake headers: $fake_header\n" if $DEBUG; } # recursive tree walk if ($folder->Folders and $folder->Folders->Count) { for my $i (1..$folder->Folders->Count) { recurse_folders($folder->Folders($i),"$parent/".$folder->N +ame); } } } sub get_headers { my $folder = shift; my %headers; # ***here we change from using Outlook object to using MAPI object +*** $folder = $MAPI->GetFolder($folder->EntryID, $folder->StoreID) or die Win32::OLE::LastError(); my $items = $folder->Messages; print "Got ", $items->Count, " MAPI message(s)\n" if $DEBUG; for my $i (1..$items->Count) { my $message = $items->item($i); next unless $message; my $entry_id = $message->id; my $header = $message->Fields->Item(CdoPR_TRANSPORT_MESSAGE_HE +ADERS); $header = $header ? $header->Value : missing_header($message); $header =~ s/\r//g; # create the From <someone> <asctime> line that starts an mbox + record my ($from) = $header =~ m/(?:From:|Return-path).*?\b([\w\.\-\% +\+]+\@[\w\.\-]+)/i; $from ||= $DEFAULT_EMAIL; my ($date) = $header =~ m/Date:\s+(.+)/i; $date ||= get_date($message); $date = asctime_format($date); $headers{$entry_id} = "From $from $date\n$header"; print "From $from $date\n" if $DEBUG > 1; print $header if $DEBUG > 2; } return \%headers } # Fake header a header sub missing_header { my $message = shift; my $date = get_date($message); # we need eval as these calls explode it there is no data to get my $to = eval{$message->Fields->Item(CdoPR_DISPLAY_TO)->Val +ue;} || $DEFAULT_EMAIL; my $from_email= eval{$message->Fields->Item(CdoPR_SENDER_EMAIL_ADD +RESS)->Value;} || $DEFAULT_EMAIL; my $from_name = eval{$message->Fields->Item(CdoPR_SENDER_NAME)->Va +lue;} || ''; my $subject = eval{$message->Fields->Item(CdoPR_SUBJECT)->Value; +} || $NO_SUBJECT; $from_name = qq|"$from_name" | if $from_name; my $boundary = get_boundary(); return <<HEADER; Return-path: <$from_email> From: $from_name<$from_email> To: <$to> Subject: $subject Date: $date MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="$boundary" X-Fake-Header: True HEADER } # Standard mail form is 'Tue, 1 Jan 1980 00:00:01' ie not asctime sub get_date { my $message = shift; my $str = $message->TimeLastModified->Date('ddd MMM d yyyy'); my($wd,$m,$d,$y) = split " ", $str; my $t = $message->TimeLastModified->Time('hh:mm:ss'); return sprintf "%3s, %2d %3s %4d %8s", $wd,$d,$m,$y,$t; } sub asctime_format { sprintf "%3s %3s %2d %8s %4d",(split/[,\s]+/,$_[0])[0,2,1,4,3]; } sub get_attachments { my $message = shift; my @attach; # need MAPI again to get CID via Fields method using "undocumented +" # definitely unexported constant PR_ATTACH_CONTENT_LOCATION (0x371 +2001E) my $mapi_message = $MAPI->GetMessage($message->EntryID) or die Win32::OLE::LastError(); for my $i ( 1..$message->Attachments->Count ) { my $attachment = $message->Attachments->Item($i); my $cid = $mapi_message->Attachments->Item($i)->Fields->Item(0 +x3712001E) || ''; $cid = $cid->Value if $cid; my $filename = get_filename($attachment->Filename); $filename =~ s/[^\w\.\-\/ ]+/_/g; $filename =~ s/\s+/_/g if $NO_SPACES; my $filepath = File::Spec->catfile($ATTACH_PATH, $filename); push @attach, [ $filepath, $attachment->Filename, $cid ]; $attachment->SaveAsFile($filepath) unless $DEBUG; } return \@attach; } sub attachment_path_map { my $attach = shift; return '' unless @$attach; return "\n\n". (join '', map{"Attachment: $_->[0]\n"} @$attach) } sub mbox_format { my ($header,$body,$html,$attach) = @_; my (undef,$boundary) = $header =~ m/\bBoundary\s*=\s*(['"]?)([^\1\ +n]+)\1/i; if ($TEXT_ONLY) { my $text = html2text($html); $body .= "\n\n<HTML>\n\n$text\n" if $text; $body .= attachment_path_map($attach); # (\n\s+.*)* removes extended lines below Content-Type: $header =~ s!Content-Type:\s.*(\n\s+.*)*!Content-Type: text/pl +ain!i; } elsif ($boundary) { my $msg = MIME::Lite->new( Type =>'multipart/mixed' ); # although the RFC says run the parts from low->high definitio +n # (ie TEXT->HTML) Thunderbird does not get it so as user may w +ant # preferential HTML you have to give Thunderbird the HTML firs +t $msg->attach( Type => 'TEXT', Data => $body ) if $body and not $THUNDERBIRD; $msg->attach( Type => 'HTML', Data => $html ) if $html; $msg->attach( Type => 'TEXT', Data => $body ) if $body and $THUNDERBIRD; for my $file (@$attach) { if ( $file->[2] ) { # found a cid so inline it $msg->attach( Type => 'AUTO', Path => $file->[0], Id => $file->[2], Filename => $file->[1], Disposition => 'inline' ) } else { $msg->attach( Type => 'AUTO', Path => $file->[0], Filename => $file->[1], Disposition => 'attachment' ) } } $msg->attr('content-type.boundary', $boundary ); my $email = $msg->as_string; (undef, my $mime) = split /\n\n/,$email,2; $body = $mime; } elsif ( $header =~ m!Content-Type:\s*text/html!i ) { $body = $html if $html; } return "$header$body\n\n"; } sub get_fh { my $folder = shift; $folder =~ s/[^\w\.\-\/ ]+/_/g; $folder =~ s!^/!!; $folder =~ s/\s+/_/g if $NO_SPACES; print "Folder: $folder\n" if $DEBUG; my $filepath; if ($NO_FOLDERS) { $folder =~ s!/!.!g; $filepath = File::Spec->catfile( $SAVEDIR, $folder ); } else { my @path = split "/", $folder; my $file = pop @path; @path = map{"$_.sbd"}@path; my $dir = File::Spec->catdir( $SAVEDIR, @path); safe_mkdir( $SAVEDIR, @path ) unless -d $dir; $filepath = File::Spec->catfile( $SAVEDIR, @path, $file ); + } print "Folder path: $filepath\n" if $DEBUG; return *STDOUT if $DEBUG or $DUMP; open my $fh, ">", $filepath or die "Can't write $filepath $!\n"; return $fh; } sub safe_mkdir { my @dirs = (); while ( my $dir = shift ) { my $mkdir = File::Spec->catdir(@dirs,$dir); print "Doing mkdir $mkdir\n" if $DEBUG; push @dirs, $dir; next if -d $mkdir; mkdir $mkdir or die "Can't create directory $mkdir $!\n"; } } # simple closure counter unique filename generator { my $c; sub get_filename { my $filename = shift; $c ||= time(); $filename =~ s/[^\w\.\-]/_/g; sprintf "MB%08d_%s", $c++, $filename } } # simple closure counter unique boundary generator { my $c; sub get_boundary { $c ||= time(); sprintf "----------_tachyon_%d", $c++; } } sub html2text { my $html = shift; my %inside; my $text = ''; my $tag = sub { $inside{$_[0]} += $_[1]; $text .= " " }; my $txt = sub { $text .= $_[0] unless $inside{script} or $inside{s +tyle} }; HTML::Parser->new( api_version => 3, handlers => [ start => [$tag, "tagname, '+1'"] +, end => [$tag, "tagname, '-1'"] +, text => [$txt, "dtext"] ], marked_sections => 1, )->parse($html); $text =~ tr/\11\12\40-\176//cd; # remove wide non ascii chars $text = Text::Wrap::fill('', '', $text); $text =~ s/^\s+//; return $text; } sub ole_dump { my $obj = shift; printf "%s %s\n", $_, $obj->$_ for sort keys %$obj; }

In reply to Migration from Outlook to any other mail program by tachyon-II

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":