#!/usr/bin/perl ############ # killtnef # ############ use strict; use Convert::TNEF; use MIME::Parser; use MIME::Types; my $VERSION = 1.0.3; my $mimetypes = MIME::Types->new; my $message; while (defined(my $line = )) { my $msg_bound; if ($line =~ /^(From\s+.+)\r?\n/ and length($message) or eof) { parse_message(\$message, $msg_bound); # All the action happens here... $message = ""; $msg_bound=$line; } elsif ($line =~ /^(From\s+.+)\r?\n/) { $msg_bound=$line; # The first } $message.=$line; } exit; # Subroutines ############################################################### sub parse_message { my $msg_body = shift @_; my $msg_bound = shift @_; my $mime_parser = new MIME::Parser; # This module likes to use tmp files, but I try to stop it here. $mime_parser->use_inner_files(1); $mime_parser->output_to_core(1); my $ent = $mime_parser->parse_data($$msg_body); my $num_parts=$ent->parts; # Determine if we have a MIME w/ms-tnef and act accordingly. if ( ($num_parts < 1) || ($$msg_body !~ /ms-tnef/i) ) { print "$$msg_body"; } else { # Get the head info my $head = $ent->head; my $ReturnPath = $head->get('Return-Path'); my @all_received = $head->get('Received'); my $Date = $head->get('Date'); my $From = $head->get('From'); my $XSender = $head->get('X-Sender'); my $To = $head->get('To'); my $Subject = $head->get('Subject'); my $MessageID = $head->get('Message-ID'); my $boundary = $head->multipart_boundary; # Build a new MIME message based on the one we are examining # - LHH: it would probably be better to build this $new_ent # using $ent->head as the basis, thus getting *all* of # the headers, instead of just these few. We only needed # these few headers for the project this script was # originally written for, but if someone wants to change # this and submit a patch, that would be great. my $new_ent = MIME::Entity->build( 'Type' => "multipart/mixed", 'Boundary' => $boundary, 'X-Mailer' => undef ); my $new_head=$new_ent->head; # Try to preserve the order of headers in the original message by # extracting it from the original formatted header. my(%did_tag); foreach my $tag (@{$head->header}, $head->tags) { $tag =~ s/:.*//s; next if ($did_tag{lc $tag}++); next if ($new_head->count($tag)); foreach my $value ($head->get_all($tag)) { $new_head->add($tag, $value); } } # Loop over each MIME part adding each to the new message foreach my $mime_part_i (0 .. ($num_parts - 1)) { my $ent_part=$ent->parts($mime_part_i); if ($ent_part->mime_type =~ /ms-tnef/i ) { add_tnef_parts($ent_part, $new_ent); } else { $new_ent->add_part($ent_part); } } # Set the preamble and epilogue equal to the original $new_ent->preamble($ent->preamble); $new_ent->epilogue($ent->epilogue); # Print the newly constructed MIME message print "$msg_bound"; print STDOUT $new_ent->stringify; } } sub add_tnef_parts { my $ent = shift; my $new = shift; ## Create a tnef object my %TnefOpts=('output_to_core' => '4194304', 'output_dir' => '/tmp'); my $tnef = Convert::TNEF->read_ent($ent, \%TnefOpts); my $head=$new->head; # Get the header object from the new message if (! $tnef) { warn "TNEF CONVERT DID NOT WORK: " . $Convert::TNEF::errstr . "\n"; warn " - Failed on msg w/subj: " . $head->get('Subject') . "\n"; return ''; } ############################################################################# # This section of code smokes lots of crack, and tries to dig the From: # header out of the $tnef->message if the new message we are appending # this attachment to does not already have a "From" header. This is # required on most of the Outlook emails that never touch SMTP, only # Exchange servers, and never had valid SMTP From headers placed! ############################################################################# my $msg=$tnef->message; my $mapi_props=$msg->data('MAPIProps'); #warn join(", ", keys %{$msg->{MAPIProps}}) . "\n"; #warn $msg->{MAPIProps}->{MBS_Data} . "\n\n----------------------------\n\n"; #warn "$mapi_props\n\n---------------------------------\n\n"; my $test=0x0024; #if ($mapi_props =~ m/(\0\0\0\xf8.{20})/) { warn "MATCHED a prop $1\n"; } #if (0) { if (! length($head->get('From')) ) { my $from=''; my $cntrl_chars='[\c' . join('\c', ('A' .. 'Z')) . ']'; if ($mapi_props =~ m/From:\s+([^\s\@]+\@[^\s]+)/) { $from=$1; } elsif ($mapi_props =~ m/\xf8\?\cA\0\0\0$cntrl_chars\0\0\0([^\0]+)\0+\cB\cA/) { $from=$1; } else { if ($mapi_props =~ m/(\xf8\?\cA.{30})/) { warn "MATCH: $1\n"; } #$from="Unknown Sender"; } if( length($from)) { $head->replace('from', $from); } } ############################################################################# for ($tnef->attachments) { $_->longname=~/^[\w\W]+\.(\w+)$/; my $ext = $1; my $type = $mimetypes->mimeTypeOf($ext); if (! $type) { warn "No MIME type for (" . $_->longname . "/" . $_->name . ")\n"; $type = "Application/OCTET-STREAM"; } my $encoding; if ($type) { if ($type =~ m,^text/,) { if ($_->data =~ /[^\001-\177]/) { $encoding = '8bit'; } else { $encoding = '7bit'; } } else { $encoding = 'base64'; } } elsif ($_->data =~ /[^\t\n\r\f\040-\177]/) { $encoding = 'base64'; } else { $encoding = '7bit'; } $new->attach( Type => $type, Encoding => $encoding, Data => $_->data, Disposition => 'attachment', Filename => $_->longname ); } # If you want to delete the working files $tnef->purge; } # POD documentation =head1 SYNOPSIS cat mbox_msg_w_tnef | killtnef > mbox_msg_mime_compliant =head1 README killtnef - Converts emails with MS-TNEF, Microsoft's proprietary Transport Neutral Encapsulated Format, attachments into standard MIME-compliant emails. This script reads an mbox, or a single email message, from STDIN, extracts data from any ms-tnef attachments that may exist, and writes a new mbox (or a single email message) to STDOUT which has each of the files that were encoded in any ms-tnef attachments attached separately, as RFC-822/RFC-1521 compliant MIME emails/attachments. Any email(s) containing no ms-tnef MIME attachments are passed through this script untouched. =head1 DESCRIPTION This script was originally written to convert about 35,000 emails from some Microsoft Outlook *.pst (post office) files, almost all of which had ms-tnef encoded attachments, into MIME-compliant emails so that they could be imported into an email-archiving system that 10East supplies to some of its customers. If anyone is curious, an imapd was used to move the emails from the *.pst files to mbox format using Outlook 2000 as an IMAP client. This script can also be used as an incoming mail filter which will automatically convert ms-tnef attachments into MIME-compliant attachments. =head1 AUTHORSHIP Andrew Orr (no longer a maintainer) Lester Hightower (maintainer) =head1 LICENSE This software is licensed under the terms of the GNU Public License, which is available for review at http://www.gnu.org/copyleft/gpl.html =head1 CHANGE LOG Feb-22-2002: Originally created by Andy Orr Feb-26-2002: A few enhancements and bug-fixes by Lester Hightower. Mar-06-2002: Documentation and a few comments added by Lester Hightower in preparation for submitting this script to CPAN. Mar-07-2002: After realizing that a POD README section is needed for the HTML pages generated for the script index in CPAN, LHH added one and submitted this as killtnef-1.0.1.pl. Sep-20-2005: Applied a patch provided by Jonathan Kamens and released that as killtnef-1.0.2.pl. The patch did: * Use /usr/bin/perl instead of /usr/local/bin/perl. * Use MIME::Types instead of hard-coded list of extensions and MIME types. * Preserve MIME boundary and headers from original message. * Try to use 7bit or 8bit encoding instead of base64 whenever possible. This makes resulting messages smaller and easier to full-text index. May-30-2007: Applied a patch provided by Stefan Bertels and released that as killtnef-1.0.3.pl. The patch uses any "\w" characters after the last dot in each attachments' longname as the file extenstion, instead of just "([A-Za-z]{2,4})" and also defaults the type to "Application/OCTET-STREAM" if no mimetype can be found for the file extenstion. =head1 PREREQUISITES This script requires the C, C, C, and C modules. =head1 COREQUISITES None. =pod OSNAMES Any Unix-like. =pod SCRIPT CATEGORIES Mail Mail/Converters Mail/Filters =cut