#!/usr/local/bin/perl # $Id: stripmail.pl,v 1.2 2005/11/16 10:04:59 rej Exp $ # # Simple mail filter designed as a filter for printing mail messages. # Removes all headers except for To, Cc, Bcc, Subject, From and Date. # If the body has type # text/html runs it through lynx to remove html'ery # application/msword runs it through antiword # Also deals with recursive mail (e.g. a mail message # contains a mail message that contains a word attachment) # # Richard Jones 2005 # use Mail::Box::Manager; use FileHandle; use Data::Dumper; use strict; my $msg = Mail::Message->read(\*STDIN); print "To: " . $msg->get('to') . "\n"; print "Cc: " . $msg->get('cc') . "\n" if $msg->cc; print "Bcc: " . $msg->get('bcc') . "\n" if $msg->bcc; print "Subject: " . $msg->subject . "\n"; print "From: " . $msg->get('from') . "\n"; print "Date: " . $msg->get('date') . "\n"; print "----\n"; # Would be better to remove HTML'ery my $multi = $msg->isMultipart; # NB. could also use obj->foreachComponent(CODE) to execute the CODE # for each component of the message: the preamble, the epilogue, and # each of the parts. if ($multi) { #warn "Multipart"; my @parts = $msg->parts('RECURSE'); for(my $i = 0; $i < @parts; $i++) { printf("%c", 12) unless $i == 0; # NP my $body = $parts[$i]->decoded; doPart(\$body); } } else { #warn "Not Multipart"; my $body = $msg->decoded; doPart(\$body); } sub doPart ($) { my $partRef = shift; my $type = lc($$partRef->mimeType); # if it's HTML, run it through lynx my $str = $$partRef->string; my $txt; if ($type eq 'text/html') { $txt = &html2text(\$str); } # if it's word, run it through antiword elsif ($type eq 'application/msword') { $txt = &word2text(\$str); } # guess that it's word elsif ($type eq 'application/octet-stream' and $$partRef->dispositionFilename() =~ /\.doc$/i) { warn $$partRef->dispositionFilename() . " looks like a Word document\n"; warn "If it's not, you probably want to kill this job now.\n"; $txt = &word2text(\$str); } # it's printable text elsif ($$partRef->isText) { $txt = ''; } # Don't print it if it's binary else { warn "I'm skipping this component, MIME type: $type\n"; return; } if (length($txt)) { print $txt; } else { $$partRef->print; } } # Convert html to text sub html2text { my $rhtml = shift; if (ref($rhtml) ne 'SCALAR') { return ''; } my $LYNX = 'lynx'; my $body=''; my $tmp_file = "/tmp/stripmail.$$.tmp"; my $h_out=new FileHandle; my $h_in=new FileHandle; if (open($h_out, "| $LYNX -stdin -dump -nolog -noredir -localhost -nolist -width=76 1>$tmp_file 2>/dev/null")) { print $h_out $$rhtml; close $h_out; if (-r $tmp_file && open($h_in, "< $tmp_file")) { my @newtext_arr=<$h_in>; close($h_in); unlink $tmp_file; my $newtext=join('', @newtext_arr); if (length($newtext)) { $body = $newtext . "\n[HTML converted to text by lynx]\n"; } } } return $body; } # Convert word to text sub word2text { my $rhtml = shift; if (ref($rhtml) ne 'SCALAR') { return ''; } my $ANTIWORD = 'antiword'; my $body=''; my $doc_file = "/tmp/stripmail.$$.doc"; my $tmp_file = "/tmp/stripmail.$$.tmp"; my $h_out=new FileHandle; my $h_in=new FileHandle; open ($h_out, ">$doc_file"); print $h_out $$rhtml; close $h_out; system ("$ANTIWORD -i 0 -t $doc_file 1>$tmp_file 2>/dev/null"); unlink $doc_file; if (-r $tmp_file && open($h_in, "< $tmp_file")) { my @newtext_arr=<$h_in>; close($h_in); unlink $tmp_file; my $newtext=join('', @newtext_arr); if (length($newtext)) { $body = $newtext . "\n[Word converted to text by antiword]\n"; } } return $body; }