Master to 4.2.8
[usit-rt.git] / lib / RT / EmailParser.pm
CommitLineData
84fb5b46
MKG
1# BEGIN BPS TAGGED BLOCK {{{
2#
3# COPYRIGHT:
4#
320f0092 5# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
84fb5b46
MKG
6# <sales@bestpractical.com>
7#
8# (Except where explicitly superseded by other copyright notices)
9#
10#
11# LICENSE:
12#
13# This work is made available to you under the terms of Version 2 of
14# the GNU General Public License. A copy of that license should have
15# been provided with this software, but in any event can be snarfed
16# from www.gnu.org.
17#
18# This work is distributed in the hope that it will be useful, but
19# WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21# General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26# 02110-1301 or visit their web page on the internet at
27# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
28#
29#
30# CONTRIBUTION SUBMISSION POLICY:
31#
32# (The following paragraph is not intended to limit the rights granted
33# to you to modify and distribute this software under the terms of
34# the GNU General Public License and is only of importance to you if
35# you choose to contribute your changes and enhancements to the
36# community by submitting them to Best Practical Solutions, LLC.)
37#
38# By intentionally submitting any modifications, corrections or
39# derivatives to this work, or any other work intended for use with
40# Request Tracker, to Best Practical Solutions, LLC, you confirm that
41# you are the copyright holder for those contributions and you grant
42# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
43# royalty-free, perpetual, license to use, copy, create derivative
44# works based on those contributions, and sublicense and distribute
45# those contributions and any derivatives thereof.
46#
47# END BPS TAGGED BLOCK }}}
48
49package RT::EmailParser;
50
51
52use base qw/RT::Base/;
53
54use strict;
55use warnings;
56
57
58use Email::Address;
59use MIME::Entity;
60use MIME::Head;
61use MIME::Parser;
62use File::Temp qw/tempdir/;
63
64=head1 NAME
65
66 RT::EmailParser - helper functions for parsing parts from incoming
67 email messages
68
69=head1 SYNOPSIS
70
71
72=head1 DESCRIPTION
73
74
75
76
77=head1 METHODS
78
79=head2 new
80
81Returns a new RT::EmailParser object
82
83=cut
84
85sub new {
86 my $proto = shift;
87 my $class = ref($proto) || $proto;
88 my $self = {};
89 bless ($self, $class);
90 return $self;
91}
92
93
94=head2 SmartParseMIMEEntityFromScalar Message => SCALAR_REF [, Decode => BOOL, Exact => BOOL ] }
95
96Parse a message stored in a scalar from scalar_ref.
97
98=cut
99
100sub SmartParseMIMEEntityFromScalar {
101 my $self = shift;
102 my %args = ( Message => undef, Decode => 1, Exact => 0, @_ );
103
104 eval {
105 my ( $fh, $temp_file );
106 for ( 1 .. 10 ) {
107
108 # on NFS and NTFS, it is possible that tempfile() conflicts
109 # with other processes, causing a race condition. we try to
110 # accommodate this by pausing and retrying.
111 last
112 if ( $fh, $temp_file ) =
01e3b242 113 eval { File::Temp::tempfile( UNLINK => 0 ) };
84fb5b46
MKG
114 sleep 1;
115 }
116 if ($fh) {
117
118 #thank you, windows
119 binmode $fh;
120 $fh->autoflush(1);
121 print $fh $args{'Message'};
122 close($fh);
123 if ( -f $temp_file ) {
124
125 # We have to trust the temp file's name -- untaint it
126 $temp_file =~ /(.*)/;
127 my $entity = $self->ParseMIMEEntityFromFile( $1, $args{'Decode'}, $args{'Exact'} );
128 unlink($1);
129 return $entity;
130 }
131 }
132 };
133
84fb5b46
MKG
134 #If for some reason we weren't able to parse the message using a temp file
135 # try it with a scalar
136 if ( $@ || !$self->Entity ) {
137 return $self->ParseMIMEEntityFromScalar( $args{'Message'}, $args{'Decode'}, $args{'Exact'} );
138 }
139
140}
141
142
143=head2 ParseMIMEEntityFromSTDIN
144
145Parse a message from standard input
146
147=cut
148
149sub ParseMIMEEntityFromSTDIN {
150 my $self = shift;
151 return $self->ParseMIMEEntityFromFileHandle(\*STDIN, @_);
152}
153
154=head2 ParseMIMEEntityFromScalar $message
155
156Takes either a scalar or a reference to a scalar which contains a stringified MIME message.
157Parses it.
158
159Returns true if it wins.
160Returns false if it loses.
161
162=cut
163
164sub ParseMIMEEntityFromScalar {
165 my $self = shift;
166 return $self->_ParseMIMEEntity( shift, 'parse_data', @_ );
167}
168
169=head2 ParseMIMEEntityFromFilehandle *FH
170
171Parses a mime entity from a filehandle passed in as an argument
172
173=cut
174
175sub ParseMIMEEntityFromFileHandle {
176 my $self = shift;
177 return $self->_ParseMIMEEntity( shift, 'parse', @_ );
178}
179
180=head2 ParseMIMEEntityFromFile
181
182Parses a mime entity from a filename passed in as an argument
183
184=cut
185
186sub ParseMIMEEntityFromFile {
187 my $self = shift;
188 return $self->_ParseMIMEEntity( shift, 'parse_open', @_ );
189}
190
191
192sub _ParseMIMEEntity {
193 my $self = shift;
194 my $message = shift;
195 my $method = shift;
196 my $postprocess = (@_ ? shift : 1);
197 my $exact = shift;
198
199 # Create a new parser object:
200 my $parser = MIME::Parser->new();
201 $self->_SetupMIMEParser($parser);
202 $parser->decode_bodies(0) if $exact;
203
204 # TODO: XXX 3.0 we really need to wrap this in an eval { }
205 unless ( $self->{'entity'} = $parser->$method($message) ) {
206 $RT::Logger->crit("Couldn't parse MIME stream and extract the submessages");
207 # Try again, this time without extracting nested messages
208 $parser->extract_nested_messages(0);
209 unless ( $self->{'entity'} = $parser->$method($message) ) {
210 $RT::Logger->crit("couldn't parse MIME stream");
211 return ( undef);
212 }
213 }
214
215 $self->_PostProcessNewEntity if $postprocess;
216
217 return $self->{'entity'};
218}
219
220sub _DecodeBodies {
221 my $self = shift;
222 return unless $self->{'entity'};
223
224 my @parts = $self->{'entity'}->parts_DFS;
225 $self->_DecodeBody($_) foreach @parts;
226}
227
228sub _DecodeBody {
229 my $self = shift;
230 my $entity = shift;
231
232 my $old = $entity->bodyhandle or return;
233 return unless $old->is_encoded;
234
235 require MIME::Decoder;
236 my $encoding = $entity->head->mime_encoding;
237 my $decoder = MIME::Decoder->new($encoding);
238 unless ( $decoder ) {
239 $RT::Logger->error("Couldn't find decoder for '$encoding', switching to binary");
240 $old->is_encoded(0);
241 return;
242 }
243
244 require MIME::Body;
245 # XXX: use InCore for now, but later must switch to files
246 my $new = MIME::Body::InCore->new();
247 $new->binmode(1);
248 $new->is_encoded(0);
249
250 my $source = $old->open('r') or die "couldn't open body: $!";
251 my $destination = $new->open('w') or die "couldn't open body: $!";
252 {
253 local $@;
254 eval { $decoder->decode($source, $destination) };
255 $RT::Logger->error($@) if $@;
256 }
257 $source->close or die "can't close: $!";
258 $destination->close or die "can't close: $!";
259
260 $entity->bodyhandle( $new );
261}
262
263=head2 _PostProcessNewEntity
264
265cleans up and postprocesses a newly parsed MIME Entity
266
267=cut
268
269sub _PostProcessNewEntity {
270 my $self = shift;
271
272 #Now we've got a parsed mime object.
273
274 # Unfold headers that are have embedded newlines
275 # Better do this before conversion or it will break
276 # with multiline encoded Subject (RFC2047) (fsck.com #5594)
277 $self->Head->unfold;
278
279 # try to convert text parts into utf-8 charset
280 RT::I18N::SetMIMEEntityToEncoding($self->{'entity'}, 'utf-8');
281}
282
283=head2 ParseCcAddressesFromHead HASHREF
284
285Takes a hashref object containing QueueObj, Head and CurrentUser objects.
286Returns a list of all email addresses in the To and Cc
403d7b0b 287headers b<except> the current Queue's email addresses, the CurrentUser's
84fb5b46
MKG
288email address and anything that the RT->Config->Get('RTAddressRegexp') matches.
289
290=cut
291
292sub ParseCcAddressesFromHead {
293 my $self = shift;
294 my %args = (
295 QueueObj => undef,
296 CurrentUser => undef,
297 @_
298 );
299
300 my (@Addresses);
301
c33a4027
MKG
302 my @ToObjs = Email::Address->parse( Encode::decode( "UTF-8", $self->Head->get('To') ) );
303 my @CcObjs = Email::Address->parse( Encode::decode( "UTF-8", $self->Head->get('Cc') ) );
84fb5b46
MKG
304
305 foreach my $AddrObj ( @ToObjs, @CcObjs ) {
306 my $Address = $AddrObj->address;
307 my $user = RT::User->new(RT->SystemUser);
308 $Address = $user->CanonicalizeEmailAddress($Address);
309 next if lc $args{'CurrentUser'}->EmailAddress eq lc $Address;
310 next if $self->IsRTAddress($Address);
311
312 push ( @Addresses, $Address );
313 }
314 return (@Addresses);
315}
316
317
318=head2 IsRTaddress ADDRESS
319
320Takes a single parameter, an email address.
321Returns true if that address matches the C<RTAddressRegexp> config option.
322Returns false, otherwise.
323
324
325=cut
326
327sub IsRTAddress {
328 my $self = shift;
329 my $address = shift;
330
331 if ( my $address_re = RT->Config->Get('RTAddressRegexp') ) {
332 return $address =~ /$address_re/i ? 1 : undef;
333 }
334
335 # we don't warn here, but do in config check
336 if ( my $correspond_address = RT->Config->Get('CorrespondAddress') ) {
337 return 1 if lc $correspond_address eq lc $address;
338 }
339 if ( my $comment_address = RT->Config->Get('CommentAddress') ) {
340 return 1 if lc $comment_address eq lc $address;
341 }
342
343 my $queue = RT::Queue->new( RT->SystemUser );
344 $queue->LoadByCols( CorrespondAddress => $address );
345 return 1 if $queue->id;
346
347 $queue->LoadByCols( CommentAddress => $address );
348 return 1 if $queue->id;
349
350 return undef;
351}
352
353
354=head2 CullRTAddresses ARRAY
355
356Takes a single argument, an array of email addresses.
357Returns the same array with any IsRTAddress()es weeded out.
358
359
360=cut
361
362sub CullRTAddresses {
363 my $self = shift;
364 my @addresses = (@_);
365
366 return grep { !$self->IsRTAddress($_) } @addresses;
367}
368
369
370
371
372
373# LookupExternalUserInfo is a site-definable method for synchronizing
374# incoming users with an external data source.
375#
376# This routine takes a tuple of EmailAddress and FriendlyName
377# EmailAddress is the user's email address, ususally taken from
378# an email message's From: header.
379# FriendlyName is a freeform string, ususally taken from the "comment"
380# portion of an email message's From: header.
381#
382# If you define an AutoRejectRequest template, RT will use this
383# template for the rejection message.
384
385
386=head2 LookupExternalUserInfo
387
388 LookupExternalUserInfo is a site-definable method for synchronizing
389 incoming users with an external data source.
390
391 This routine takes a tuple of EmailAddress and FriendlyName
392 EmailAddress is the user's email address, ususally taken from
393 an email message's From: header.
394 FriendlyName is a freeform string, ususally taken from the "comment"
395 portion of an email message's From: header.
396
397 It returns (FoundInExternalDatabase, ParamHash);
398
399 FoundInExternalDatabase must be set to 1 before return if the user
400 was found in the external database.
401
402 ParamHash is a Perl parameter hash which can contain at least the
403 following fields. These fields are used to populate RT's users
404 database when the user is created.
405
406 EmailAddress is the email address that RT should use for this user.
407 Name is the 'Name' attribute RT should use for this user.
408 'Name' is used for things like access control and user lookups.
409 RealName is what RT should display as the user's name when displaying
410 'friendly' names
411
412=cut
413
414sub LookupExternalUserInfo {
415 my $self = shift;
416 my $EmailAddress = shift;
417 my $RealName = shift;
418
419 my $FoundInExternalDatabase = 1;
420 my %params;
421
422 #Name is the RT username you want to use for this user.
423 $params{'Name'} = $EmailAddress;
424 $params{'EmailAddress'} = $EmailAddress;
425 $params{'RealName'} = $RealName;
426
427 return ($FoundInExternalDatabase, %params);
428}
429
430=head2 Head
431
432Return the parsed head from this message
433
434=cut
435
436sub Head {
437 my $self = shift;
438 return $self->Entity->head;
439}
440
441=head2 Entity
442
443Return the parsed Entity from this message
444
445=cut
446
447sub Entity {
448 my $self = shift;
449 return $self->{'entity'};
450}
451
452
453
454=head2 _SetupMIMEParser $parser
455
456A private instance method which sets up a mime parser to do its job
457
458=cut
459
460
461 ## TODO: Does it make sense storing to disk at all? After all, we
462 ## need to put each msg as an in-core scalar before saving it to
463 ## the database, don't we?
464
465 ## At the same time, we should make sure that we nuke attachments
466 ## Over max size and return them
467
468sub _SetupMIMEParser {
469 my $self = shift;
470 my $parser = shift;
471
472 # Set up output directory for files; we use $RT::VarPath instead
473 # of File::Spec->tmpdir (e.g., /tmp) beacuse it isn't always
474 # writable.
475 my $tmpdir;
476 if ( -w $RT::VarPath ) {
477 $tmpdir = File::Temp::tempdir( DIR => $RT::VarPath, CLEANUP => 1 );
478 } elsif (-w File::Spec->tmpdir) {
479 $tmpdir = File::Temp::tempdir( TMPDIR => 1, CLEANUP => 1 );
480 } else {
481 $RT::Logger->crit("Neither the RT var directory ($RT::VarPath) nor the system tmpdir (@{[File::Spec->tmpdir]}) are writable; falling back to in-memory parsing!");
482 }
483
484 #If someone includes a message, extract it
485 $parser->extract_nested_messages(1);
486 $parser->extract_uuencode(1); ### default is false
487
488 if ($tmpdir) {
489 # If we got a writable tmpdir, write to disk
490 push ( @{ $self->{'AttachmentDirs'} ||= [] }, $tmpdir );
491 $parser->output_dir($tmpdir);
492 $parser->filer->ignore_filename(1);
493
494 # Set up the prefix for files with auto-generated names:
495 $parser->output_prefix("part");
496
497 # From the MIME::Parser docs:
498 # "Normally, tmpfiles are created when needed during parsing, and destroyed automatically when they go out of scope"
499 # Turns out that the default is to recycle tempfiles
500 # Temp files should never be recycled, especially when running under perl taint checking
501
502 $parser->tmp_recycling(0) if $parser->can('tmp_recycling');
503 } else {
504 # Otherwise, fall back to storing it in memory
505 $parser->output_to_core(1);
506 $parser->tmp_to_core(1);
507 $parser->use_inner_files(1);
508 }
509
510}
511
512=head2 ParseEmailAddress string
513
514Returns a list of Email::Address objects
515Works around the bug that Email::Address 1.889 and earlier
516doesn't handle local-only email addresses (when users pass
517in just usernames on the RT system in fields that expect
518Email Addresses)
519
520We don't handle the case of
521bob, fred@bestpractical.com
522because we don't want to fail parsing
523bob, "Falcone, Fred" <fred@bestpractical.com>
524The next release of Email::Address will have a new method
525we can use that removes the bandaid
526
527=cut
528
af59614d
MKG
529use Email::Address::List;
530
84fb5b46
MKG
531sub ParseEmailAddress {
532 my $self = shift;
533 my $address_string = shift;
534
af59614d
MKG
535 my @list = Email::Address::List->parse(
536 $address_string,
537 skip_comments => 1,
538 skip_groups => 1,
539 );
540 my $logger = sub { RT->Logger->error(
541 "Unable to parse an email address from $address_string: ". shift
542 ) };
84fb5b46
MKG
543
544 my @addresses;
af59614d
MKG
545 foreach my $e ( @list ) {
546 if ($e->{'type'} eq 'mailbox') {
547 if ($e->{'not_ascii'}) {
548 $logger->($e->{'value'} ." contains not ASCII values");
549 next;
550 }
551 push @addresses, $e->{'value'}
552 } elsif ( $e->{'value'} =~ /^\s*(\w+)\s*$/ ) {
553 my $user = RT::User->new( RT->SystemUser );
554 $user->Load( $1 );
555 if ($user->id) {
556 push @addresses, Email::Address->new($user->Name, $user->EmailAddress);
557 } else {
558 $logger->($e->{'value'} ." is not a valid email address and is not user name");
559 }
84fb5b46 560 } else {
af59614d 561 $logger->($e->{'value'} ." is not a valid email address");
84fb5b46 562 }
84fb5b46 563 }
320f0092
MKG
564
565 $self->CleanupAddresses(@addresses);
566
84fb5b46 567 return @addresses;
84fb5b46
MKG
568}
569
320f0092
MKG
570=head2 CleanupAddresses ARRAY
571
572Massages an array of L<Email::Address> objects to make their email addresses
573more palatable.
574
575Currently this strips off surrounding single quotes around C<< ->address >> and
576B<< modifies the L<Email::Address> objects in-place >>.
577
578Returns the list of objects for convienence in C<map>/C<grep> chains.
579
580=cut
581
582sub CleanupAddresses {
583 my $self = shift;
584
585 for my $addr (@_) {
586 next unless defined $addr;
587 # Outlook sometimes sends addresses surrounded by single quotes;
588 # clean them all up
589 if ((my $email = $addr->address) =~ s/^'(.+)'$/$1/) {
590 $addr->address($email);
591 }
592 }
593 return @_;
594}
595
84fb5b46
MKG
596=head2 RescueOutlook
597
598Outlook 2007/2010 have a bug when you write an email with the html format.
599it will send a 'multipart/alternative' with both 'text/plain' and 'text/html'
600in it. it's cool to have a 'text/plain' part, but the problem is the part is
601not so right: all the "\n" in your main message will become "\n\n" :/
602
603this method will fix this bug, i.e. replaces "\n\n" to "\n".
604return 1 if it does find the problem in the entity and get it fixed.
605
606=cut
607
608
609sub RescueOutlook {
610 my $self = shift;
611 my $mime = $self->Entity();
84fb5b46 612
403d7b0b
MKG
613 return unless $mime && $self->LooksLikeMSEmail($mime);
614
615 my $text_part;
616 if ( $mime->head->get('Content-Type') =~ m{multipart/mixed} ) {
617 my $first = $mime->parts(0);
618 if ( $first->head->get('Content-Type') =~ m{multipart/alternative} )
619 {
620 my $inner_first = $first->parts(0);
621 if ( $inner_first->head->get('Content-Type') =~ m{text/plain} )
84fb5b46 622 {
403d7b0b 623 $text_part = $inner_first;
84fb5b46
MKG
624 }
625 }
403d7b0b
MKG
626 }
627 elsif ( $mime->head->get('Content-Type') =~ m{multipart/alternative} ) {
628 my $first = $mime->parts(0);
629 if ( $first->head->get('Content-Type') =~ m{text/plain} ) {
630 $text_part = $first;
84fb5b46 631 }
403d7b0b 632 }
84fb5b46 633
403d7b0b
MKG
634 # Add base64 since we've seen examples of double newlines with
635 # this type too. Need an example of a multi-part base64 to
636 # handle that permutation if it exists.
c33a4027 637 elsif ( ($mime->head->get('Content-Transfer-Encoding')||'') =~ m{base64} ) {
403d7b0b
MKG
638 $text_part = $mime; # Assuming single part, already decoded.
639 }
640
641 if ($text_part) {
642
643 # use the unencoded string
644 my $content = $text_part->bodyhandle->as_string;
645 if ( $content =~ s/\n\n/\n/g ) {
646
647 # Outlook puts a space on extra newlines, remove it
648 $content =~ s/\ +$//mg;
649
650 # only write only if we did change the content
651 if ( my $io = $text_part->open("w") ) {
652 $io->print($content);
653 $io->close;
654 $RT::Logger->debug(
655 "Removed extra newlines from MS Outlook message.");
656 return 1;
657 }
658 else {
659 $RT::Logger->error("Can't write to body to fix newlines");
84fb5b46
MKG
660 }
661 }
662 }
403d7b0b 663
84fb5b46
MKG
664 return;
665}
666
403d7b0b
MKG
667=head1 LooksLikeMSEmail
668
669Try to determine if the current email may have
670come from MS Outlook or gone through Exchange, and therefore
671may have extra newlines added.
672
673=cut
674
675sub LooksLikeMSEmail {
676 my $self = shift;
677 my $mime = shift;
678
679 my $mailer = $mime->head->get('X-Mailer');
680
681 # 12.0 is outlook 2007, 14.0 is 2010
682 return 1 if ( $mailer && $mailer =~ /Microsoft(?:.*?)Outlook 1[2-4]\./ );
683
684 if ( RT->Config->Get('CheckMoreMSMailHeaders') ) {
685
686 # Check for additional headers that might
687 # indicate this came from Outlook or through Exchange.
688 # A sample we received had the headers X-MS-Has-Attach: and
689 # X-MS-Tnef-Correlator: and both had no value.
690
691 my @tags = $mime->head->tags();
692 return 1 if grep { /^X-MS-/ } @tags;
693 }
694
695 return 0; # Doesn't look like MS email.
696}
84fb5b46
MKG
697
698sub DESTROY {
699 my $self = shift;
700 File::Path::rmtree([@{$self->{'AttachmentDirs'}}],0,1)
701 if $self->{'AttachmentDirs'};
702}
703
704
705
706RT::Base->_ImportOverlays();
707
7081;