Upgrade to 4.2.2
[usit-rt.git] / lib / RT / I18N.pm
CommitLineData
84fb5b46
MKG
1# BEGIN BPS TAGGED BLOCK {{{
2#
3# COPYRIGHT:
4#
320f0092 5# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
84fb5b46
MKG
6# <sales@bestpractical.com>
7#
8# (Except where explicitly superseded by other copyright notices)
9#
10#
11# LICENSE:
12#
13# This work is made available to you under the terms of Version 2 of
14# the GNU General Public License. A copy of that license should have
15# been provided with this software, but in any event can be snarfed
16# from www.gnu.org.
17#
18# This work is distributed in the hope that it will be useful, but
19# WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21# General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26# 02110-1301 or visit their web page on the internet at
27# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
28#
29#
30# CONTRIBUTION SUBMISSION POLICY:
31#
32# (The following paragraph is not intended to limit the rights granted
33# to you to modify and distribute this software under the terms of
34# the GNU General Public License and is only of importance to you if
35# you choose to contribute your changes and enhancements to the
36# community by submitting them to Best Practical Solutions, LLC.)
37#
38# By intentionally submitting any modifications, corrections or
39# derivatives to this work, or any other work intended for use with
40# Request Tracker, to Best Practical Solutions, LLC, you confirm that
41# you are the copyright holder for those contributions and you grant
42# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
43# royalty-free, perpetual, license to use, copy, create derivative
44# works based on those contributions, and sublicense and distribute
45# those contributions and any derivatives thereof.
46#
47# END BPS TAGGED BLOCK }}}
48
49=head1 NAME
50
51RT::I18N - a base class for localization of RT
52
53=cut
54
55package RT::I18N;
56
57use strict;
58use warnings;
59
60
61use Locale::Maketext 1.04;
62use Locale::Maketext::Lexicon 0.25;
63use base 'Locale::Maketext::Fuzzy';
64
65use Encode;
66use MIME::Entity;
67use MIME::Head;
68use File::Glob;
69
70# I decree that this project's first language is English.
71
# Base (English) lexicon for RT.
our %Lexicon = (
    TEST_STRING      => 'Concrete Mixer',
    '__Content-Type' => 'text/plain; charset=utf-8',

    # _AUTO means that lookup failures can't happen: if a key is looked
    # up in this lexicon and not found, $Lexicon{$key} is automatically
    # set to $key before it is (possibly) compiled.
    #
    # The exception is keys that start with "_" -- those are never
    # auto-created.
    _AUTO => 1,
);
# End of lexicon.
87
88=head2 Init
89
90Initializes the lexicons used for localization.
91
92
93=cut
94
# Load the language helper modules that live next to this file and
# register the .po lexicons for every configured language.
# Always returns 1.
sub Init {
    my @languages = RT->Config->Get('LexiconLanguages');
    @languages = ('*') unless @languages;

    # Directory of per-language modules: this file's path minus ".pm".
    my $module_dir = substr( __FILE__, 0, -3 );

    # load default functions
    my $default_module = $module_dir . '/i_default.pm';
    require $default_module;

    # Load language-specific functions
    foreach my $candidate ( File::Glob::bsd_glob( $module_dir . "/*.pm" ) ) {

        # Untaint the path by capturing it through a whitelist pattern.
        my ($path) = $candidate =~ /^([-\w\s\.\/\\~:]+)$/;
        unless ( defined $path ) {
            warn("$candidate is tainted. not loading");
            next;
        }

        # The language code is the file's base name without ".pm".
        my ($code) = $path =~ /([^\\\/]+?)\.pm$/;
        next unless grep { $_ eq '*' || $_ eq $code } @languages;

        require $path;
    }

    # For each language, list the Gettext sources in lookup order:
    # core lexicon, plugin lexicons, then local overrides.
    my %sources_for;
    foreach my $code (@languages) {
        my @sources = ( Gettext => $RT::LexiconPath . "/$code.po" );
        push @sources, map { ( Gettext => "$_/$code.po" ) } RT->PluginDirs('po');
        push @sources,
            Gettext => $RT::LocalLexiconPath . "/*/$code.po",
            Gettext => $RT::LocalLexiconPath . "/$code.po";
        $sources_for{$code} = \@sources;
    }

    # Acquire all .po files and iterate them into lexicons
    Locale::Maketext::Lexicon->import( { _decode => 1, %sources_for } );

    return 1;
}
131
# Walk the packages nested under RT::I18N and read one key from each
# %Lexicon found, so that lazily loaded lexicons are actually loaded.
sub LoadLexicons {

    no strict 'refs';
    foreach my $entry ( keys %RT::I18N:: ) {
        next if $entry eq 'main::';

        # Only symbol-table entries naming a nested package are of interest.
        next if index( $entry, '::', -2 ) < 0;

        my $package_stash = 'RT::I18N::' . $entry;
        next unless exists ${$package_stash}{'Lexicon'};

        my $lexicon = *{ ${$package_stash}{'Lexicon'} }{HASH};

        # run fetch to force load
        my $ignored = $lexicon->{'foo'};

        # XXX: untie may fail with "untie attempted
        # while 1 inner references still exist"
        # TODO: untie that has to lower fetch impact
        # untie %$lexicon if tied %$lexicon;
    }
}
149
150=head2 encoding
151
Returns the encoding of the current lexicon.  Lexicons are imported with
decoding enabled, so this always returns 'utf-8'.
154
155
156
157=cut
158
159
# Lexicon encoding; always 'utf-8'.
sub encoding {
    return 'utf-8';
}
161
162
163=head2 SetMIMEEntityToUTF8 $entity
164
165An utility function which will try to convert entity body into utf8.
166It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
167
168=cut
169
# Convenience wrapper: convert $entity to utf-8 via SetMIMEEntityToEncoding.
sub SetMIMEEntityToUTF8 {
    my $entity = shift;
    return RT::I18N::SetMIMEEntityToEncoding( $entity, 'utf-8' );
}
173
174
175
176=head2 IsTextualContentType $type
177
178An utility function that determines whether $type is I<textual>, meaning
179that it can sensibly be converted to Unicode text.
180
181Currently, it returns true iff $type matches this regular expression
182(case-insensitively):
183
184 ^(?:text/(?:plain|html)|message/rfc822)\b
185
186
187=cut
188
# Returns 1 when $type starts with text/plain, text/html or
# message/rfc822 (matched case-insensitively), otherwise 0.
sub IsTextualContentType {
    my ($content_type) = @_;
    return 1
        if $content_type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i;
    return 0;
}
193
194
af59614d 195=head2 SetMIMEEntityToEncoding Entity => ENTITY, Encoding => ENCODING, PreserveWords => BOOL, IsOut => BOOL
84fb5b46
MKG
196
A utility function which tries to convert an entity's body into the
specified charset encoding (encoded as octets, *not* unicode-strings). It
recurses through all the parts of $entity and tries to convert each one
whose Content-Type is textual (see IsTextualContentType) into that charset.
201
af59614d
MKG
If PreserveWords is true, MIME-encoded words in the header values are left encoded instead of being decoded (default is false).
203
Incoming and outgoing mail are handled differently: if IsOut is true (default
is false) the entity is treated as outgoing mail, otherwise as incoming mail:
206
207incoming mail:
2081) find encoding
2092) if found then try to convert to utf-8 in croak mode, return if success
2103) guess encoding
2114) if guessed differently then try to convert to utf-8 in croak mode, return
212 if success
2135) mark part as application/octet-stream instead of falling back to any
214 encoding
215
216outgoing mail:
2171) find encoding
2182) if didn't find then do nothing, send as is, let MUA deal with it
2193) if found then try to convert it to outgoing encoding in croak mode, return
220 if success
2214) do nothing otherwise, keep original encoding
222
84fb5b46
MKG
223This function doesn't return anything meaningful.
224
225=cut
226
# Convert the headers and (for textual parts) the body of a MIME entity,
# and of all of its sub-parts, to the requested encoding.
#
# Accepts either up to three positional arguments
#   ( $entity, $encoding, $preserve_words )
# or a named list
#   ( Entity => ..., Encoding => ..., PreserveWords => ..., IsOut => ... ).
sub SetMIMEEntityToEncoding {
    my ( $entity, $enc, $preserve_words, $is_out );

    if ( @_ > 3 ) {
        my %args = (
            Entity        => undef,
            Encoding      => undef,
            PreserveWords => undef,
            IsOut         => undef,
            @_,
        );
        ( $entity, $enc, $preserve_words, $is_out )
            = @args{qw(Entity Encoding PreserveWords IsOut)};
    }
    else {
        ( $entity, $enc, $preserve_words ) = @_;
    }

    if ( !$entity || !$enc ) {
        RT->Logger->error("Missing Entity or Encoding arguments");
        return;
    }

    # do the same for parts first of all
    foreach my $part ( $entity->parts ) {
        SetMIMEEntityToEncoding(
            Entity        => $part,
            Encoding      => $enc,
            PreserveWords => $preserve_words,
            IsOut         => $is_out,
        );
    }

    my $head    = $entity->head;
    my $charset = _FindOrGuessCharset($entity);

    if ( $charset && !Encode::find_encoding($charset) ) {
        $RT::Logger->warning("Encoding '$charset' is not supported");
        $charset = undef;
    }

    # Without a usable charset the part is relabelled as opaque binary,
    # remembering the original Content-Type in a private header.
    if ( !$charset ) {
        $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') );
        $head->mime_attr( 'Content-Type' => 'application/octet-stream' );
        return;
    }

    SetMIMEHeadToEncoding(
        Head          => $head,
        From          => _FindOrGuessCharset( $entity, 1 ),
        To            => $enc,
        PreserveWords => $preserve_words,
        IsOut         => $is_out,
    );

    # If this is a textual entity, we'd need to preserve its original encoding
    if (   $head->mime_attr('content-type.charset')
        or IsTextualContentType( $head->mime_type ) )
    {
        $head->replace( "X-RT-Original-Encoding" => $charset );
    }

    return unless IsTextualContentType( $head->mime_type );

    my $body = $entity->bodyhandle;

    if ( $body && ( $enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i ) ) {
        my $octets = $body->as_string or return;

        $RT::Logger->debug( "Converting '$charset' to '$enc' for "
                . $head->mime_type . " - "
                . ( $head->get('subject') || 'Subjectless message' ) );

        # Encode::from_to works on octets, not on Perl character strings,
        # so the UTF-8 flag has to be cleared before converting.
        Encode::_utf8_off($octets);

        # On success $result holds the converted text; on failure it
        # holds the error message.
        my ( $ok, $result ) = EncodeFromToWithCroak( $octets, $charset => $enc );

        if ( !$ok ) {

            # Outgoing mail keeps its original encoding on failure.
            return if $is_out;

            my $error = $result;
            my $guess = _GuessCharset($octets);

            if ( $guess && $guess ne $charset ) {
                $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
                ( $ok, $result ) = EncodeFromToWithCroak( $octets, $guess, $enc );
                $error = $result unless $ok;
            }

            if ( !$ok ) {
                $RT::Logger->error( "Encoding error: " . $error . " falling back to application/octet-stream" );
                $head->mime_attr( "content-type" => 'application/octet-stream' );
                return;
            }
        }

        my $new_body = MIME::Body::InCore->new($result);

        # set up the new entity
        $head->mime_attr( "content-type" => 'text/plain' )
            unless $head->mime_attr("content-type");
        $head->mime_attr( "content-type.charset" => $enc );
        $entity->bodyhandle($new_body);
    }
}
330
331# NOTES: Why Encode::_utf8_off before Encode::from_to
332#
333# All the strings in RT are utf-8 now. Quotes from Encode POD:
334#
335# [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
336# ... The data in $octets must be encoded as octets and not as
337# characters in Perl's internal format. ...
338#
339# Not turning off the UTF-8 flag in the string will prevent the string
340# from conversion.
341
342
343
344=head2 DecodeMIMEWordsToUTF8 $raw
345
346An utility method which mimics MIME::Words::decode_mimewords, but only
347limited functionality. This function returns an utf-8 string.
348
349It returns the decoded string, or the original string if it's not
350encoded. Since the subroutine converts specified string into utf-8
351charset, it should not alter a subject written in English.
352
353Why not use MIME::Words directly? Because it fails in RT when I
354tried. Maybe it's ok now.
355
356=cut
357
# Decode MIME encoded-words in $str to utf-8; any further arguments
# (such as the header field name) are passed through unchanged.
sub DecodeMIMEWordsToUTF8 {
    my ( $str, @rest ) = @_;
    return DecodeMIMEWordsToEncoding( $str, 'utf-8', @rest );
}
362
# Decode the MIME encoded-words in one header value into $to_charset.
#
# Arguments:
#   $str        - the raw header value
#   $to_charset - target charset (canonicalized before use)
#   $field      - header field name; selects how the value is parsed.
#                 A warning is logged when it is missing.
#
# Returns the decoded value with every newline removed.
sub DecodeMIMEWordsToEncoding {
    my $str        = shift;
    my $to_charset = _CanonicalizeCharset(shift);
    my $field      = shift || '';

    $RT::Logger->warning(
        "DecodeMIMEWordsToEncoding was called without field name. "
        ."It's known to cause troubles with decoding fields properly."
    ) unless $field;

    # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
    # We _should_ be preserving them encoded until after parsing is completed and
    # THEN undo the mime-encoding.
    #
    # This routine should be translating the existing mimeencoding to utf8 but leaving
    # things encoded.
    #
    # It's legal for headers to contain mime-encoded commas and semicolons which
    # should not be treated as address separators. (Encoding == quoting here)
    #
    # until this is fixed, we must escape any string containing a comma or semicolon
    # this is only a bandaid

    # Some _other_ MUAs encode quotes _already_, and double quotes
    # confuse us a lot, so only quote it if it isn't quoted
    # already.

    # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
    # continuations, and similar syntax from RFC 2231
    if ( $field =~ /^Content-/i ) {
        # This concatenates continued parameters and normalizes encoded params
        # to QB encoded-words which we handle below
        my $params = MIME::Field::ParamVal->parse_params($str);
        foreach my $v ( values %$params ) {
            $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
            # de-quote in case those were hidden inside encoded part
            $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
        }
        $str = bless( {}, 'MIME::Field::ParamVal' )->set($params)->stringify;
    }
    elsif ( $field =~ /^(?:Resent-)?(?:To|From|B?Cc|Sender|Reply-To)$/i ) {
        my @addresses = RT::EmailParser->ParseEmailAddress($str);
        foreach my $address (@addresses) {
            # $part is an accessor name on the address object; it is
            # deliberately not called $field, to avoid shadowing the
            # header name above.
            foreach my $part (qw(phrase comment)) {
                my $v = $address->$part() or next;
                $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
                if ( $part eq 'phrase' ) {
                    # de-quote in case quoted value were hidden inside encoded part
                    $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
                }
                $address->$part($v);
            }
        }
        $str = join ', ', map { $_->format } @addresses;
    }
    else {
        $str = _DecodeMIMEWordsToEncoding( $str, $to_charset );
    }

    # We might have \n without trailing whitespace, which will result in
    # invalid headers.
    $str =~ s/\n//g;

    return ($str);
}
428
# Decode every MIME encoded-word (=?charset?Q|B?text?=) found in $str and
# convert the decoded bytes to $to_charset.  Text around the encoded-words
# is kept.  Returns $str unchanged when it contains no encoded-word.
sub _DecodeMIMEWordsToEncoding {
    my ( $str, $to_charset ) = @_;

    use MIME::QuotedPrint;
    use MIME::Base64;

    # Pre-parse by removing all whitespace between encoded words
    my $encoded_word = qr/
                 =\?            # =?
                 ([^?]+?)       # charset
                 (?:\*[^?]+)?   # optional '*language'
                 \?             # ?
                 ([QqBb])       # encoding
                 \?             # ?
                 ([^?]+)        # encoded string
                 \?=            # ?=
                 /x;
    $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;

    # Also merge quoted-printable sections together, in case multiple
    # octets of a single encoded character were split between chunks.
    # Though not valid according to RFC 2047, this has been seen in the
    # wild.
    while ( $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i ) {
        # keep merging until nothing changes
    }

    # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
    # custom MIME word decoding and charset canonicalization.  We can't do this
    # until we parse before decode, instead of the other way around.
    my @list = $str =~ m/(.*?)          # prefix
                         $encoded_word
                         ([^=]*)        # trailing
                        /xgcs;
    return $str unless @list;

    # add everything that hasn't matched to the end of the latest
    # string in array this happen when we have 'key="=?encoded?="; key="plain"'
    $list[-1] .= substr( $str, pos $str );

    # Every match contributed five captures; rebuild $str match by match.
    $str = '';
    while (@list) {
        my ( $before, $charset, $encoding, $chunk, $after ) = splice @list, 0, 5;
        $charset  = _CanonicalizeCharset($charset);
        $encoding = lc $encoding;

        $after =~ s/\s?\t?$//;    # Observed from Outlook Express

        if ( $encoding eq 'q' ) {
            $chunk =~ tr/_/ /;    # Observed from Outlook Express
            $chunk = decode_qp($chunk);
        }
        elsif ( $encoding eq 'b' ) {
            $chunk = decode_base64($chunk);
        }
        else {
            $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
                ."only Q(uoted-printable) and B(ase64) are supported");
        }

        # now we have got a decoded subject, try to convert into the encoding
        my $needs_conversion = $charset ne $to_charset
            || $charset =~ /^utf-?8(?:-strict)?$/i;

        if ($needs_conversion) {
            if ( Encode::find_encoding($charset) ) {
                Encode::from_to( $chunk, $charset, $to_charset );
            }
            else {
                $RT::Logger->warning("Charset '$charset' is not supported");

                # Swap each non-printable octet for the UTF-8 bytes of
                # U+FFFD, then transcode unless the target is utf-8.
                $chunk =~ s/[^[:print:]]/\357\277\275/g;
                Encode::from_to( $chunk, 'UTF-8', $to_charset )
                    unless $to_charset eq 'utf-8';
            }
        }

        $str .= $before . $chunk . $after;
    }

    return ($str);
}
502
503
84fb5b46
MKG
504=head2 _FindOrGuessCharset MIME::Entity, $head_only
505
506When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
507
508If $head_only is true, only guesses charset for head parts. This is because header's encoding (e.g. filename="...") may be different from that of body's.
509
510=cut
511
# Return the charset declared in the entity's Content-Type, or a guess.
# The guess is made from the body for text/* parts, and from the header
# text when $head_only is true or the part is not text/*.
sub _FindOrGuessCharset {
    my ( $entity, $head_only ) = @_;
    my $head = $entity->head;

    my $declared = $head->mime_attr("content-type.charset");
    return _CanonicalizeCharset($declared) if $declared;

    # potentially binary data -- don't guess the body
    return _GuessCharset( $head->as_string )
        if $head_only or $head->mime_type !~ m{^text/};

    my $body = $entity->bodyhandle or return;
    return _GuessCharset( $body->as_string );
}
531
532
533
534=head2 _GuessCharset STRING
535
Uses Encode::Guess to try to figure out the string's encoding.
537
538=cut
539
# Whether the optional guessing modules could be loaded.
use constant HAS_ENCODE_GUESS => do { local $@; eval { require Encode::Guess; 1 } };
use constant HAS_ENCODE_DETECT => do { local $@; eval { require Encode::Detect::Detector; 1 } };

# Guess the charset of the string passed as the first argument, using the
# EmailInputEncodings configuration.  Falls back to the canonical form of
# 'iso-8859-1' whenever no guess can be made.  The argument is read in
# place through $_[0] rather than copied.
sub _GuessCharset {
    my $fallback = _CanonicalizeCharset('iso-8859-1');

    # if $_[0] is null/empty, we don't guess its encoding
    return $fallback if !defined $_[0] || !length $_[0];

    my @suspects = RT->Config->Get('EmailInputEncodings');
    if ( !@suspects ) {
        $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback");
        return $fallback;
    }

    # A leading '*' asks for automatic detection before the explicit list.
    if ( $suspects[0] eq '*' ) {
        shift @suspects;
        if ( HAS_ENCODE_DETECT ) {
            my $detected = Encode::Detect::Detector::detect( $_[0] );
            if ( $detected ) {
                $RT::Logger->debug("Encode::Detect::Detector guessed encoding: $detected");
                return _CanonicalizeCharset( Encode::resolve_alias( $detected ) );
            }
            $RT::Logger->debug("Encode::Detect::Detector failed to guess encoding");
        }
        else {
            $RT::Logger->error(
                "You requested to guess encoding, but we couldn't"
                ." load Encode::Detect::Detector module"
            );
        }
    }

    if ( !@suspects ) {
        $RT::Logger->warning("No EmailInputEncodings set except '*', fallback to $fallback");
        return $fallback;
    }

    if ( !HAS_ENCODE_GUESS ) {
        $RT::Logger->error("We couldn't load Encode::Guess module, fallback to $fallback");
        return $fallback;
    }

    Encode::Guess->set_suspects( @suspects );
    my $guessed = Encode::Guess->guess( $_[0] );
    if ( !defined $guessed ) {
        $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
        return $fallback;
    }

    # A reference is an encoding object; anything else is a message string.
    if ( ref $guessed ) {
        my $name = $guessed->name;
        $RT::Logger->debug("Encode::Guess guessed encoding: $name");
        return _CanonicalizeCharset( $name );
    }

    if ( $guessed =~ /(\S+ or .+)/ ) {
        # Ambiguous result of the form "a or b or c".
        my %is_candidate = map { $_ => 1 } split( / or /, $1 );
        return 'utf-8' if $is_candidate{'utf8'}; # one and only normalization

        # Otherwise pick the first configured encoding among the candidates.
        foreach my $configured ( RT->Config->Get('EmailInputEncodings') ) {
            next unless $is_candidate{$configured};
            $RT::Logger->debug("Encode::Guess ambiguous ($guessed); using $configured");
            return _CanonicalizeCharset( $configured );
        }
    }
    else {
        $RT::Logger->warning("Encode::Guess failed: $guessed; fallback to $fallback");
    }

    return $fallback;
}
614
615=head2 _CanonicalizeCharset NAME
616
617canonicalize charset, return lowercase version.
618special cases are: gb2312 => gbk, utf8 => utf-8
619
620=cut
621
# Canonicalize a charset name.
#
# The name is lowercased and, when Encode knows it, replaced by Encode's
# canonical name.  Two special cases follow:
#   utf8 / utf-8-strict  => utf-8
#   euc-cn (gb2312)      => gbk
#
# An undefined, empty or otherwise false name is returned as a false
# value ('' for undef) without emitting an "uninitialized" warning.
sub _CanonicalizeCharset {
    my $charset = shift;

    # lc(undef) would warn; treat a missing name like an empty one.
    return '' unless defined $charset;

    $charset = lc $charset;
    return $charset unless $charset;

    # Canonicalize aliases if they're known
    if ( my $canonical = Encode::resolve_alias($charset) ) {
        $charset = $canonical;
    }

    if ( $charset eq 'utf8' || $charset eq 'utf-8-strict' ) {
        return 'utf-8';
    }
    elsif ( $charset eq 'euc-cn' ) {
        # gbk is superset of gb2312/euc-cn so it's safe
        return 'gbk';
        # XXX TODO: gb18030 is an even larger, more permissive superset of gbk,
        # but needs Encode::HanExtra installed
    }
    else {
        return $charset;
    }
}
644
645
=head2 SetMIMEHeadToEncoding Head => HEAD, From => OLD_ENCODING, To => NEW_ENCODING, PreserveWords => BOOL, IsOut => BOOL
84fb5b46
MKG
647
648Converts a MIME Head from one encoding to another. This totally violates the RFC.
649We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
650all the time
651
652
653=cut
654
# Re-encode every header value of a MIME head from one charset to another
# and, unless PreserveWords is set, decode MIME encoded-words as well.
#
# Accepts either up to four positional arguments
#   ( $head, $from_charset, $to_charset, $preserve_words )
# or a named list
#   ( Head => ..., From => ..., To => ..., PreserveWords => ..., IsOut => ... ).
sub SetMIMEHeadToEncoding {
    my ( $head, $charset, $enc, $preserve_words, $is_out );

    if ( @_ > 4 ) {
        my %args = (
            Head          => undef,
            From          => undef,
            To            => undef,
            PreserveWords => undef,
            IsOut         => undef,
            @_,
        );
        ( $head, $charset, $enc, $preserve_words, $is_out )
            = @args{qw(Head From To PreserveWords IsOut)};
    }
    else {
        ( $head, $charset, $enc, $preserve_words ) = @_;
    }

    if ( !( $head && $charset && $enc ) ) {
        RT->Logger->error(
            "Missing Head or From or To arguments");
        return;
    }

    $charset = _CanonicalizeCharset($charset);
    $enc     = _CanonicalizeCharset($enc);

    # Same charset and nothing to decode: there is no work to do.
    return if $charset eq $enc and $preserve_words;

    foreach my $tag ( $head->tags ) {
        next unless $tag; # seen in wild: headers with no name

        # Pull all values of this header out, then re-add them one by one.
        my @values = $head->get_all($tag);
        $head->delete($tag);

        VALUE:
        foreach my $value (@values) {
            if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
                Encode::_utf8_off($value);
                my $octets = $value;

                # On success $result is the converted text; on failure it
                # is the error message.
                my ( $ok, $result ) = EncodeFromToWithCroak( $octets, $charset => $enc );

                if ( !$ok ) {
                    if ($is_out) {
                        # Outgoing mail: keep the original bytes untouched.
                        # NOTE(review): unlike the normal path below, this
                        # re-adds the value without the protective leading
                        # space -- confirm that this is intended.
                        $head->add( $tag, $octets );
                        next VALUE;
                    }

                    my $error = $result;
                    my $guess = _GuessCharset($octets);

                    if ( $guess && $guess ne $charset ) {
                        $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
                        ( $ok, $result ) = EncodeFromToWithCroak( $octets, $guess, $enc );
                        $error = $result unless $ok;
                    }

                    if ( !$ok ) {
                        # Last resort: convert without the croak check.
                        $RT::Logger->error( "Encoding error: " . $error . " forcing conversion to $charset => $enc" );
                        $result = $octets;
                        Encode::from_to( $result, $charset => $enc );
                    }
                }

                $value = $result;
            }

            $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
                unless $preserve_words;

            # We intentionally add a leading space when re-adding the
            # header; Mail::Header strips it before storing, but it
            # serves to prevent it from "helpfully" canonicalizing
            # $head->add("Subject", "Subject: foo") into the same as
            # $head->add("Subject", "foo");
            $head->add( $tag, " " . $value );
        }
    }

}
734
af59614d
MKG
735=head2 EncodeFromToWithCroak $string, $from, $to
736
737Try to encode string from encoding $from to encoding $to in croak mode
738
739return (1, $encoded_string) if success, otherwise (0, $error)
740
741=cut
742
# Convert $string from encoding $from to encoding $to with
# Encode::FB_CROAK.  Returns ( 1, $converted ) on success, or
# ( 0, $error ) when the conversion dies.
sub EncodeFromToWithCroak {
    my ( $string, $from, $to ) = @_;

    eval { Encode::from_to( $string, $from => $to, Encode::FB_CROAK ); };

    return ( 1, $string ) unless $@;
    return ( 0, $@ );
}
751
84fb5b46
MKG
752RT::Base->_ImportOverlays();
753
7541; # End of module.
755