Upgrade to 4.2.8
[usit-rt.git] / lib / RT / I18N.pm
CommitLineData
84fb5b46
MKG
1# BEGIN BPS TAGGED BLOCK {{{
2#
3# COPYRIGHT:
4#
3ffc5f4f 5# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
84fb5b46
MKG
6# <sales@bestpractical.com>
7#
8# (Except where explicitly superseded by other copyright notices)
9#
10#
11# LICENSE:
12#
13# This work is made available to you under the terms of Version 2 of
14# the GNU General Public License. A copy of that license should have
15# been provided with this software, but in any event can be snarfed
16# from www.gnu.org.
17#
18# This work is distributed in the hope that it will be useful, but
19# WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21# General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26# 02110-1301 or visit their web page on the internet at
27# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
28#
29#
30# CONTRIBUTION SUBMISSION POLICY:
31#
32# (The following paragraph is not intended to limit the rights granted
33# to you to modify and distribute this software under the terms of
34# the GNU General Public License and is only of importance to you if
35# you choose to contribute your changes and enhancements to the
36# community by submitting them to Best Practical Solutions, LLC.)
37#
38# By intentionally submitting any modifications, corrections or
39# derivatives to this work, or any other work intended for use with
40# Request Tracker, to Best Practical Solutions, LLC, you confirm that
41# you are the copyright holder for those contributions and you grant
42# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
43# royalty-free, perpetual, license to use, copy, create derivative
44# works based on those contributions, and sublicense and distribute
45# those contributions and any derivatives thereof.
46#
47# END BPS TAGGED BLOCK }}}
48
49=head1 NAME
50
51RT::I18N - a base class for localization of RT
52
53=cut
54
55package RT::I18N;
56
57use strict;
58use warnings;
59
60
61use Locale::Maketext 1.04;
62use Locale::Maketext::Lexicon 0.25;
63use base 'Locale::Maketext::Fuzzy';
64
84fb5b46
MKG
65use MIME::Entity;
66use MIME::Head;
67use File::Glob;
68
69# I decree that this project's first language is English.
70
71our %Lexicon = (
72 'TEST_STRING' => 'Concrete Mixer',
73
74 '__Content-Type' => 'text/plain; charset=utf-8',
75
76 '_AUTO' => 1,
77 # That means that lookup failures can't happen -- if we get as far
78 # as looking for something in this lexicon, and we don't find it,
79 # then automagically set $Lexicon{$key} = $key, before possibly
80 # compiling it.
81
82 # The exception is keys that start with "_" -- they aren't auto-makeable.
83
84);
85# End of lexicon.
86
87=head2 Init
88
89Initializes the lexicons used for localization.
90
91
92=cut
93
94sub Init {
95
96 my @lang = RT->Config->Get('LexiconLanguages');
97 @lang = ('*') unless @lang;
98
99 # load default functions
100 require substr(__FILE__, 0, -3) . '/i_default.pm';
101
102 # Load language-specific functions
103 foreach my $file ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm") ) {
104 unless ( $file =~ /^([-\w\s\.\/\\~:]+)$/ ) {
105 warn("$file is tainted. not loading");
106 next;
107 }
108 $file = $1;
109
110 my ($lang) = ($file =~ /([^\\\/]+?)\.pm$/);
111 next unless grep $_ eq '*' || $_ eq $lang, @lang;
112 require $file;
113 }
114
115 my %import;
116 foreach my $l ( @lang ) {
117 $import{$l} = [
118 Gettext => $RT::LexiconPath."/$l.po",
119 ];
120 push @{ $import{$l} }, map {(Gettext => "$_/$l.po")} RT->PluginDirs('po');
121 push @{ $import{$l} }, (Gettext => $RT::LocalLexiconPath."/*/$l.po",
122 Gettext => $RT::LocalLexiconPath."/$l.po");
123 }
124
125 # Acquire all .po files and iterate them into lexicons
126 Locale::Maketext::Lexicon->import({ _decode => 1, %import });
127
128 return 1;
129}
130
131sub LoadLexicons {
132
133 no strict 'refs';
134 foreach my $k (keys %{RT::I18N::} ) {
135 next if $k eq 'main::';
136 next unless index($k, '::', -2) >= 0;
137 next unless exists ${ 'RT::I18N::'. $k }{'Lexicon'};
138
139 my $lex = *{ ${'RT::I18N::'. $k }{'Lexicon'} }{HASH};
140 # run fetch to force load
141 my $tmp = $lex->{'foo'};
142 # XXX: untie may fail with "untie attempted
143 # while 1 inner references still exist"
144 # TODO: untie that has to lower fetch impact
145 # untie %$lex if tied %$lex;
146 }
147}
148
149=head2 encoding
150
151Returns the encoding of the current lexicon, as yanked out of __ContentType's "charset" field.
152If it can't find anything, it returns 'ISO-8859-1'
153
154
155
156=cut
157
158
159sub encoding { 'utf-8' }
160
161
162=head2 SetMIMEEntityToUTF8 $entity
163
164An utility function which will try to convert entity body into utf8.
165It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
166
167=cut
168
169sub SetMIMEEntityToUTF8 {
170 RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
171}
172
173
174
175=head2 IsTextualContentType $type
176
177An utility function that determines whether $type is I<textual>, meaning
178that it can sensibly be converted to Unicode text.
179
180Currently, it returns true iff $type matches this regular expression
181(case-insensitively):
182
183 ^(?:text/(?:plain|html)|message/rfc822)\b
184
185
186=cut
187
188sub IsTextualContentType {
189 my $type = shift;
190 ($type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i) ? 1 : 0;
191}
192
193
3ffc5f4f 194=head2 SetMIMEEntityToEncoding Entity => ENTITY, Encoding => ENCODING, PreserveWords => BOOL, IsOut => BOOL
84fb5b46
MKG
195
196An utility function which will try to convert entity body into specified
197charset encoding (encoded as octets, *not* unicode-strings). It will
198iterate all the entities in $entity, and try to convert each one into
199specified charset if whose Content-Type is 'text/plain'.
200
3ffc5f4f
MKG
201If PreserveWords is true, values in mime head will be decoded.(default is false)
202
203Incoming and outgoing mails are handled differently, if IsOut is true(default
204is false), it'll be treated as outgoing mail, otherwise incomding mail:
205
206incoming mail:
2071) find encoding
2082) if found then try to convert to utf-8 in croak mode, return if success
2093) guess encoding
2104) if guessed differently then try to convert to utf-8 in croak mode, return
211 if success
2125) mark part as application/octet-stream instead of falling back to any
213 encoding
214
215outgoing mail:
2161) find encoding
2172) if didn't find then do nothing, send as is, let MUA deal with it
2183) if found then try to convert it to outgoing encoding in croak mode, return
219 if success
2204) do nothing otherwise, keep original encoding
221
84fb5b46
MKG
222This function doesn't return anything meaningful.
223
224=cut
225
226sub SetMIMEEntityToEncoding {
3ffc5f4f
MKG
227 my ( $entity, $enc, $preserve_words, $is_out );
228
229 if ( @_ <= 3 ) {
230 ( $entity, $enc, $preserve_words ) = @_;
231 }
232 else {
233 my %args = (
234 Entity => undef,
235 Encoding => undef,
236 PreserveWords => undef,
237 IsOut => undef,
238 @_,
239 );
240
241 $entity = $args{Entity};
242 $enc = $args{Encoding};
243 $preserve_words = $args{PreserveWords};
244 $is_out = $args{IsOut};
245 }
246
247 unless ( $entity && $enc ) {
248 RT->Logger->error("Missing Entity or Encoding arguments");
249 return;
250 }
84fb5b46
MKG
251
252 # do the same for parts first of all
3ffc5f4f
MKG
253 SetMIMEEntityToEncoding(
254 Entity => $_,
255 Encoding => $enc,
256 PreserveWords => $preserve_words,
257 IsOut => $is_out,
258 ) foreach $entity->parts;
84fb5b46 259
403d7b0b
MKG
260 my $head = $entity->head;
261
262 my $charset = _FindOrGuessCharset($entity);
263 if ( $charset ) {
264 unless( Encode::find_encoding($charset) ) {
265 $RT::Logger->warning("Encoding '$charset' is not supported");
266 $charset = undef;
267 }
268 }
269 unless ( $charset ) {
270 $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') );
271 $head->mime_attr('Content-Type' => 'application/octet-stream');
272 return;
273 }
84fb5b46
MKG
274
275 SetMIMEHeadToEncoding(
3ffc5f4f
MKG
276 Head => $head,
277 From => _FindOrGuessCharset( $entity, 1 ),
278 To => $enc,
279 PreserveWords => $preserve_words,
280 IsOut => $is_out,
84fb5b46
MKG
281 );
282
84fb5b46 283 # If this is a textual entity, we'd need to preserve its original encoding
3ffc5f4f
MKG
284 $head->replace( "X-RT-Original-Encoding" => Encode::encode( "UTF-8", $charset ) )
285 if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
84fb5b46
MKG
286
287 return unless IsTextualContentType($head->mime_type);
288
289 my $body = $entity->bodyhandle;
290
dab09ea8 291 if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) {
84fb5b46 292 my $string = $body->as_string or return;
3ffc5f4f 293 RT::Util::assert_bytes($string);
84fb5b46
MKG
294
295 $RT::Logger->debug( "Converting '$charset' to '$enc' for "
296 . $head->mime_type . " - "
3ffc5f4f
MKG
297 . ( Encode::decode("UTF-8",$head->get('subject')) || 'Subjectless message' ) );
298
299 my $orig_string = $string;
300 ( my $success, $string ) = EncodeFromToWithCroak( $orig_string, $charset => $enc );
301 if ( !$success ) {
302 return if $is_out;
303 my $error = $string;
304
305 my $guess = _GuessCharset($orig_string);
306 if ( $guess && $guess ne $charset ) {
307 $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
308 ( $success, $string ) = EncodeFromToWithCroak( $orig_string, $guess, $enc );
309 $error = $string unless $success;
310 }
84fb5b46 311
3ffc5f4f
MKG
312 if ( !$success ) {
313 $RT::Logger->error( "Encoding error: " . $error . " falling back to application/octet-stream" );
314 $head->mime_attr( "content-type" => 'application/octet-stream' );
315 return;
316 }
317 }
84fb5b46
MKG
318
319 my $new_body = MIME::Body::InCore->new($string);
320
321 # set up the new entity
322 $head->mime_attr( "content-type" => 'text/plain' )
323 unless ( $head->mime_attr("content-type") );
324 $head->mime_attr( "content-type.charset" => $enc );
325 $entity->bodyhandle($new_body);
326 }
327}
328
84fb5b46
MKG
329=head2 DecodeMIMEWordsToUTF8 $raw
330
331An utility method which mimics MIME::Words::decode_mimewords, but only
3ffc5f4f
MKG
332limited functionality. Despite its name, this function returns the
333bytes of the string, in UTF-8.
84fb5b46
MKG
334
335=cut
336
337sub DecodeMIMEWordsToUTF8 {
338 my $str = shift;
339 return DecodeMIMEWordsToEncoding($str, 'utf-8', @_);
340}
341
342sub DecodeMIMEWordsToEncoding {
343 my $str = shift;
344 my $to_charset = _CanonicalizeCharset(shift);
345 my $field = shift || '';
3ffc5f4f
MKG
346 $RT::Logger->warning(
347 "DecodeMIMEWordsToEncoding was called without field name."
348 ."It's known to cause troubles with decoding fields properly."
349 ) unless $field;
350
351 # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
352 # We _should_ be preserving them encoded until after parsing is completed and
353 # THEN undo the mime-encoding.
354 #
355 # This routine should be translating the existing mimeencoding to utf8 but leaving
356 # things encoded.
357 #
358 # It's legal for headers to contain mime-encoded commas and semicolons which
359 # should not be treated as address separators. (Encoding == quoting here)
360 #
361 # until this is fixed, we must escape any string containing a comma or semicolon
362 # this is only a bandaid
363
364 # Some _other_ MUAs encode quotes _already_, and double quotes
365 # confuse us a lot, so only quote it if it isn't quoted
366 # already.
84fb5b46 367
b5747ff2
MKG
368 # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
369 # continuations, and similar syntax from RFC 2231
3ffc5f4f 370 if ($field =~ /^Content-/i) {
b5747ff2
MKG
371 # This concatenates continued parameters and normalizes encoded params
372 # to QB encoded-words which we handle below
3ffc5f4f
MKG
373 my $params = MIME::Field::ParamVal->parse_params($str);
374 foreach my $v ( values %$params ) {
375 $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
376 # de-quote in case those were hidden inside encoded part
377 $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
378 }
379 $str = bless({}, 'MIME::Field::ParamVal')->set($params)->stringify;
380 }
381 elsif ( $field =~ /^(?:Resent-)?(?:To|From|B?Cc|Sender|Reply-To)$/i ) {
382 my @addresses = RT::EmailParser->ParseEmailAddress( $str );
383 foreach my $address ( @addresses ) {
384 foreach my $field (qw(phrase comment)) {
385 my $v = $address->$field() or next;
386 $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
387 if ( $field eq 'phrase' ) {
388 # de-quote in case quoted value were hidden inside encoded part
389 $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
390 }
391 $address->$field($v);
392 }
393 }
394 $str = join ', ', map $_->format, @addresses;
b5747ff2 395 }
3ffc5f4f
MKG
396 else {
397 $str = _DecodeMIMEWordsToEncoding( $str, $to_charset );
398 }
399
400
401 # We might have \n without trailing whitespace, which will result in
402 # invalid headers.
403 $str =~ s/\n//g;
404
405 return ($str)
406}
407
408sub _DecodeMIMEWordsToEncoding {
409 my $str = shift;
410 my $to_charset = shift;
b5747ff2 411
403d7b0b
MKG
412 # Pre-parse by removing all whitespace between encoded words
413 my $encoded_word = qr/
414 =\? # =?
415 ([^?]+?) # charset
416 (?:\*[^?]+)? # optional '*language'
417 \? # ?
418 ([QqBb]) # encoding
419 \? # ?
420 ([^?]+) # encoded string
421 \?= # ?=
422 /x;
423 $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;
424
425 # Also merge quoted-printable sections together, in case multiple
426 # octets of a single encoded character were split between chunks.
427 # Though not valid according to RFC 2047, this has been seen in the
428 # wild.
429 1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i;
430
b5747ff2
MKG
431 # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
432 # custom MIME word decoding and charset canonicalization. We can't do this
433 # until we parse before decode, instead of the other way around.
434 my @list = $str =~ m/(.*?) # prefix
403d7b0b 435 $encoded_word
b5747ff2
MKG
436 ([^=]*) # trailing
437 /xgcs;
3ffc5f4f
MKG
438 return $str unless @list;
439
440 # add everything that hasn't matched to the end of the latest
441 # string in array this happen when we have 'key="=?encoded?="; key="plain"'
442 $list[-1] .= substr($str, pos $str);
443
444 $str = '';
445 while (@list) {
446 my ($prefix, $charset, $encoding, $enc_str, $trailing) =
447 splice @list, 0, 5;
448 $charset = _CanonicalizeCharset($charset);
449 $encoding = lc $encoding;
450
451 $trailing =~ s/\s?\t?$//; # Observed from Outlook Express
452
453 if ( $encoding eq 'q' ) {
454 use MIME::QuotedPrint;
455 $enc_str =~ tr/_/ /; # Observed from Outlook Express
456 $enc_str = decode_qp($enc_str);
457 } elsif ( $encoding eq 'b' ) {
458 use MIME::Base64;
459 $enc_str = decode_base64($enc_str);
460 } else {
461 $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
462 ."only Q(uoted-printable) and B(ase64) are supported");
463 }
84fb5b46 464
3ffc5f4f
MKG
465 # now we have got a decoded subject, try to convert into the encoding
466 if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
467 if ( Encode::find_encoding($charset) ) {
468 Encode::from_to( $enc_str, $charset, $to_charset );
84fb5b46 469 } else {
3ffc5f4f
MKG
470 $RT::Logger->warning("Charset '$charset' is not supported");
471 $enc_str =~ s/[^[:print:]]/\357\277\275/g;
472 Encode::from_to( $enc_str, 'UTF-8', $to_charset )
473 unless $to_charset eq 'utf-8';
84fb5b46 474 }
84fb5b46 475 }
3ffc5f4f 476 $str .= $prefix . $enc_str . $trailing;
84fb5b46
MKG
477 }
478
84fb5b46
MKG
479 return ($str)
480}
481
482
84fb5b46
MKG
483=head2 _FindOrGuessCharset MIME::Entity, $head_only
484
485When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
486
487If $head_only is true, only guesses charset for head parts. This is because header's encoding (e.g. filename="...") may be different from that of body's.
488
489=cut
490
491sub _FindOrGuessCharset {
492 my $entity = shift;
493 my $head_only = shift;
494 my $head = $entity->head;
495
496 if ( my $charset = $head->mime_attr("content-type.charset") ) {
497 return _CanonicalizeCharset($charset);
498 }
499
500 if ( !$head_only and $head->mime_type =~ m{^text/} ) {
501 my $body = $entity->bodyhandle or return;
502 return _GuessCharset( $body->as_string );
503 }
504 else {
505
506 # potentially binary data -- don't guess the body
507 return _GuessCharset( $head->as_string );
508 }
509}
510
511
512
513=head2 _GuessCharset STRING
514
515use Encode::Guess to try to figure it out the string's encoding.
516
517=cut
518
3ffc5f4f
MKG
519use constant HAS_ENCODE_GUESS => Encode::Guess->require;
520use constant HAS_ENCODE_DETECT => Encode::Detect::Detector->require;
84fb5b46
MKG
521
522sub _GuessCharset {
523 my $fallback = _CanonicalizeCharset('iso-8859-1');
524
525 # if $_[0] is null/empty, we don't guess its encoding
526 return $fallback
527 unless defined $_[0] && length $_[0];
528
529 my @encodings = RT->Config->Get('EmailInputEncodings');
530 unless ( @encodings ) {
531 $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback");
532 return $fallback;
533 }
534
535 if ( $encodings[0] eq '*' ) {
536 shift @encodings;
537 if ( HAS_ENCODE_DETECT ) {
538 my $charset = Encode::Detect::Detector::detect( $_[0] );
539 if ( $charset ) {
540 $RT::Logger->debug("Encode::Detect::Detector guessed encoding: $charset");
541 return _CanonicalizeCharset( Encode::resolve_alias( $charset ) );
542 }
543 else {
544 $RT::Logger->debug("Encode::Detect::Detector failed to guess encoding");
545 }
546 }
547 else {
3ffc5f4f 548 $RT::Logger->error(
84fb5b46
MKG
549 "You requested to guess encoding, but we couldn't"
550 ." load Encode::Detect::Detector module"
551 );
552 }
553 }
554
555 unless ( @encodings ) {
556 $RT::Logger->warning("No EmailInputEncodings set except '*', fallback to $fallback");
557 return $fallback;
558 }
559
560 unless ( HAS_ENCODE_GUESS ) {
561 $RT::Logger->error("We couldn't load Encode::Guess module, fallback to $fallback");
562 return $fallback;
563 }
564
565 Encode::Guess->set_suspects( @encodings );
566 my $decoder = Encode::Guess->guess( $_[0] );
567 unless ( defined $decoder ) {
568 $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
569 return $fallback;
570 }
571
572 if ( ref $decoder ) {
573 my $charset = $decoder->name;
574 $RT::Logger->debug("Encode::Guess guessed encoding: $charset");
575 return _CanonicalizeCharset( $charset );
576 }
577 elsif ($decoder =~ /(\S+ or .+)/) {
578 my %matched = map { $_ => 1 } split(/ or /, $1);
579 return 'utf-8' if $matched{'utf8'}; # one and only normalization
580
581 foreach my $suspect (RT->Config->Get('EmailInputEncodings')) {
582 next unless $matched{$suspect};
583 $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
584 return _CanonicalizeCharset( $suspect );
585 }
586 }
587 else {
588 $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
589 }
590
591 return $fallback;
592}
593
594=head2 _CanonicalizeCharset NAME
595
596canonicalize charset, return lowercase version.
597special cases are: gb2312 => gbk, utf8 => utf-8
598
599=cut
600
601sub _CanonicalizeCharset {
602 my $charset = lc shift;
603 return $charset unless $charset;
604
605 # Canonicalize aliases if they're known
606 if (my $canonical = Encode::resolve_alias($charset)) {
607 $charset = $canonical;
608 }
609
610 if ( $charset eq 'utf8' || $charset eq 'utf-8-strict' ) {
611 return 'utf-8';
612 }
613 elsif ( $charset eq 'euc-cn' ) {
614 # gbk is superset of gb2312/euc-cn so it's safe
615 return 'gbk';
3ffc5f4f
MKG
616 }
617 elsif ( $charset =~ /^(?:(?:big5(-1984|-2003|ext|plus))|cccii|unisys|euc-tw|gb18030|(?:cns11643-\d+))$/ ) {
618 unless ( Encode::HanExtra->require ) {
619 RT->Logger->error("Please install Encode::HanExtra to handle $charset");
620 }
621 return $charset;
84fb5b46
MKG
622 }
623 else {
624 return $charset;
625 }
626}
627
628
3ffc5f4f 629=head2 SetMIMEHeadToEncoding MIMEHead => HEAD, From => OLD_ENCODING, To => NEW_Encoding, PreserveWords => BOOL, IsOut => BOOL
84fb5b46
MKG
630
631Converts a MIME Head from one encoding to another. This totally violates the RFC.
632We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
633all the time
634
635
636=cut
637
638sub SetMIMEHeadToEncoding {
3ffc5f4f
MKG
639 my ( $head, $charset, $enc, $preserve_words, $is_out );
640
641 if ( @_ <= 4 ) {
642 ( $head, $charset, $enc, $preserve_words ) = @_;
643 }
644 else {
645 my %args = (
646 Head => undef,
647 From => undef,
648 To => undef,
649 PreserveWords => undef,
650 IsOut => undef,
651 @_,
652 );
653
654 $head = $args{Head};
655 $charset = $args{From};
656 $enc = $args{To};
657 $preserve_words = $args{PreserveWords};
658 $is_out = $args{IsOut};
659 }
660
661 unless ( $head && $charset && $enc ) {
662 RT->Logger->error(
663 "Missing Head or From or To arguments");
664 return;
665 }
84fb5b46
MKG
666
667 $charset = _CanonicalizeCharset($charset);
668 $enc = _CanonicalizeCharset($enc);
669
670 return if $charset eq $enc and $preserve_words;
671
3ffc5f4f 672 RT::Util::assert_bytes( $head->as_string );
84fb5b46
MKG
673 foreach my $tag ( $head->tags ) {
674 next unless $tag; # seen in wild: headers with no name
675 my @values = $head->get_all($tag);
676 $head->delete($tag);
677 foreach my $value (@values) {
dab09ea8 678 if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
3ffc5f4f
MKG
679 my $orig_value = $value;
680 ( my $success, $value ) = EncodeFromToWithCroak( $orig_value, $charset => $enc );
681 if ( !$success ) {
682 my $error = $value;
683 if ($is_out) {
684 $value = $orig_value;
685 $head->add( $tag, $value );
686 next;
687 }
688
689 my $guess = _GuessCharset($orig_value);
690 if ( $guess && $guess ne $charset ) {
691 $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
692 ( $success, $value ) = EncodeFromToWithCroak( $orig_value, $guess, $enc );
693 $error = $value unless $success;
694 }
695
696 if ( !$success ) {
697 $RT::Logger->error( "Encoding error: " . $error . " forcing conversion to $charset => $enc" );
698 $value = $orig_value;
699 Encode::from_to( $value, $charset => $enc );
700 }
701 }
84fb5b46 702 }
3ffc5f4f 703
84fb5b46
MKG
704 $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
705 unless $preserve_words;
706
707 # We intentionally add a leading space when re-adding the
708 # header; Mail::Header strips it before storing, but it
709 # serves to prevent it from "helpfully" canonicalizing
710 # $head->add("Subject", "Subject: foo") into the same as
711 # $head->add("Subject", "foo");
712 $head->add( $tag, " " . $value );
713 }
714 }
715
716}
717
3ffc5f4f
MKG
718=head2 EncodeFromToWithCroak $string, $from, $to
719
720Try to encode string from encoding $from to encoding $to in croak mode
721
722return (1, $encoded_string) if success, otherwise (0, $error)
723
724=cut
725
726sub EncodeFromToWithCroak {
727 my $string = shift;
728 my $from = shift;
729 my $to = shift;
730
731 eval { Encode::from_to( $string, $from => $to, Encode::FB_CROAK ); };
732 return $@ ? ( 0, $@ ) : ( 1, $string );
733}
734
84fb5b46
MKG
735RT::Base->_ImportOverlays();
736
7371; # End of module.
738