lib/RT/I18N.pm

   1 # BEGIN BPS TAGGED BLOCK {{{
   2 #
   3 # COPYRIGHT:
   4 #
   5 # This software is Copyright (c) 1996-2013 Best Practical Solutions, LLC
   6 #                                          <sales@bestpractical.com>
   7 #
   8 # (Except where explicitly superseded by other copyright notices)
   9 #
  10 #
  11 # LICENSE:
  12 #
  13 # This work is made available to you under the terms of Version 2 of
  14 # the GNU General Public License. A copy of that license should have
  15 # been provided with this software, but in any event can be snarfed
  16 # from www.gnu.org.
  17 #
  18 # This work is distributed in the hope that it will be useful, but
  19 # WITHOUT ANY WARRANTY; without even the implied warranty of
  20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21 # General Public License for more details.
  22 #
  23 # You should have received a copy of the GNU General Public License
  24 # along with this program; if not, write to the Free Software
  25 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  26 # 02110-1301 or visit their web page on the internet at
  27 # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
  28 #
  29 #
  30 # CONTRIBUTION SUBMISSION POLICY:
  31 #
  32 # (The following paragraph is not intended to limit the rights granted
  33 # to you to modify and distribute this software under the terms of
  34 # the GNU General Public License and is only of importance to you if
  35 # you choose to contribute your changes and enhancements to the
  36 # community by submitting them to Best Practical Solutions, LLC.)
  37 #
  38 # By intentionally submitting any modifications, corrections or
  39 # derivatives to this work, or any other work intended for use with
  40 # Request Tracker, to Best Practical Solutions, LLC, you confirm that
  41 # you are the copyright holder for those contributions and you grant
  42 # Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
  43 # royalty-free, perpetual, license to use, copy, create derivative
  44 # works based on those contributions, and sublicense and distribute
  45 # those contributions and any derivatives thereof.
  46 #
  47 # END BPS TAGGED BLOCK }}}
  48
  49 =head1 NAME
  50
  51 RT::I18N - a base class for localization of RT
  52
  53 =cut
  54
  55 package RT::I18N;
  56
  57 use strict;
  58 use warnings;
  59
  60
  61 use Locale::Maketext 1.04;
  62 use Locale::Maketext::Lexicon 0.25;
  63 use base 'Locale::Maketext::Fuzzy';
  64
  65 use Encode;
  66 use MIME::Entity;
  67 use MIME::Head;
  68 use File::Glob;
  69
  70 # I decree that this project's first language is English.
  71
  72 our %Lexicon = (
  73    'TEST_STRING' => 'Concrete Mixer',
  74
  75     '__Content-Type' => 'text/plain; charset=utf-8',
  76
  77   '_AUTO' => 1,
  78   # That means that lookup failures can't happen -- if we get as far
  79   #  as looking for something in this lexicon, and we don't find it,
  80   #  then automagically set $Lexicon{$key} = $key, before possibly
  81   #  compiling it.
  82
  83   # The exception is keys that start with "_" -- they aren't auto-makeable.
  84
  85 );
  86 # End of lexicon.
  87
  88 =head2 Init
  89
  90 Initializes the lexicons used for localization.
  91
  92
  93 =cut
  94
  95 sub Init {
  96
  97     my @lang = RT->Config->Get('LexiconLanguages');
  98     @lang = ('*') unless @lang;
  99
 100     # load default functions
 101     require substr(__FILE__, 0, -3) . '/i_default.pm';
 102
 103     # Load language-specific functions
 104     foreach my $file ( File::Glob::bsd_glob(substr(__FILE__, 0, -3) . "/*.pm") ) {
 105         unless ( $file =~ /^([-\w\s\.\/\\~:]+)$/ ) {
 106             warn("$file is tainted. not loading");
 107             next;
 108         }
 109         $file = $1;
 110
 111         my ($lang) = ($file =~ /([^\\\/]+?)\.pm$/);
 112         next unless grep $_ eq '*' || $_ eq $lang, @lang;
 113         require $file;
 114     }
 115
 116     my %import;
 117     foreach my $l ( @lang ) {
 118         $import{$l} = [
 119             Gettext => $RT::LexiconPath."/$l.po",
 120         ];
 121         push @{ $import{$l} }, map {(Gettext => "$_/$l.po")} RT->PluginDirs('po');
 122         push @{ $import{$l} }, (Gettext => $RT::LocalLexiconPath."/*/$l.po",
 123                                 Gettext => $RT::LocalLexiconPath."/$l.po");
 124     }
 125
 126     # Acquire all .po files and iterate them into lexicons
 127     Locale::Maketext::Lexicon->import({ _decode => 1, %import });
 128
 129     return 1;
 130 }
 131
 132 sub LoadLexicons {
 133
 134     no strict 'refs';
 135     foreach my $k (keys %{RT::I18N::} ) {
 136         next if $k eq 'main::';
 137         next unless index($k, '::', -2) >= 0;
 138         next unless exists ${ 'RT::I18N::'. $k }{'Lexicon'};
 139
 140         my $lex = *{ ${'RT::I18N::'. $k }{'Lexicon'} }{HASH};
 141         # run fetch to force load
 142         my $tmp = $lex->{'foo'};
 143         # XXX: untie may fail with "untie attempted
 144         # while 1 inner references still exist"
 145         # TODO: untie that has to lower fetch impact
 146         # untie %$lex if tied %$lex;
 147     }
 148 }
 149
 150 =head2 encoding
 151
  152 Returns the encoding of the current lexicon. The current implementation does not inspect the
  153 lexicon's __Content-Type entry; it always returns 'utf-8'.
 154
 155
 156
 157 =cut
 158
 159
 160 sub encoding { 'utf-8' }
 161
 162
 163 =head2 SetMIMEEntityToUTF8 $entity
 164
 165 An utility function which will try to convert entity body into utf8.
 166 It's now a wrap-up of SetMIMEEntityToEncoding($entity, 'utf-8').
 167
 168 =cut
 169
 170 sub SetMIMEEntityToUTF8 {
 171     RT::I18N::SetMIMEEntityToEncoding(shift, 'utf-8');
 172 }
 173
 174
 175
 176 =head2 IsTextualContentType $type
 177
 178 An utility function that determines whether $type is I<textual>, meaning
 179 that it can sensibly be converted to Unicode text.
 180
 181 Currently, it returns true iff $type matches this regular expression
 182 (case-insensitively):
 183
 184     ^(?:text/(?:plain|html)|message/rfc822)\b
 185
 186
 187 =cut
 188
 189 sub IsTextualContentType {
 190     my $type = shift;
 191     ($type =~ m{^(?:text/(?:plain|html)|message/rfc822)\b}i) ? 1 : 0;
 192 }
 193
 194
 195 =head2 SetMIMEEntityToEncoding Entity => ENTITY, Encoding => ENCODING, PreserveWords => BOOL, IsOut => BOOL
 196
  197 A utility function which tries to convert an entity's body into the specified
  198 charset encoding (encoded as octets, *not* unicode-strings).  It
  199 recurses into all the parts of $entity and tries to convert each one into the
  200 specified charset if its Content-Type is textual (see IsTextualContentType).
 201
 202 If PreserveWords is true, values in mime head will be decoded.(default is false)
 203
  204 Incoming and outgoing mail are handled differently: if IsOut is true (the default
  205 is false), the entity is treated as outgoing mail, otherwise as incoming mail:
 206
 207 incoming mail:
 208 1) find encoding
 209 2) if found then try to convert to utf-8 in croak mode, return if success
 210 3) guess encoding
 211 4) if guessed differently then try to convert to utf-8 in croak mode, return
 212    if success
 213 5) mark part as application/octet-stream instead of falling back to any
 214    encoding
 215
 216 outgoing mail:
 217 1) find encoding
 218 2) if didn't find then do nothing, send as is, let MUA deal with it
 219 3) if found then try to convert it to outgoing encoding in croak mode, return
 220    if success
 221 4) do nothing otherwise, keep original encoding
 222
 223 This function doesn't return anything meaningful.
 224
 225 =cut
 226
 227 sub SetMIMEEntityToEncoding {
 228     my ( $entity, $enc, $preserve_words, $is_out );
 229
 230     if ( @_ <= 3 ) {
 231         ( $entity, $enc, $preserve_words ) = @_;
 232     }
 233     else {
 234         my %args = (
 235             Entity        => undef,
 236             Encoding      => undef,
 237             PreserveWords => undef,
 238             IsOut         => undef,
 239             @_,
 240         );
 241
 242         $entity         = $args{Entity};
 243         $enc            = $args{Encoding};
 244         $preserve_words = $args{PreserveWords};
 245         $is_out         = $args{IsOut};
 246     }
 247
 248     unless ( $entity && $enc ) {
 249         RT->Logger->error("Missing Entity or Encoding arguments");
 250         return;
 251     }
 252
 253     # do the same for parts first of all
 254     SetMIMEEntityToEncoding(
 255         Entity        => $_,
 256         Encoding      => $enc,
 257         PreserveWords => $preserve_words,
 258         IsOut         => $is_out,
 259     ) foreach $entity->parts;
 260
 261     my $head = $entity->head;
 262
 263     my $charset = _FindOrGuessCharset($entity);
 264     if ( $charset ) {
 265         unless( Encode::find_encoding($charset) ) {
 266             $RT::Logger->warning("Encoding '$charset' is not supported");
 267             $charset = undef;
 268         }
 269     }
 270     unless ( $charset ) {
 271         $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') );
 272         $head->mime_attr('Content-Type' => 'application/octet-stream');
 273         return;
 274     }
 275
 276     SetMIMEHeadToEncoding(
 277         Head          => $head,
 278         From          => _FindOrGuessCharset( $entity, 1 ),
 279         To            => $enc,
 280         PreserveWords => $preserve_words,
 281         IsOut         => $is_out,
 282     );
 283
 284     # If this is a textual entity, we'd need to preserve its original encoding
 285     $head->replace( "X-RT-Original-Encoding" => $charset )
 286         if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
 287
 288     return unless IsTextualContentType($head->mime_type);
 289
 290     my $body = $entity->bodyhandle;
 291
 292     if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) {
 293         my $string = $body->as_string or return;
 294
 295         $RT::Logger->debug( "Converting '$charset' to '$enc' for "
 296               . $head->mime_type . " - "
 297               . ( $head->get('subject') || 'Subjectless message' ) );
 298
 299         # NOTE:: see the comments at the end of the sub.
 300         Encode::_utf8_off($string);
 301         my $orig_string = $string;
 302         ( my $success, $string ) = EncodeFromToWithCroak( $orig_string, $charset => $enc );
 303         if ( !$success ) {
 304             return if $is_out;
 305             my $error = $string;
 306
 307             my $guess = _GuessCharset($orig_string);
 308             if ( $guess && $guess ne $charset ) {
 309                 $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
 310                 ( $success, $string ) = EncodeFromToWithCroak( $orig_string, $guess, $enc );
 311                 $error = $string unless $success;
 312             }
 313
 314             if ( !$success ) {
 315                 $RT::Logger->error( "Encoding error: " . $error . " falling back to application/octet-stream" );
 316                 $head->mime_attr( "content-type" => 'application/octet-stream' );
 317                 return;
 318             }
 319         }
 320
 321         my $new_body = MIME::Body::InCore->new($string);
 322
 323         # set up the new entity
 324         $head->mime_attr( "content-type" => 'text/plain' )
 325           unless ( $head->mime_attr("content-type") );
 326         $head->mime_attr( "content-type.charset" => $enc );
 327         $entity->bodyhandle($new_body);
 328     }
 329 }
 330
 331 # NOTES:  Why Encode::_utf8_off before Encode::from_to
 332 #
 333 # All the strings in RT are utf-8 now.  Quotes from Encode POD:
 334 #
 335 # [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
 336 # ... The data in $octets must be encoded as octets and not as
 337 # characters in Perl's internal format. ...
 338 #
 339 # Not turning off the UTF-8 flag in the string will prevent the string
 340 # from conversion.
 341
 342
 343
 344 =head2 DecodeMIMEWordsToUTF8 $raw
 345
 346 An utility method which mimics MIME::Words::decode_mimewords, but only
 347 limited functionality.  This function returns an utf-8 string.
 348
 349 It returns the decoded string, or the original string if it's not
 350 encoded.  Since the subroutine converts specified string into utf-8
 351 charset, it should not alter a subject written in English.
 352
 353 Why not use MIME::Words directly?  Because it fails in RT when I
 354 tried.  Maybe it's ok now.
 355
 356 =cut
 357
 358 sub DecodeMIMEWordsToUTF8 {
 359     my $str = shift;
 360     return DecodeMIMEWordsToEncoding($str, 'utf-8', @_);
 361 }
 362
 363 sub DecodeMIMEWordsToEncoding {
 364     my $str = shift;
 365     my $to_charset = _CanonicalizeCharset(shift);
 366     my $field = shift || '';
 367     $RT::Logger->warning(
 368         "DecodeMIMEWordsToEncoding was called without field name."
 369         ."It's known to cause troubles with decoding fields properly."
 370     ) unless $field;
 371
 372     # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers
 373     # We _should_ be preserving them encoded until after parsing is completed and
 374     # THEN undo the mime-encoding.
 375     #
 376     # This routine should be translating the existing mimeencoding to utf8 but leaving
 377     # things encoded.
 378     #
 379     # It's legal for headers to contain mime-encoded commas and semicolons which
 380     # should not be treated as address separators. (Encoding == quoting here)
 381     #
 382     # until this is fixed, we must escape any string containing a comma or semicolon
 383     # this is only a bandaid
 384
 385     # Some _other_ MUAs encode quotes _already_, and double quotes
 386     # confuse us a lot, so only quote it if it isn't quoted
 387     # already.
 388
 389     # handle filename*=ISO-8859-1''%74%E9%73%74%2E%74%78%74, parameter value
 390     # continuations, and similar syntax from RFC 2231
 391     if ($field =~ /^Content-/i) {
 392         # This concatenates continued parameters and normalizes encoded params
 393         # to QB encoded-words which we handle below
 394         my $params = MIME::Field::ParamVal->parse_params($str);
 395         foreach my $v ( values %$params ) {
 396             $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
 397             # de-quote in case those were hidden inside encoded part
 398             $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
 399         }
 400         $str = bless({}, 'MIME::Field::ParamVal')->set($params)->stringify;
 401     }
 402     elsif ( $field =~ /^(?:Resent-)?(?:To|From|B?Cc|Sender|Reply-To)$/i ) {
 403         my @addresses = RT::EmailParser->ParseEmailAddress( $str );
 404         foreach my $address ( @addresses ) {
 405             foreach my $field (qw(phrase comment)) {
 406                 my $v = $address->$field() or next;
 407                 $v = _DecodeMIMEWordsToEncoding( $v, $to_charset );
 408                 if ( $field eq 'phrase' ) {
 409                     # de-quote in case quoted value were hidden inside encoded part
 410                     $v =~ s/\\(.)/$1/g if $v =~ s/^"(.*)"$/$1/;
 411                 }
 412                 $address->$field($v);
 413             }
 414         }
 415         $str = join ', ', map $_->format, @addresses;
 416     }
 417     else {
 418         $str = _DecodeMIMEWordsToEncoding( $str, $to_charset );
 419     }
 420
 421
 422     # We might have \n without trailing whitespace, which will result in
 423     # invalid headers.
 424     $str =~ s/\n//g;
 425
 426     return ($str)
 427 }
 428
 429 sub _DecodeMIMEWordsToEncoding {
 430     my $str = shift;
 431     my $to_charset = shift;
 432
 433     # Pre-parse by removing all whitespace between encoded words
 434     my $encoded_word = qr/
 435                  =\?            # =?
 436                  ([^?]+?)       # charset
 437                  (?:\*[^?]+)?   # optional '*language'
 438                  \?             # ?
 439                  ([QqBb])       # encoding
 440                  \?             # ?
 441                  ([^?]+)        # encoded string
 442                  \?=            # ?=
 443                  /x;
 444     $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g;
 445
 446     # Also merge quoted-printable sections together, in case multiple
 447     # octets of a single encoded character were split between chunks.
 448     # Though not valid according to RFC 2047, this has been seen in the
 449     # wild.
 450     1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i;
 451
 452     # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our
 453     # custom MIME word decoding and charset canonicalization.  We can't do this
 454     # until we parse before decode, instead of the other way around.
 455     my @list = $str =~ m/(.*?)          # prefix
 456                          $encoded_word
 457                          ([^=]*)        # trailing
 458                         /xgcs;
 459     return $str unless @list;
 460
 461     # add everything that hasn't matched to the end of the latest
 462     # string in array this happen when we have 'key="=?encoded?="; key="plain"'
 463     $list[-1] .= substr($str, pos $str);
 464
 465     $str = '';
 466     while (@list) {
 467         my ($prefix, $charset, $encoding, $enc_str, $trailing) =
 468                 splice @list, 0, 5;
 469         $charset  = _CanonicalizeCharset($charset);
 470         $encoding = lc $encoding;
 471
 472         $trailing =~ s/\s?\t?$//;               # Observed from Outlook Express
 473
 474         if ( $encoding eq 'q' ) {
 475             use MIME::QuotedPrint;
 476             $enc_str =~ tr/_/ /;                # Observed from Outlook Express
 477             $enc_str = decode_qp($enc_str);
 478         } elsif ( $encoding eq 'b' ) {
 479             use MIME::Base64;
 480             $enc_str = decode_base64($enc_str);
 481         } else {
 482             $RT::Logger->warning("Incorrect encoding '$encoding' in '$str', "
 483                 ."only Q(uoted-printable) and B(ase64) are supported");
 484         }
 485
 486         # now we have got a decoded subject, try to convert into the encoding
 487         if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) {
 488             if ( Encode::find_encoding($charset) ) {
 489                 Encode::from_to( $enc_str, $charset, $to_charset );
 490             } else {
 491                 $RT::Logger->warning("Charset '$charset' is not supported");
 492                 $enc_str =~ s/[^[:print:]]/\357\277\275/g;
 493                 Encode::from_to( $enc_str, 'UTF-8', $to_charset )
 494                     unless $to_charset eq 'utf-8';
 495             }
 496         }
 497         $str .= $prefix . $enc_str . $trailing;
 498     }
 499
 500     return ($str)
 501 }
 502
 503
 504 =head2 _FindOrGuessCharset MIME::Entity, $head_only
 505
 506 When handed a MIME::Entity will first attempt to read what charset the message is encoded in. Failing that, will use Encode::Guess to try to figure it out
 507
 508 If $head_only is true, only guesses charset for head parts.  This is because header's encoding (e.g. filename="...") may be different from that of body's.
 509
 510 =cut
 511
 512 sub _FindOrGuessCharset {
 513     my $entity = shift;
 514     my $head_only = shift;
 515     my $head = $entity->head;
 516
 517     if ( my $charset = $head->mime_attr("content-type.charset") ) {
 518         return _CanonicalizeCharset($charset);
 519     }
 520
 521     if ( !$head_only and $head->mime_type =~ m{^text/} ) {
 522         my $body = $entity->bodyhandle or return;
 523         return _GuessCharset( $body->as_string );
 524     }
 525     else {
 526
 527         # potentially binary data -- don't guess the body
 528         return _GuessCharset( $head->as_string );
 529     }
 530 }
 531
 532
 533
 534 =head2 _GuessCharset STRING
 535
  536 Tries to determine the string's encoding, using Encode::Detect::Detector and/or Encode::Guess as configured by EmailInputEncodings.
 537
 538 =cut
 539
 540 use constant HAS_ENCODE_GUESS => do { local $@; eval { require Encode::Guess; 1 } };
 541 use constant HAS_ENCODE_DETECT => do { local $@; eval { require Encode::Detect::Detector; 1 } };
 542
 543 sub _GuessCharset {
 544     my $fallback = _CanonicalizeCharset('iso-8859-1');
 545
 546     # if $_[0] is null/empty, we don't guess its encoding
 547     return $fallback
 548         unless defined $_[0] && length $_[0];
 549
 550     my @encodings = RT->Config->Get('EmailInputEncodings');
 551     unless ( @encodings ) {
 552         $RT::Logger->warning("No EmailInputEncodings set, fallback to $fallback");
 553         return $fallback;
 554     }
 555
 556     if ( $encodings[0] eq '*' ) {
 557         shift @encodings;
 558         if ( HAS_ENCODE_DETECT ) {
 559             my $charset = Encode::Detect::Detector::detect( $_[0] );
 560             if ( $charset ) {
 561                 $RT::Logger->debug("Encode::Detect::Detector guessed encoding: $charset");
 562                 return _CanonicalizeCharset( Encode::resolve_alias( $charset ) );
 563             }
 564             else {
 565                 $RT::Logger->debug("Encode::Detect::Detector failed to guess encoding");
 566             }
 567         }
 568         else {
 569             $RT::Logger->error(
 570                 "You requested to guess encoding, but we couldn't"
 571                 ." load Encode::Detect::Detector module"
 572             );
 573         }
 574     }
 575
 576     unless ( @encodings ) {
 577         $RT::Logger->warning("No EmailInputEncodings set except '*', fallback to $fallback");
 578         return $fallback;
 579     }
 580
 581     unless ( HAS_ENCODE_GUESS ) {
 582         $RT::Logger->error("We couldn't load Encode::Guess module, fallback to $fallback");
 583         return $fallback;
 584     }
 585
 586     Encode::Guess->set_suspects( @encodings );
 587     my $decoder = Encode::Guess->guess( $_[0] );
 588     unless ( defined $decoder ) {
 589         $RT::Logger->warning("Encode::Guess failed: decoder is undefined; fallback to $fallback");
 590         return $fallback;
 591     }
 592
 593     if ( ref $decoder ) {
 594         my $charset = $decoder->name;
 595         $RT::Logger->debug("Encode::Guess guessed encoding: $charset");
 596         return _CanonicalizeCharset( $charset );
 597     }
 598     elsif ($decoder =~ /(\S+ or .+)/) {
 599         my %matched = map { $_ => 1 } split(/ or /, $1);
 600         return 'utf-8' if $matched{'utf8'}; # one and only normalization
 601
 602         foreach my $suspect (RT->Config->Get('EmailInputEncodings')) {
 603             next unless $matched{$suspect};
 604             $RT::Logger->debug("Encode::Guess ambiguous ($decoder); using $suspect");
 605             return _CanonicalizeCharset( $suspect );
 606         }
 607     }
 608     else {
 609         $RT::Logger->warning("Encode::Guess failed: $decoder; fallback to $fallback");
 610     }
 611
 612     return $fallback;
 613 }
 614
 615 =head2 _CanonicalizeCharset NAME
 616
 617 canonicalize charset, return lowercase version.
 618 special cases are: gb2312 => gbk, utf8 => utf-8
 619
 620 =cut
 621
 622 sub _CanonicalizeCharset {
 623     my $charset = lc shift;
 624     return $charset unless $charset;
 625
 626     # Canonicalize aliases if they're known
 627     if (my $canonical = Encode::resolve_alias($charset)) {
 628         $charset = $canonical;
 629     }
 630
 631     if ( $charset eq 'utf8' || $charset eq 'utf-8-strict' ) {
 632         return 'utf-8';
 633     }
 634     elsif ( $charset eq 'euc-cn' ) {
 635         # gbk is superset of gb2312/euc-cn so it's safe
 636         return 'gbk';
 637         # XXX TODO: gb18030 is an even larger, more permissive superset of gbk,
 638         # but needs Encode::HanExtra installed
 639     }
 640     else {
 641         return $charset;
 642     }
 643 }
 644
 645
  646 =head2 SetMIMEHeadToEncoding Head => HEAD, From => OLD_ENCODING, To => NEW_Encoding, PreserveWords => BOOL, IsOut => BOOL
 647
 648 Converts a MIME Head from one encoding to another. This totally violates the RFC.
 649 We should never need this. But, Surprise!, MUAs are badly broken and do this kind of stuff
 650 all the time
 651
 652
 653 =cut
 654
 655 sub SetMIMEHeadToEncoding {
 656     my ( $head, $charset, $enc, $preserve_words, $is_out );
 657
 658     if ( @_ <= 4 ) {
 659         ( $head, $charset, $enc, $preserve_words ) = @_;
 660     }
 661     else {
 662         my %args = (
 663             Head      => undef,
 664             From          => undef,
 665             To            => undef,
 666             PreserveWords => undef,
 667             IsOut         => undef,
 668             @_,
 669         );
 670
 671         $head           = $args{Head};
 672         $charset        = $args{From};
 673         $enc            = $args{To};
 674         $preserve_words = $args{PreserveWords};
 675         $is_out         = $args{IsOut};
 676     }
 677
 678     unless ( $head && $charset && $enc ) {
 679         RT->Logger->error(
 680             "Missing Head or From or To arguments");
 681         return;
 682     }
 683
 684     $charset = _CanonicalizeCharset($charset);
 685     $enc     = _CanonicalizeCharset($enc);
 686
 687     return if $charset eq $enc and $preserve_words;
 688
 689     foreach my $tag ( $head->tags ) {
 690         next unless $tag; # seen in wild: headers with no name
 691         my @values = $head->get_all($tag);
 692         $head->delete($tag);
 693         foreach my $value (@values) {
 694             if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) {
 695                 Encode::_utf8_off($value);
 696                 my $orig_value = $value;
 697                 ( my $success, $value ) = EncodeFromToWithCroak( $orig_value, $charset => $enc );
 698                 if ( !$success ) {
 699                     my $error = $value;
 700                     if ($is_out) {
 701                         $value = $orig_value;
 702                         $head->add( $tag, $value );
 703                         next;
 704                     }
 705
 706                     my $guess = _GuessCharset($orig_value);
 707                     if ( $guess && $guess ne $charset ) {
 708                         $RT::Logger->error( "Encoding error: " . $error . " falling back to Guess($guess) => $enc" );
 709                         ( $success, $value ) = EncodeFromToWithCroak( $orig_value, $guess, $enc );
 710                         $error = $value unless $success;
 711                     }
 712
 713                     if ( !$success ) {
 714                         $RT::Logger->error( "Encoding error: " . $error . " forcing conversion to $charset => $enc" );
 715                         $value = $orig_value;
 716                         Encode::from_to( $value, $charset => $enc );
 717                     }
 718                 }
 719             }
 720
 721             $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )
 722                 unless $preserve_words;
 723
 724             # We intentionally add a leading space when re-adding the
 725             # header; Mail::Header strips it before storing, but it
 726             # serves to prevent it from "helpfully" canonicalizing
 727             # $head->add("Subject", "Subject: foo") into the same as
 728             # $head->add("Subject", "foo");
 729             $head->add( $tag, " " . $value );
 730         }
 731     }
 732
 733 }
 734
 735 =head2 EncodeFromToWithCroak $string, $from, $to
 736
 737 Try to encode string from encoding $from to encoding $to in croak mode
 738
 739 return (1, $encoded_string) if success, otherwise (0, $error)
 740
 741 =cut
 742
 743 sub EncodeFromToWithCroak {
 744     my $string = shift;
 745     my $from   = shift;
 746     my $to     = shift;
 747
 748     eval { Encode::from_to( $string, $from => $to, Encode::FB_CROAK ); };
 749     return $@ ? ( 0, $@ ) : ( 1, $string );
 750 }
 751
 752 RT::Base->_ImportOverlays();
 753
 754 1;  # End of module.
 755