Master to 4.2.8

[usit-rt.git] / lib / RT / Record.pm
diff --git a/lib/RT/Record.pm b/lib/RT/Record.pm

index 84410db93803841877f56b3b41176b0267deb835..9e9f3f6ba629f313722ebcaaa5dc1481b87f3282 100644 (file)
--- a/lib/RT/Record.pm
+++ b/lib/RT/Record.pm
@@ -75,7 +75,6 @@ require RT::User;
  require RT::Attributes;
  require RT::Transactions;
  require RT::Link;
-use Encode qw();
  
  our $_TABLE_ATTR = { };
  
@@ -657,12 +656,16 @@ sub __Value {
  
      return undef if (!defined $value);
  
+    # Pg returns character columns as character strings; mysql and
+    # sqlite return them as bytes.  While mysql can be made to return
+    # characters, using the mysql_enable_utf8 flag, the "Content" column
+    # is bytes on mysql and characters on Postgres, making true
+    # consistency impossible.
      if ( $args{'decode_utf8'} ) {
-        if ( !utf8::is_utf8($value) ) {
+        if ( !utf8::is_utf8($value) ) { # mysql/sqlite
              utf8::decode($value);
          }
-    }
-    else {
+    } else {
          if ( utf8::is_utf8($value) ) {
              utf8::encode($value);
          }
@@ -748,7 +751,7 @@ sub _Accessible  {
  =head2 _EncodeLOB BODY MIME_TYPE FILENAME
  
  Takes a potentially large attachment. Returns (ContentEncoding,
-EncodedBody, MimeType, Filename) based on system configuration and
+EncodedBody, MimeType, Filename, NoteArgs) based on system configuration and
  selected database.  Returns a custom (short) text/plain message if
  DropLongAttachments causes an attachment to not be stored.
  
@@ -760,78 +763,97 @@ encoded on databases which are strict.
  This function expects to receive an octet string in order to properly
  evaluate and encode it.  It will return an octet string.
  
+NoteArgs is currently used to tell the caller that the message was too long and
+was truncated or dropped. It's a hashref which is expected to be passed to
+L<RT::Record/_NewTransaction>.
+
  =cut
  
  sub _EncodeLOB {
-        my $self = shift;
-        my $Body = shift;
-        my $MIMEType = shift || '';
-        my $Filename = shift;
+    my $self = shift;
+    my $Body = shift;
+    my $MIMEType = shift || '';
+    my $Filename = shift;
  
-        my $ContentEncoding = 'none';
+    my $ContentEncoding = 'none';
+    my $note_args;
  
-        #get the max attachment length from RT
-        my $MaxSize = RT->Config->Get('MaxAttachmentSize');
+    RT::Util::assert_bytes( $Body );
  
-        #if the current attachment contains nulls and the
-        #database doesn't support embedded nulls
+    #get the max attachment length from RT
+    my $MaxSize = RT->Config->Get('MaxAttachmentSize');
  
-        if ( ( !$RT::Handle->BinarySafeBLOBs ) && ( $Body =~ /\x00/ ) ) {
+    #if the current attachment contains nulls and the
+    #database doesn't support embedded nulls
  
-            # set a flag telling us to mimencode the attachment
-            $ContentEncoding = 'base64';
+    if ( ( !$RT::Handle->BinarySafeBLOBs ) && ( $Body =~ /\x00/ ) ) {
  
-            #cut the max attchment size by 25% (for mime-encoding overhead.
-            $RT::Logger->debug("Max size is $MaxSize");
-            $MaxSize = $MaxSize * 3 / 4;
-        # Some databases (postgres) can't handle non-utf8 data
-        } elsif (    !$RT::Handle->BinarySafeBLOBs
-                  && $Body =~ /\P{ASCII}/
-                  && !Encode::is_utf8( $Body, 1 ) ) {
-              $ContentEncoding = 'quoted-printable';
-        }
+        # set a flag telling us to mimencode the attachment
+        $ContentEncoding = 'base64';
  
-        #if the attachment is larger than the maximum size
-        if ( ($MaxSize) and ( $MaxSize < length($Body) ) ) {
+        #cut the max attachment size by 25% (for mime-encoding overhead).
+        $RT::Logger->debug("Max size is $MaxSize");
+        $MaxSize = $MaxSize * 3 / 4;
+    # Some databases (postgres) can't handle non-utf8 data
+    } elsif (    !$RT::Handle->BinarySafeBLOBs
+              && $Body =~ /\P{ASCII}/
+              && !Encode::is_utf8( $Body, 1 ) ) {
+          $ContentEncoding = 'quoted-printable';
+    }
  
-            # if we're supposed to truncate large attachments
-            if (RT->Config->Get('TruncateLongAttachments')) {
+    #if the attachment is larger than the maximum size
+    if ( ($MaxSize) and ( $MaxSize < length($Body) ) ) {
  
-                # truncate the attachment to that length.
-                $Body = substr( $Body, 0, $MaxSize );
+        my $size = length $Body;
+        # if we're supposed to truncate large attachments
+        if (RT->Config->Get('TruncateLongAttachments')) {
  
-            }
+            $RT::Logger->info("$self: Truncated an attachment of size $size");
  
-            # elsif we're supposed to drop large attachments on the floor,
-            elsif (RT->Config->Get('DropLongAttachments')) {
+            # truncate the attachment to that length.
+            $Body = substr( $Body, 0, $MaxSize );
+            $note_args = {
+                Type           => 'AttachmentTruncate',
+                Data           => $Filename,
+                OldValue       => $size,
+                NewValue       => $MaxSize,
+                ActivateScrips => 0,
+            };
  
-                # drop the attachment on the floor
-                $RT::Logger->info( "$self: Dropped an attachment of size "
-                                   . length($Body));
-                $RT::Logger->info( "It started: " . substr( $Body, 0, 60 ) );
-                $Filename .= ".txt" if $Filename;
-                return ("none", "Large attachment dropped", "text/plain", $Filename );
-            }
          }
  
-        # if we need to mimencode the attachment
-        if ( $ContentEncoding eq 'base64' ) {
-
-            # base64 encode the attachment
-            Encode::_utf8_off($Body);
-            $Body = MIME::Base64::encode_base64($Body);
-
-        } elsif ($ContentEncoding eq 'quoted-printable') {
-            Encode::_utf8_off($Body);
-            $Body = MIME::QuotedPrint::encode($Body);
+        # elsif we're supposed to drop large attachments on the floor,
+        elsif (RT->Config->Get('DropLongAttachments')) {
+
+            # drop the attachment on the floor
+            $RT::Logger->info( "$self: Dropped an attachment of size $size" );
+            $RT::Logger->info( "It started: " . substr( $Body, 0, 60 ) );
+            $note_args = {
+                Type           => 'AttachmentDrop',
+                Data           => $Filename,
+                OldValue       => $size,
+                NewValue       => $MaxSize,
+                ActivateScrips => 0,
+            };
+            $Filename .= ".txt" if $Filename && $Filename !~ /\.txt$/;
+            return ("none", "Large attachment dropped", "text/plain", $Filename, $note_args );
          }
+    }
  
+    # if we need to mimencode the attachment
+    if ( $ContentEncoding eq 'base64' ) {
+        # base64 encode the attachment
+        $Body = MIME::Base64::encode_base64($Body);
  
-        return ($ContentEncoding, $Body, $MIMEType, $Filename );
+    } elsif ($ContentEncoding eq 'quoted-printable') {
+        $Body = MIME::QuotedPrint::encode($Body);
+    }
  
+
+    return ($ContentEncoding, $Body, $MIMEType, $Filename, $note_args );
  }
  
-=head2 _DecodeLOB
+=head2 _DecodeLOB C<ContentType>, C<ContentEncoding>, C<Content>
  
  Unpacks data stored in the database, which may be base64 or QP encoded
  because of our need to store binary and badly encoded data in columns
@@ -847,6 +869,12 @@ This is similar to how we filter all data coming in via the web UI in
  RT::Interface::Web::DecodeARGS. This filter should only end up being
  applied to old data from less UTF-8-safe versions of RT.
  
+If the passed C<ContentType> includes a character set, that will be used
+to decode textual data; the default character set is UTF-8.  This is
+necessary because while we attempt to store textual data as UTF-8, the
+definition of "textual" has migrated over time, and thus we may now need
+to attempt to decode data that was previously not transcoded on insertion.
+
  Important Note - This function expects an octet string and returns a
  character string for non-binary data.
  
@@ -858,6 +886,8 @@ sub _DecodeLOB {
      my $ContentEncoding = shift || 'none';
      my $Content         = shift;
  
+    RT::Util::assert_bytes( $Content );
+
      if ( $ContentEncoding eq 'base64' ) {
          $Content = MIME::Base64::decode_base64($Content);
      }
@@ -868,9 +898,15 @@ sub _DecodeLOB {
          return ( $self->loc( "Unknown ContentEncoding [_1]", $ContentEncoding ) );
      }
      if ( RT::I18N::IsTextualContentType($ContentType) ) {
-       $Content = Encode::decode('UTF-8',$Content,Encode::FB_PERLQQ) unless Encode::is_utf8($Content);
+        my $entity = MIME::Entity->new();
+        $entity->head->add("Content-Type", $ContentType);
+        $entity->bodyhandle( MIME::Body::Scalar->new( $Content ) );
+        my $charset = RT::I18N::_FindOrGuessCharset($entity);
+        $charset = 'utf-8' if not $charset or not Encode::find_encoding($charset);
+
+        $Content = Encode::decode($charset,$Content,Encode::FB_PERLQQ);
      }
-        return ($Content);
+    return ($Content);
  }
  
  =head2 Update  ARGSHASH
@@ -1762,6 +1798,9 @@ our %TRANSACTION_CLASSIFICATION = (
              Owner Creator LastUpdatedBy
          ) ),
      },
+    SystemError => 'error',
+    AttachmentTruncate => 'attachment-truncate',
+    AttachmentDrop => 'attachment-drop',
      __default => 'other',
  );
  
@@ -1969,8 +2008,8 @@ sub _AddCustomFieldValue {
                  $i++;
                  if ( $i < $cf_values ) {
                      my ( $val, $msg ) = $cf->DeleteValueForObject(
-                        Object  => $self,
-                        Content => $value->Content
+                        Object => $self,
+                        Id     => $value->id,
                      );
                      unless ($val) {
                          return ( 0, $msg );
@@ -1986,31 +2025,14 @@ sub _AddCustomFieldValue {
              $values->RedoSearch if $i; # redo search if have deleted at least one value
          }
  
-        my ( $old_value, $old_content );
-        if ( $old_value = $values->First ) {
-            $old_content = $old_value->Content;
-            $old_content = undef if defined $old_content && !length $old_content;
-
-            my $is_the_same = 1;
-            if ( defined $args{'Value'} ) {
-                $is_the_same = 0 unless defined $old_content
-                    && $old_content eq $args{'Value'};
-            } else {
-                $is_the_same = 0 if defined $old_content;
-            }
-            if ( $is_the_same ) {
-                my $old_content = $old_value->LargeContent;
-                if ( defined $args{'LargeContent'} ) {
-                    $is_the_same = 0 unless defined $old_content
-                        && $old_content eq $args{'LargeContent'};
-                } else {
-                    $is_the_same = 0 if defined $old_content;
-                }
-            }
-
-            return $old_value->id if $is_the_same;
+        if ( my $entry = $values->HasEntry($args{'Value'}, $args{'LargeContent'}) ) {
+            return $entry->id;
          }
  
+        my $old_value = $values->First;
+        my $old_content;
+        $old_content = $old_value->Content if $old_value;
+
          my ( $new_value_id, $value_msg ) = $cf->AddValueForObject(
              Object       => $self,
              Content      => $args{'Value'},
@@ -2077,6 +2099,11 @@ sub _AddCustomFieldValue {
  
      # otherwise, just add a new value and record "new value added"
      else {
+        my $values = $cf->ValuesForObject($self);
+        if ( my $entry = $values->HasEntry($args{'Value'}, $args{'LargeContent'}) ) {
+            return $entry->id;
+        }
+
          my ($new_value_id, $msg) = $cf->AddValueForObject(
              Object       => $self,
              Content      => $args{'Value'},
@@ -2413,10 +2440,17 @@ sub Serialize {
      $store{$_} = $values{lc $_} for @cols;
      $store{id} = $values{id}; # Explicitly necessary in some cases
  
-    # Un-encode things with a ContentEncoding for transfer
+    # Un-apply the _transfer_ encoding, but don't mess with the octets
+    # themselves.  Calling ->Content directly would, in some cases,
+    # decode from some mostly-unknown character set -- which reversing
+    # on the far end would be complicated.
      if ($ca{ContentEncoding} and $ca{ContentType}) {
          my ($content_col) = grep {exists $ca{$_}} qw/LargeContent Content/;
-        $store{$content_col} = $self->$content_col;
+        $store{$content_col} = $self->_DecodeLOB(
+            "application/octet-stream", # Lie so that we get bytes, not characters
+            $self->ContentEncoding,
+            $self->_Value( $content_col, decode_utf8 => 0 )
+        );
          delete $store{ContentEncoding};
      }
      return %store unless $args{UIDs};
@@ -2455,8 +2489,7 @@ sub PreInflate {
          my ($content_col) = grep {exists $ca{$_}} qw/LargeContent Content/;
          if (defined $data->{$content_col}) {
              my ($ContentEncoding, $Content) = $class->_EncodeLOB(
-                $data->{$content_col},
-                $data->{ContentType},
+                $data->{$content_col}, $data->{ContentType},
              );
              $data->{ContentEncoding} = $ContentEncoding;
              $data->{$content_col} = $Content;