Upgrade to 4.2.8
[usit-rt.git] / sbin / rt-setup-fulltext-index
CommitLineData
84fb5b46
MKG
1#!/usr/bin/perl
2# BEGIN BPS TAGGED BLOCK {{{
3#
4# COPYRIGHT:
5#
3ffc5f4f 6# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC
84fb5b46
MKG
7# <sales@bestpractical.com>
8#
9# (Except where explicitly superseded by other copyright notices)
10#
11#
12# LICENSE:
13#
14# This work is made available to you under the terms of Version 2 of
15# the GNU General Public License. A copy of that license should have
16# been provided with this software, but in any event can be snarfed
17# from www.gnu.org.
18#
19# This work is distributed in the hope that it will be useful, but
20# WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22# General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27# 02110-1301 or visit their web page on the internet at
28# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
29#
30#
31# CONTRIBUTION SUBMISSION POLICY:
32#
33# (The following paragraph is not intended to limit the rights granted
34# to you to modify and distribute this software under the terms of
35# the GNU General Public License and is only of importance to you if
36# you choose to contribute your changes and enhancements to the
37# community by submitting them to Best Practical Solutions, LLC.)
38#
39# By intentionally submitting any modifications, corrections or
40# derivatives to this work, or any other work intended for use with
41# Request Tracker, to Best Practical Solutions, LLC, you confirm that
42# you are the copyright holder for those contributions and you grant
43# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
44# royalty-free, perpetual, license to use, copy, create derivative
45# works based on those contributions, and sublicense and distribute
46# those contributions and any derivatives thereof.
47#
48# END BPS TAGGED BLOCK }}}
49use strict;
50use warnings;
51no warnings 'once';
52
53# fix lib paths, some may be relative
3ffc5f4f 54BEGIN { # BEGIN RT CMD BOILERPLATE
84fb5b46 55 require File::Spec;
3ffc5f4f 56 require Cwd;
84fb5b46
MKG
57 my @libs = ("lib", "local/lib");
58 my $bin_path;
59
60 for my $lib (@libs) {
61 unless ( File::Spec->file_name_is_absolute($lib) ) {
3ffc5f4f 62 $bin_path ||= ( File::Spec->splitpath(Cwd::abs_path(__FILE__)) )[1];
84fb5b46
MKG
63 $lib = File::Spec->catfile( $bin_path, File::Spec->updir, $lib );
64 }
65 unshift @INC, $lib;
66 }
3ffc5f4f 67
84fb5b46
MKG
68}
69
70BEGIN {
71 use RT;
72 RT::LoadConfig();
73 RT::Init();
74};
75use RT::Interface::CLI ();
76
77my %DB = (
78 type => scalar RT->Config->Get('DatabaseType'),
79 user => scalar RT->Config->Get('DatabaseUser'),
3ffc5f4f 80 admin => scalar RT->Config->Get('DatabaseAdmin'),
84fb5b46
MKG
81 admin_password => undef,
82);
83
84my %OPT = (
85 help => 0,
86 ask => 1,
87 dryrun => 0,
88 attachments => 1,
89);
90
91my %DEFAULT;
92if ( $DB{'type'} eq 'Pg' ) {
93 %DEFAULT = (
94 table => 'Attachments',
95 column => 'ContentIndex',
96 );
97}
98elsif ( $DB{'type'} eq 'mysql' ) {
99 %DEFAULT = (
100 table => 'AttachmentsIndex',
101 );
102}
103elsif ( $DB{'type'} eq 'Oracle' ) {
104 %DEFAULT = (
105 prefix => 'rt_fts_',
106 );
107}
108
109use Getopt::Long qw(GetOptions);
110GetOptions(
111 'h|help!' => \$OPT{'help'},
112 'ask!' => \$OPT{'ask'},
113 'dry-run!' => \$OPT{'dryrun'},
114 'attachments!' => \$OPT{'attachments'},
115
116 'table=s' => \$OPT{'table'},
117 'column=s' => \$OPT{'column'},
118 'url=s' => \$OPT{'url'},
119 'maxmatches=i' => \$OPT{'maxmatches'},
120 'index-type=s' => \$OPT{'index-type'},
121
122 'dba=s' => \$DB{'admin'},
123 'dba-password=s' => \$DB{'admin_password'},
124) or show_help();
125
126if ( $OPT{'help'} || (!$DB{'admin'} && $DB{'type'} eq 'Oracle' ) ) {
127 show_help( !$OPT{'help'} );
128}
129
130my $dbh = $RT::Handle->dbh;
131$dbh->{'RaiseError'} = 1;
132$dbh->{'PrintError'} = 1;
133
134if ( $DB{'type'} eq 'mysql' ) {
135 check_sphinx();
136 my $table = $OPT{'table'} || prompt(
137 message => "Enter name of a new MySQL table that will be used to connect to the\n"
138 . "Sphinx server:",
139 default => $DEFAULT{'table'},
140 silent => !$OPT{'ask'},
141 );
3ffc5f4f
MKG
142
143 my $url = 'sphinx://localhost:3312/rt';
144 my $version = ($dbh->selectrow_array("show variables like 'version'"))[1];
145 $url = 'sphinx://127.0.0.1:3312/rt'
146 if $version and $version =~ /^(\d+\.\d+)/ and $1 >= 5.5;
147
148 $url = $OPT{'url'} || prompt(
84fb5b46
MKG
149 message => "Enter URL of the sphinx search server; this should be of the form\n"
150 . "sphinx://<server>:<port>/<index name>",
3ffc5f4f 151 default => $url,
84fb5b46
MKG
152 silent => !$OPT{'ask'},
153 );
154 my $maxmatches = $OPT{'maxmatches'} || prompt(
155 message => "Maximum number of matches to return; this is the maximum number of\n"
156 . "attachment records returned by the search, not the maximum number\n"
157 . "of tickets. Both your RT_SiteConfig.pm and your sphinx.conf must\n"
158 . "agree on this value. Larger values cause your Sphinx server to\n"
159 . "consume more memory and CPU time per query.",
160 default => 10000,
161 silent => !$OPT{'ask'},
162 );
163
164 my $schema = <<END;
165CREATE TABLE $table (
3ffc5f4f 166 id BIGINT NOT NULL,
84fb5b46
MKG
167 weight INTEGER NOT NULL,
168 query VARCHAR(3072) NOT NULL,
169 INDEX(query)
170) ENGINE=SPHINX CONNECTION="$url" CHARACTER SET utf8
171END
172
173 do_error_is_ok( dba_handle() => "DROP TABLE $table" )
174 unless $OPT{'dryrun'};
175 insert_schema( $schema );
176
177 print_rt_config( Table => $table, MaxMatches => $maxmatches );
178
179 require URI;
180 my $urlo = URI->new( $url );
181 my ($host, $port) = split /:/, $urlo->authority;
182 my $index = $urlo->path;
183 $index =~ s{^/+}{};
184
185 my $var_path = $RT::VarPath;
186
187 my %sphinx_conf = ();
188 $sphinx_conf{'host'} = RT->Config->Get('DatabaseHost');
189 $sphinx_conf{'db'} = RT->Config->Get('DatabaseName');
190 $sphinx_conf{'user'} = RT->Config->Get('DatabaseUser');
191 $sphinx_conf{'pass'} = RT->Config->Get('DatabasePassword');
192
193 print <<END
194
195Below is a simple Sphinx configuration which can be used to index all
196text/plain attachments in your database. This configuration is not
197ideal; you should read the Sphinx documentation to understand how to
198configure it to better suit your needs.
199
200source rt {
201 type = mysql
202
203 sql_host = $sphinx_conf{'host'}
204 sql_db = $sphinx_conf{'db'}
205 sql_user = $sphinx_conf{'user'}
206 sql_pass = $sphinx_conf{'pass'}
207
208 sql_query_pre = SET NAMES utf8
209 sql_query = \\
210 SELECT a.id, a.content FROM Attachments a \\
211 JOIN Transactions txn ON a.TransactionId = txn.id AND txn.ObjectType = 'RT::Ticket' \\
212 JOIN Tickets t ON txn.ObjectId = t.id \\
213 WHERE a.ContentType = 'text/plain' AND t.Status != 'deleted'
214
215 sql_query_info = SELECT * FROM Attachments WHERE id=\$id
216}
217
218index $index {
219 source = rt
220 path = $var_path/sphinx/index
221 docinfo = extern
222 charset_type = utf-8
223}
224
225indexer {
226 mem_limit = 32M
227}
228
229searchd {
230 port = $port
231 log = $var_path/sphinx/searchd.log
232 query_log = $var_path/sphinx/query.log
233 read_timeout = 5
234 max_children = 30
235 pid_file = $var_path/sphinx/searchd.pid
236 max_matches = $maxmatches
237 seamless_rotate = 1
238 preopen_indexes = 0
239 unlink_old = 1
240}
241
242END
243
244}
245elsif ( $DB{'type'} eq 'Pg' ) {
246 check_tsvalue();
247 my $table = $OPT{'table'} || prompt(
248 message => "Enter the name of a DB table that will be used to store the Pg tsvector.\n"
249 . "You may either use the existing Attachments table, or create a new\n"
250 . "table.",
251 default => $DEFAULT{'table'},
252 silent => !$OPT{'ask'},
253 );
254 my $column = $OPT{'column'} || prompt(
255 message => 'Enter the name of a column that will be used to store the Pg tsvector:',
256 default => $DEFAULT{'column'},
257 silent => !$OPT{'ask'},
258 );
259
260 my $schema;
261 my $drop;
262 if ( lc($table) eq 'attachments' ) {
263 $drop = "ALTER TABLE $table DROP COLUMN $column";
264 $schema = "ALTER TABLE $table ADD COLUMN $column tsvector";
265 } else {
266 $drop = "DROP TABLE $table";
267 $schema = "CREATE TABLE $table ( "
268 ."id INTEGER NOT NULL,"
269 ."$column tsvector )";
270 }
271
272 my $index_type = lc($OPT{'index-type'} || '');
273 while ( $index_type ne 'gist' and $index_type ne 'gin' ) {
274 $index_type = lc prompt(
275 message => "You may choose between GiST or GIN indexes; the former is several times\n"
276 . "slower to search, but takes less space on disk and is faster to update.",
277 default => 'GiST',
278 silent => !$OPT{'ask'},
279 );
280 }
281
282 do_error_is_ok( dba_handle() => $drop )
283 unless $OPT{'dryrun'};
284 insert_schema( $schema );
285 insert_schema("CREATE INDEX ${column}_idx ON $table USING $index_type($column)");
286
287 print_rt_config( Table => $table, Column => $column );
288}
289elsif ( $DB{'type'} eq 'Oracle' ) {
290 {
291 my $dbah = dba_handle();
292 do_print_error( $dbah => 'GRANT CTXAPP TO '. $DB{'user'} );
293 do_print_error( $dbah => 'GRANT EXECUTE ON CTXSYS.CTX_DDL TO '. $DB{'user'} );
294 }
295
296 my %PREFERENCES = (
297 datastore => {
298 type => 'DIRECT_DATASTORE',
299 },
300 filter => {
301 type => 'AUTO_FILTER',
302# attributes => {
303# timeout => 120, # seconds
304# timeout_type => 'HEURISTIC', # or 'FIXED'
305# },
306 },
307 lexer => {
308 type => 'WORLD_LEXER',
309 },
310 word_list => {
311 type => 'BASIC_WORDLIST',
312 attributes => {
313 stemmer => 'AUTO',
314 fuzzy_match => 'AUTO',
315# fuzzy_score => undef,
316# fuzzy_numresults => undef,
317# substring_index => undef,
318# prefix_index => undef,
319# prefix_length_min => undef,
320# prefix_length_max => undef,
321# wlidcard_maxterms => undef,
322 },
323 },
324 'section_group' => {
325 type => 'NULL_SECTION_GROUP',
326 },
327
328 storage => {
329 type => 'BASIC_STORAGE',
330 attributes => {
331 R_TABLE_CLAUSE => 'lob (data) store as (cache)',
332 I_INDEX_CLAUSE => 'compress 2',
333 },
334 },
335 );
336
337 my @params = ();
338 push @params, ora_create_datastore( %{ $PREFERENCES{'datastore'} } );
339 push @params, ora_create_filter( %{ $PREFERENCES{'filter'} } );
340 push @params, ora_create_lexer( %{ $PREFERENCES{'lexer'} } );
341 push @params, ora_create_word_list( %{ $PREFERENCES{'word_list'} } );
342 push @params, ora_create_stop_list();
343 push @params, ora_create_section_group( %{ $PREFERENCES{'section_group'} } );
344 push @params, ora_create_storage( %{ $PREFERENCES{'storage'} } );
345
346 my $index_params = join "\n", @params;
347 my $index_name = $DEFAULT{prefix} .'index';
348 do_error_is_ok( $dbh => "DROP INDEX $index_name" )
349 unless $OPT{'dryrun'};
350 $dbh->do(
351 "CREATE INDEX $index_name ON Attachments(Content)
352 indextype is ctxsys.context parameters('
353 $index_params
354 ')",
355 ) unless $OPT{'dryrun'};
356
357 print_rt_config( IndexName => $index_name );
358}
359else {
360 die "Full-text indexes on $DB{type} are not yet supported";
361}
362
363sub check_tsvalue {
364 my $dbh = $RT::Handle->dbh;
365 my $fts = ($dbh->selectrow_array(<<EOQ))[0];
366SELECT 1 FROM information_schema.routines WHERE routine_name = 'plainto_tsquery'
367EOQ
368 unless ($fts) {
369 print STDERR <<EOT;
370
371Your PostgreSQL server does not include full-text support. You will
372need to upgrade to PostgreSQL version 8.3 or higher to use full-text
373indexing.
374
375EOT
376 exit 1;
377 }
378}
379
380sub check_sphinx {
381 return if $RT::Handle->CheckSphinxSE;
382
383 print STDERR <<EOT;
384
385Your MySQL server has not been compiled with the Sphinx storage engine
386(sphinxse). You will need to recompile MySQL according to the
387instructions in Sphinx's documentation at
388http://sphinxsearch.com/docs/current.html#sphinxse-installing
389
390EOT
391 exit 1;
392}
393
394sub ora_create_datastore {
395 return sprintf 'datastore %s', ora_create_preference(
396 @_,
397 name => 'datastore',
398 );
399}
400
401sub ora_create_filter {
402 my $res = '';
403 $res .= sprintf "format column %s\n", ora_create_format_column();
404 $res .= sprintf 'filter %s', ora_create_preference(
405 @_,
406 name => 'filter',
407 );
408 return $res;
409}
410
411sub ora_create_lexer {
412 return sprintf 'lexer %s', ora_create_preference(
413 @_,
414 name => 'lexer',
415 );
416}
417
418sub ora_create_word_list {
419 return sprintf 'wordlist %s', ora_create_preference(
420 @_,
421 name => 'word_list',
422 );
423}
424
425sub ora_create_stop_list {
426 my $file = shift || 'etc/stopwords/en.txt';
427 return '' unless -e $file;
428
429 my $name = $DEFAULT{'prefix'} .'stop_list';
430 unless ($OPT{'dryrun'}) {
431 do_error_is_ok( $dbh => 'begin ctx_ddl.drop_stoplist(?); end;', $name );
432
433 $dbh->do(
434 'begin ctx_ddl.create_stoplist(?, ?); end;',
435 undef, $name, 'BASIC_STOPLIST'
436 );
437
438 open( my $fh, '<:utf8', $file )
439 or die "couldn't open file '$file': $!";
440 while ( my $word = <$fh> ) {
441 chomp $word;
442 $dbh->do(
443 'begin ctx_ddl.add_stopword(?, ?); end;',
444 undef, $name, $word
445 );
446 }
447 close $fh;
448 }
449 return sprintf 'stoplist %s', $name;
450}
451
452sub ora_create_section_group {
453 my %args = @_;
454 my $name = $DEFAULT{'prefix'} .'section_group';
455 unless ($OPT{'dryrun'}) {
456 do_error_is_ok( $dbh => 'begin ctx_ddl.drop_section_group(?); end;', $name );
457 $dbh->do(
458 'begin ctx_ddl.create_section_group(?, ?); end;',
459 undef, $name, $args{'type'}
460 );
461 }
462 return sprintf 'section group %s', $name;
463}
464
465sub ora_create_storage {
466 return sprintf 'storage %s', ora_create_preference(
467 @_,
468 name => 'storage',
469 );
470}
471
472sub ora_create_format_column {
473 my $column_name = 'ContentOracleFormat';
474 return $column_name if $OPT{'dryrun'};
475 unless (
476 $dbh->column_info(
477 undef, undef, uc('Attachments'), uc( $column_name )
478 )->fetchrow_array
479 ) {
480 $dbh->do(qq{
481 ALTER TABLE Attachments ADD $column_name VARCHAR2(10)
482 });
483 }
484
485 my $detect_format = qq{
486 CREATE OR REPLACE FUNCTION $DEFAULT{prefix}detect_format_simple(
487 parent IN NUMBER,
488 type IN VARCHAR2,
489 encoding IN VARCHAR2,
490 fname IN VARCHAR2
491 )
492 RETURN VARCHAR2
493 AS
494 format VARCHAR2(10);
495 BEGIN
496 format := CASE
497 };
498 unless ( $OPT{'attachments'} ) {
499 $detect_format .= qq{
500 WHEN fname IS NOT NULL THEN 'ignore'
501 };
502 }
503 $detect_format .= qq{
504 WHEN type = 'text' THEN 'text'
505 WHEN type = 'text/rtf' THEN 'ignore'
506 WHEN type LIKE 'text/%' THEN 'text'
507 WHEN type LIKE 'message/%' THEN 'text'
508 ELSE 'ignore'
509 END;
510 RETURN format;
511 END;
512 };
513 ora_create_procedure( $detect_format );
514
515 $dbh->do(qq{
516 UPDATE Attachments
517 SET $column_name = $DEFAULT{prefix}detect_format_simple(
518 Parent,
519 ContentType, ContentEncoding,
520 Filename
521 )
522 WHERE $column_name IS NULL
523 });
524 $dbh->do(qq{
525 CREATE OR REPLACE TRIGGER $DEFAULT{prefix}set_format
526 BEFORE INSERT
527 ON Attachments
528 FOR EACH ROW
529 BEGIN
530 :new.$column_name := $DEFAULT{prefix}detect_format_simple(
531 :new.Parent,
532 :new.ContentType, :new.ContentEncoding,
533 :new.Filename
534 );
535 END;
536 });
537 return $column_name;
538}
539
540sub ora_create_preference {
541 my %info = @_;
542 my $name = $DEFAULT{'prefix'} . $info{'name'};
543 return $name if $OPT{'dryrun'};
544 do_error_is_ok( $dbh => 'begin ctx_ddl.drop_preference(?); end;', $name );
545 $dbh->do(
546 'begin ctx_ddl.create_preference(?, ?); end;',
547 undef, $name, $info{'type'}
548 );
549 return $name unless $info{'attributes'};
550
551 while ( my ($attr, $value) = each %{ $info{'attributes'} } ) {
552 $dbh->do(
553 'begin ctx_ddl.set_attribute(?, ?, ?); end;',
554 undef, $name, $attr, $value
555 );
556 }
557
558 return $name;
559}
560
561sub ora_create_procedure {
562 my $text = shift;
563
564 return if $OPT{'dryrun'};
565 my $status = $dbh->do($text, { RaiseError => 0 });
566
567 # Statement succeeded
568 return if $status;
569
570 if ( 6550 != $dbh->err ) {
571 # Utter failure
572 die $dbh->errstr;
573 }
574 else {
575 my $msg = $dbh->func( 'plsql_errstr' );
576 die $dbh->errstr if !defined $msg;
577 die $msg if $msg;
578 }
579}
580
581sub dba_handle {
582 if ( $DB{'type'} eq 'Oracle' ) {
583 $ENV{'NLS_LANG'} = "AMERICAN_AMERICA.AL32UTF8";
584 $ENV{'NLS_NCHAR'} = "AL32UTF8";
585 }
586 my $dsn = do { my $h = new RT::Handle; $h->BuildDSN; $h->DSN };
587 my $dbh = DBI->connect(
588 $dsn, $DB{admin}, $DB{admin_password},
589 { RaiseError => 1, PrintError => 1 },
590 );
591 unless ( $dbh ) {
592 die "Failed to connect to $dsn as user '$DB{admin}': ". $DBI::errstr;
593 }
594 return $dbh;
595}
596
597sub do_error_is_ok {
598 my $dbh = shift;
599 local $dbh->{'RaiseError'} = 0;
600 local $dbh->{'PrintError'} = 0;
601 return $dbh->do(shift, undef, @_);
602}
603
604sub do_print_error {
605 my $dbh = shift;
606 local $dbh->{'RaiseError'} = 0;
607 local $dbh->{'PrintError'} = 1;
608 return $dbh->do(shift, undef, @_);
609}
610
611sub prompt {
612 my %args = ( @_ );
613 return $args{'default'} if $args{'silent'};
614
615 local $| = 1;
616 print $args{'message'};
617 if ( $args{'default'} ) {
618 print "\n[". $args{'default'} .']: ';
619 } else {
620 print ":\n";
621 }
622
623 my $res = <STDIN>;
624 chomp $res;
625 print "\n";
626 return $args{'default'} if !$res && $args{'default'};
627 return $res;
628}
629
630sub verbose { print @_, "\n" if $OPT{verbose} || $OPT{verbose}; 1 }
631sub debug { print @_, "\n" if $OPT{debug}; 1 }
632sub error { $RT::Logger->error( @_ ); verbose(@_); 1 }
633sub warning { $RT::Logger->warning( @_ ); verbose(@_); 1 }
634
635sub show_help {
636 my $error = shift;
637 RT::Interface::CLI->ShowHelp(
638 ExitValue => $error,
639 Sections => 'NAME|DESCRIPTION',
640 );
641}
642
643sub print_rt_config {
644 my %args = @_;
645 my $config = <<END;
646
647You can now configure RT to use the newly-created full-text index by
648adding the following to your RT_SiteConfig.pm:
649
650Set( %FullTextSearch,
651 Enable => 1,
652 Indexed => 1,
653END
654
655 $config .= sprintf(" %-10s => '$args{$_}',\n",$_)
656 foreach grep defined $args{$_}, keys %args;
657 $config .= ");\n";
658
659 print $config;
660}
661
662sub insert_schema {
663 my $dbh = dba_handle();
664 my $message = "Going to run the following in the DB:";
665 my $schema = shift;
666 print "$message\n";
667 my $disp = $schema;
668 $disp =~ s/^/ /mg;
669 print "$disp\n\n";
670 return if $OPT{'dryrun'};
671
672 my $res = $dbh->do( $schema );
673 unless ( $res ) {
674 die "Couldn't run DDL query: ". $dbh->errstr;
675 }
676}
677
678=head1 NAME
679
680rt-setup-fulltext-index - Create indexes for full text search
681
682=head1 DESCRIPTION
683
684This script creates the appropriate tables, columns, functions, and / or
685views necessary for full-text searching for your database type. It will
686drop any existing indexes in the process.
687
688Please read F<docs/full_text_indexing.pod> for complete documentation on
689full-text indexing for your database type.
690
691If you have a non-standard database administrator user or password, you
692may use the C<--dba> and C<--dba-password> parameters to set them
693explicitly:
694
695 rt-setup-fulltext-index --dba sysdba --dba-password 'secret'
696
697To test what will happen without running any DDL, pass the C<--dryrun>
698flag.
699
700The Oracle index determines which content-types it will index at
701creation time. By default, textual message bodies and textual uploaded
702attachments (attachments with filenames) are indexed; to ignore textual
703attachments, pass the C<--no-attachments> flag when the index is
704created.
705
706
707=head1 AUTHOR
708
709Ruslan Zakirov E<lt>ruz@bestpractical.comE<gt>,
710Alex Vandiver E<lt>alexmv@bestpractical.comE<gt>
711
712=cut
713