]> git.uio.no Git - usit-rt.git/blame - sbin/rt-setup-fulltext-index
Merge branch 'dev'
[usit-rt.git] / sbin / rt-setup-fulltext-index
CommitLineData
84fb5b46
MKG
1#!/usr/bin/perl
2# BEGIN BPS TAGGED BLOCK {{{
3#
4# COPYRIGHT:
5#
6# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC
7# <sales@bestpractical.com>
8#
9# (Except where explicitly superseded by other copyright notices)
10#
11#
12# LICENSE:
13#
14# This work is made available to you under the terms of Version 2 of
15# the GNU General Public License. A copy of that license should have
16# been provided with this software, but in any event can be snarfed
17# from www.gnu.org.
18#
19# This work is distributed in the hope that it will be useful, but
20# WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22# General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27# 02110-1301 or visit their web page on the internet at
28# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
29#
30#
31# CONTRIBUTION SUBMISSION POLICY:
32#
33# (The following paragraph is not intended to limit the rights granted
34# to you to modify and distribute this software under the terms of
35# the GNU General Public License and is only of importance to you if
36# you choose to contribute your changes and enhancements to the
37# community by submitting them to Best Practical Solutions, LLC.)
38#
39# By intentionally submitting any modifications, corrections or
40# derivatives to this work, or any other work intended for use with
41# Request Tracker, to Best Practical Solutions, LLC, you confirm that
42# you are the copyright holder for those contributions and you grant
43# Best Practical Solutions, LLC a nonexclusive, worldwide, irrevocable,
44# royalty-free, perpetual, license to use, copy, create derivative
45# works based on those contributions, and sublicense and distribute
46# those contributions and any derivatives thereof.
47#
48# END BPS TAGGED BLOCK }}}
49use strict;
50use warnings;
51no warnings 'once';
52
53# fix lib paths, some may be relative
54BEGIN {
55 require File::Spec;
56 my @libs = ("lib", "local/lib");
57 my $bin_path;
58
59 for my $lib (@libs) {
60 unless ( File::Spec->file_name_is_absolute($lib) ) {
61 unless ($bin_path) {
62 if ( File::Spec->file_name_is_absolute(__FILE__) ) {
63 $bin_path = ( File::Spec->splitpath(__FILE__) )[1];
64 }
65 else {
66 require FindBin;
67 no warnings "once";
68 $bin_path = $FindBin::Bin;
69 }
70 }
71 $lib = File::Spec->catfile( $bin_path, File::Spec->updir, $lib );
72 }
73 unshift @INC, $lib;
74 }
75}
76
77BEGIN {
78 use RT;
79 RT::LoadConfig();
80 RT::Init();
81};
82use RT::Interface::CLI ();
83
84my %DB = (
85 type => scalar RT->Config->Get('DatabaseType'),
86 user => scalar RT->Config->Get('DatabaseUser'),
87 admin => 'postgres',
88 admin_password => undef,
89);
90
91my %OPT = (
92 help => 0,
93 ask => 1,
94 dryrun => 0,
95 attachments => 1,
96);
97
98my %DEFAULT;
99if ( $DB{'type'} eq 'Pg' ) {
100 %DEFAULT = (
101 table => 'Attachments',
102 column => 'ContentIndex',
103 );
104}
105elsif ( $DB{'type'} eq 'mysql' ) {
106 %DEFAULT = (
107 table => 'AttachmentsIndex',
108 );
109}
110elsif ( $DB{'type'} eq 'Oracle' ) {
111 %DEFAULT = (
112 prefix => 'rt_fts_',
113 );
114}
115
116use Getopt::Long qw(GetOptions);
117GetOptions(
118 'h|help!' => \$OPT{'help'},
119 'ask!' => \$OPT{'ask'},
120 'dry-run!' => \$OPT{'dryrun'},
121 'attachments!' => \$OPT{'attachments'},
122
123 'table=s' => \$OPT{'table'},
124 'column=s' => \$OPT{'column'},
125 'url=s' => \$OPT{'url'},
126 'maxmatches=i' => \$OPT{'maxmatches'},
127 'index-type=s' => \$OPT{'index-type'},
128
129 'dba=s' => \$DB{'admin'},
130 'dba-password=s' => \$DB{'admin_password'},
131) or show_help();
132
133if ( $OPT{'help'} || (!$DB{'admin'} && $DB{'type'} eq 'Oracle' ) ) {
134 show_help( !$OPT{'help'} );
135}
136
137my $dbh = $RT::Handle->dbh;
138$dbh->{'RaiseError'} = 1;
139$dbh->{'PrintError'} = 1;
140
141if ( $DB{'type'} eq 'mysql' ) {
142 check_sphinx();
143 my $table = $OPT{'table'} || prompt(
144 message => "Enter name of a new MySQL table that will be used to connect to the\n"
145 . "Sphinx server:",
146 default => $DEFAULT{'table'},
147 silent => !$OPT{'ask'},
148 );
149 my $url = $OPT{'url'} || prompt(
150 message => "Enter URL of the sphinx search server; this should be of the form\n"
151 . "sphinx://<server>:<port>/<index name>",
152 default => 'sphinx://localhost:3312/rt',
153 silent => !$OPT{'ask'},
154 );
155 my $maxmatches = $OPT{'maxmatches'} || prompt(
156 message => "Maximum number of matches to return; this is the maximum number of\n"
157 . "attachment records returned by the search, not the maximum number\n"
158 . "of tickets. Both your RT_SiteConfig.pm and your sphinx.conf must\n"
159 . "agree on this value. Larger values cause your Sphinx server to\n"
160 . "consume more memory and CPU time per query.",
161 default => 10000,
162 silent => !$OPT{'ask'},
163 );
164
165 my $schema = <<END;
166CREATE TABLE $table (
167 id INTEGER UNSIGNED NOT NULL,
168 weight INTEGER NOT NULL,
169 query VARCHAR(3072) NOT NULL,
170 INDEX(query)
171) ENGINE=SPHINX CONNECTION="$url" CHARACTER SET utf8
172END
173
174 do_error_is_ok( dba_handle() => "DROP TABLE $table" )
175 unless $OPT{'dryrun'};
176 insert_schema( $schema );
177
178 print_rt_config( Table => $table, MaxMatches => $maxmatches );
179
180 require URI;
181 my $urlo = URI->new( $url );
182 my ($host, $port) = split /:/, $urlo->authority;
183 my $index = $urlo->path;
184 $index =~ s{^/+}{};
185
186 my $var_path = $RT::VarPath;
187
188 my %sphinx_conf = ();
189 $sphinx_conf{'host'} = RT->Config->Get('DatabaseHost');
190 $sphinx_conf{'db'} = RT->Config->Get('DatabaseName');
191 $sphinx_conf{'user'} = RT->Config->Get('DatabaseUser');
192 $sphinx_conf{'pass'} = RT->Config->Get('DatabasePassword');
193
194 print <<END
195
196Below is a simple Sphinx configuration which can be used to index all
197text/plain attachments in your database. This configuration is not
198ideal; you should read the Sphinx documentation to understand how to
199configure it to better suit your needs.
200
201source rt {
202 type = mysql
203
204 sql_host = $sphinx_conf{'host'}
205 sql_db = $sphinx_conf{'db'}
206 sql_user = $sphinx_conf{'user'}
207 sql_pass = $sphinx_conf{'pass'}
208
209 sql_query_pre = SET NAMES utf8
210 sql_query = \\
211 SELECT a.id, a.content FROM Attachments a \\
212 JOIN Transactions txn ON a.TransactionId = txn.id AND txn.ObjectType = 'RT::Ticket' \\
213 JOIN Tickets t ON txn.ObjectId = t.id \\
214 WHERE a.ContentType = 'text/plain' AND t.Status != 'deleted'
215
216 sql_query_info = SELECT * FROM Attachments WHERE id=\$id
217}
218
219index $index {
220 source = rt
221 path = $var_path/sphinx/index
222 docinfo = extern
223 charset_type = utf-8
224}
225
226indexer {
227 mem_limit = 32M
228}
229
230searchd {
231 port = $port
232 log = $var_path/sphinx/searchd.log
233 query_log = $var_path/sphinx/query.log
234 read_timeout = 5
235 max_children = 30
236 pid_file = $var_path/sphinx/searchd.pid
237 max_matches = $maxmatches
238 seamless_rotate = 1
239 preopen_indexes = 0
240 unlink_old = 1
241}
242
243END
244
245}
246elsif ( $DB{'type'} eq 'Pg' ) {
247 check_tsvalue();
248 my $table = $OPT{'table'} || prompt(
249 message => "Enter the name of a DB table that will be used to store the Pg tsvector.\n"
250 . "You may either use the existing Attachments table, or create a new\n"
251 . "table.",
252 default => $DEFAULT{'table'},
253 silent => !$OPT{'ask'},
254 );
255 my $column = $OPT{'column'} || prompt(
256 message => 'Enter the name of a column that will be used to store the Pg tsvector:',
257 default => $DEFAULT{'column'},
258 silent => !$OPT{'ask'},
259 );
260
261 my $schema;
262 my $drop;
263 if ( lc($table) eq 'attachments' ) {
264 $drop = "ALTER TABLE $table DROP COLUMN $column";
265 $schema = "ALTER TABLE $table ADD COLUMN $column tsvector";
266 } else {
267 $drop = "DROP TABLE $table";
268 $schema = "CREATE TABLE $table ( "
269 ."id INTEGER NOT NULL,"
270 ."$column tsvector )";
271 }
272
273 my $index_type = lc($OPT{'index-type'} || '');
274 while ( $index_type ne 'gist' and $index_type ne 'gin' ) {
275 $index_type = lc prompt(
276 message => "You may choose between GiST or GIN indexes; the former is several times\n"
277 . "slower to search, but takes less space on disk and is faster to update.",
278 default => 'GiST',
279 silent => !$OPT{'ask'},
280 );
281 }
282
283 do_error_is_ok( dba_handle() => $drop )
284 unless $OPT{'dryrun'};
285 insert_schema( $schema );
286 insert_schema("CREATE INDEX ${column}_idx ON $table USING $index_type($column)");
287
288 print_rt_config( Table => $table, Column => $column );
289}
290elsif ( $DB{'type'} eq 'Oracle' ) {
291 {
292 my $dbah = dba_handle();
293 do_print_error( $dbah => 'GRANT CTXAPP TO '. $DB{'user'} );
294 do_print_error( $dbah => 'GRANT EXECUTE ON CTXSYS.CTX_DDL TO '. $DB{'user'} );
295 }
296
297 my %PREFERENCES = (
298 datastore => {
299 type => 'DIRECT_DATASTORE',
300 },
301 filter => {
302 type => 'AUTO_FILTER',
303# attributes => {
304# timeout => 120, # seconds
305# timeout_type => 'HEURISTIC', # or 'FIXED'
306# },
307 },
308 lexer => {
309 type => 'WORLD_LEXER',
310 },
311 word_list => {
312 type => 'BASIC_WORDLIST',
313 attributes => {
314 stemmer => 'AUTO',
315 fuzzy_match => 'AUTO',
316# fuzzy_score => undef,
317# fuzzy_numresults => undef,
318# substring_index => undef,
319# prefix_index => undef,
320# prefix_length_min => undef,
321# prefix_length_max => undef,
322# wlidcard_maxterms => undef,
323 },
324 },
325 'section_group' => {
326 type => 'NULL_SECTION_GROUP',
327 },
328
329 storage => {
330 type => 'BASIC_STORAGE',
331 attributes => {
332 R_TABLE_CLAUSE => 'lob (data) store as (cache)',
333 I_INDEX_CLAUSE => 'compress 2',
334 },
335 },
336 );
337
338 my @params = ();
339 push @params, ora_create_datastore( %{ $PREFERENCES{'datastore'} } );
340 push @params, ora_create_filter( %{ $PREFERENCES{'filter'} } );
341 push @params, ora_create_lexer( %{ $PREFERENCES{'lexer'} } );
342 push @params, ora_create_word_list( %{ $PREFERENCES{'word_list'} } );
343 push @params, ora_create_stop_list();
344 push @params, ora_create_section_group( %{ $PREFERENCES{'section_group'} } );
345 push @params, ora_create_storage( %{ $PREFERENCES{'storage'} } );
346
347 my $index_params = join "\n", @params;
348 my $index_name = $DEFAULT{prefix} .'index';
349 do_error_is_ok( $dbh => "DROP INDEX $index_name" )
350 unless $OPT{'dryrun'};
351 $dbh->do(
352 "CREATE INDEX $index_name ON Attachments(Content)
353 indextype is ctxsys.context parameters('
354 $index_params
355 ')",
356 ) unless $OPT{'dryrun'};
357
358 print_rt_config( IndexName => $index_name );
359}
360else {
361 die "Full-text indexes on $DB{type} are not yet supported";
362}
363
364sub check_tsvalue {
365 my $dbh = $RT::Handle->dbh;
366 my $fts = ($dbh->selectrow_array(<<EOQ))[0];
367SELECT 1 FROM information_schema.routines WHERE routine_name = 'plainto_tsquery'
368EOQ
369 unless ($fts) {
370 print STDERR <<EOT;
371
372Your PostgreSQL server does not include full-text support. You will
373need to upgrade to PostgreSQL version 8.3 or higher to use full-text
374indexing.
375
376EOT
377 exit 1;
378 }
379}
380
381sub check_sphinx {
382 return if $RT::Handle->CheckSphinxSE;
383
384 print STDERR <<EOT;
385
386Your MySQL server has not been compiled with the Sphinx storage engine
387(sphinxse). You will need to recompile MySQL according to the
388instructions in Sphinx's documentation at
389http://sphinxsearch.com/docs/current.html#sphinxse-installing
390
391EOT
392 exit 1;
393}
394
395sub ora_create_datastore {
396 return sprintf 'datastore %s', ora_create_preference(
397 @_,
398 name => 'datastore',
399 );
400}
401
402sub ora_create_filter {
403 my $res = '';
404 $res .= sprintf "format column %s\n", ora_create_format_column();
405 $res .= sprintf 'filter %s', ora_create_preference(
406 @_,
407 name => 'filter',
408 );
409 return $res;
410}
411
412sub ora_create_lexer {
413 return sprintf 'lexer %s', ora_create_preference(
414 @_,
415 name => 'lexer',
416 );
417}
418
419sub ora_create_word_list {
420 return sprintf 'wordlist %s', ora_create_preference(
421 @_,
422 name => 'word_list',
423 );
424}
425
426sub ora_create_stop_list {
427 my $file = shift || 'etc/stopwords/en.txt';
428 return '' unless -e $file;
429
430 my $name = $DEFAULT{'prefix'} .'stop_list';
431 unless ($OPT{'dryrun'}) {
432 do_error_is_ok( $dbh => 'begin ctx_ddl.drop_stoplist(?); end;', $name );
433
434 $dbh->do(
435 'begin ctx_ddl.create_stoplist(?, ?); end;',
436 undef, $name, 'BASIC_STOPLIST'
437 );
438
439 open( my $fh, '<:utf8', $file )
440 or die "couldn't open file '$file': $!";
441 while ( my $word = <$fh> ) {
442 chomp $word;
443 $dbh->do(
444 'begin ctx_ddl.add_stopword(?, ?); end;',
445 undef, $name, $word
446 );
447 }
448 close $fh;
449 }
450 return sprintf 'stoplist %s', $name;
451}
452
453sub ora_create_section_group {
454 my %args = @_;
455 my $name = $DEFAULT{'prefix'} .'section_group';
456 unless ($OPT{'dryrun'}) {
457 do_error_is_ok( $dbh => 'begin ctx_ddl.drop_section_group(?); end;', $name );
458 $dbh->do(
459 'begin ctx_ddl.create_section_group(?, ?); end;',
460 undef, $name, $args{'type'}
461 );
462 }
463 return sprintf 'section group %s', $name;
464}
465
466sub ora_create_storage {
467 return sprintf 'storage %s', ora_create_preference(
468 @_,
469 name => 'storage',
470 );
471}
472
473sub ora_create_format_column {
474 my $column_name = 'ContentOracleFormat';
475 return $column_name if $OPT{'dryrun'};
476 unless (
477 $dbh->column_info(
478 undef, undef, uc('Attachments'), uc( $column_name )
479 )->fetchrow_array
480 ) {
481 $dbh->do(qq{
482 ALTER TABLE Attachments ADD $column_name VARCHAR2(10)
483 });
484 }
485
486 my $detect_format = qq{
487 CREATE OR REPLACE FUNCTION $DEFAULT{prefix}detect_format_simple(
488 parent IN NUMBER,
489 type IN VARCHAR2,
490 encoding IN VARCHAR2,
491 fname IN VARCHAR2
492 )
493 RETURN VARCHAR2
494 AS
495 format VARCHAR2(10);
496 BEGIN
497 format := CASE
498 };
499 unless ( $OPT{'attachments'} ) {
500 $detect_format .= qq{
501 WHEN fname IS NOT NULL THEN 'ignore'
502 };
503 }
504 $detect_format .= qq{
505 WHEN type = 'text' THEN 'text'
506 WHEN type = 'text/rtf' THEN 'ignore'
507 WHEN type LIKE 'text/%' THEN 'text'
508 WHEN type LIKE 'message/%' THEN 'text'
509 ELSE 'ignore'
510 END;
511 RETURN format;
512 END;
513 };
514 ora_create_procedure( $detect_format );
515
516 $dbh->do(qq{
517 UPDATE Attachments
518 SET $column_name = $DEFAULT{prefix}detect_format_simple(
519 Parent,
520 ContentType, ContentEncoding,
521 Filename
522 )
523 WHERE $column_name IS NULL
524 });
525 $dbh->do(qq{
526 CREATE OR REPLACE TRIGGER $DEFAULT{prefix}set_format
527 BEFORE INSERT
528 ON Attachments
529 FOR EACH ROW
530 BEGIN
531 :new.$column_name := $DEFAULT{prefix}detect_format_simple(
532 :new.Parent,
533 :new.ContentType, :new.ContentEncoding,
534 :new.Filename
535 );
536 END;
537 });
538 return $column_name;
539}
540
541sub ora_create_preference {
542 my %info = @_;
543 my $name = $DEFAULT{'prefix'} . $info{'name'};
544 return $name if $OPT{'dryrun'};
545 do_error_is_ok( $dbh => 'begin ctx_ddl.drop_preference(?); end;', $name );
546 $dbh->do(
547 'begin ctx_ddl.create_preference(?, ?); end;',
548 undef, $name, $info{'type'}
549 );
550 return $name unless $info{'attributes'};
551
552 while ( my ($attr, $value) = each %{ $info{'attributes'} } ) {
553 $dbh->do(
554 'begin ctx_ddl.set_attribute(?, ?, ?); end;',
555 undef, $name, $attr, $value
556 );
557 }
558
559 return $name;
560}
561
562sub ora_create_procedure {
563 my $text = shift;
564
565 return if $OPT{'dryrun'};
566 my $status = $dbh->do($text, { RaiseError => 0 });
567
568 # Statement succeeded
569 return if $status;
570
571 if ( 6550 != $dbh->err ) {
572 # Utter failure
573 die $dbh->errstr;
574 }
575 else {
576 my $msg = $dbh->func( 'plsql_errstr' );
577 die $dbh->errstr if !defined $msg;
578 die $msg if $msg;
579 }
580}
581
582sub dba_handle {
583 if ( $DB{'type'} eq 'Oracle' ) {
584 $ENV{'NLS_LANG'} = "AMERICAN_AMERICA.AL32UTF8";
585 $ENV{'NLS_NCHAR'} = "AL32UTF8";
586 }
587 my $dsn = do { my $h = new RT::Handle; $h->BuildDSN; $h->DSN };
588 my $dbh = DBI->connect(
589 $dsn, $DB{admin}, $DB{admin_password},
590 { RaiseError => 1, PrintError => 1 },
591 );
592 unless ( $dbh ) {
593 die "Failed to connect to $dsn as user '$DB{admin}': ". $DBI::errstr;
594 }
595 return $dbh;
596}
597
598sub do_error_is_ok {
599 my $dbh = shift;
600 local $dbh->{'RaiseError'} = 0;
601 local $dbh->{'PrintError'} = 0;
602 return $dbh->do(shift, undef, @_);
603}
604
605sub do_print_error {
606 my $dbh = shift;
607 local $dbh->{'RaiseError'} = 0;
608 local $dbh->{'PrintError'} = 1;
609 return $dbh->do(shift, undef, @_);
610}
611
612sub prompt {
613 my %args = ( @_ );
614 return $args{'default'} if $args{'silent'};
615
616 local $| = 1;
617 print $args{'message'};
618 if ( $args{'default'} ) {
619 print "\n[". $args{'default'} .']: ';
620 } else {
621 print ":\n";
622 }
623
624 my $res = <STDIN>;
625 chomp $res;
626 print "\n";
627 return $args{'default'} if !$res && $args{'default'};
628 return $res;
629}
630
631sub verbose { print @_, "\n" if $OPT{verbose} || $OPT{verbose}; 1 }
632sub debug { print @_, "\n" if $OPT{debug}; 1 }
633sub error { $RT::Logger->error( @_ ); verbose(@_); 1 }
634sub warning { $RT::Logger->warning( @_ ); verbose(@_); 1 }
635
636sub show_help {
637 my $error = shift;
638 RT::Interface::CLI->ShowHelp(
639 ExitValue => $error,
640 Sections => 'NAME|DESCRIPTION',
641 );
642}
643
644sub print_rt_config {
645 my %args = @_;
646 my $config = <<END;
647
648You can now configure RT to use the newly-created full-text index by
649adding the following to your RT_SiteConfig.pm:
650
651Set( %FullTextSearch,
652 Enable => 1,
653 Indexed => 1,
654END
655
656 $config .= sprintf(" %-10s => '$args{$_}',\n",$_)
657 foreach grep defined $args{$_}, keys %args;
658 $config .= ");\n";
659
660 print $config;
661}
662
663sub insert_schema {
664 my $dbh = dba_handle();
665 my $message = "Going to run the following in the DB:";
666 my $schema = shift;
667 print "$message\n";
668 my $disp = $schema;
669 $disp =~ s/^/ /mg;
670 print "$disp\n\n";
671 return if $OPT{'dryrun'};
672
673 my $res = $dbh->do( $schema );
674 unless ( $res ) {
675 die "Couldn't run DDL query: ". $dbh->errstr;
676 }
677}
678
679=head1 NAME
680
681rt-setup-fulltext-index - Create indexes for full text search
682
683=head1 DESCRIPTION
684
685This script creates the appropriate tables, columns, functions, and / or
686views necessary for full-text searching for your database type. It will
687drop any existing indexes in the process.
688
689Please read F<docs/full_text_indexing.pod> for complete documentation on
690full-text indexing for your database type.
691
692If you have a non-standard database administrator user or password, you
693may use the C<--dba> and C<--dba-password> parameters to set them
694explicitly:
695
696 rt-setup-fulltext-index --dba sysdba --dba-password 'secret'
697
698To test what will happen without running any DDL, pass the C<--dryrun>
699flag.
700
701The Oracle index determines which content-types it will index at
702creation time. By default, textual message bodies and textual uploaded
703attachments (attachments with filenames) are indexed; to ignore textual
704attachments, pass the C<--no-attachments> flag when the index is
705created.
706
707
708=head1 AUTHOR
709
710Ruslan Zakirov E<lt>ruz@bestpractical.comE<gt>,
711Alex Vandiver E<lt>alexmv@bestpractical.comE<gt>
712
713=cut
714