summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane, 2002-02-07 22:11:43 +0000
committer: Tom Lane, 2002-02-07 22:11:43 +0000
commit: fe1a9c336290cde8a1dacc56d0161b123fcc18a1 (patch)
tree: 42fc9f5cddbf18776b5a291cd03b465d1ace54d7
parent: e206ff59467458cd5a9af593c45565641f218a09 (diff)
Repair some problems in GIST-index contrib modules. Patch from
Teodor Sigaev <[email protected]>.
-rw-r--r--  contrib/intarray/_int.c          4
-rw-r--r--  contrib/tsearch/README.tsearch  21
-rw-r--r--  contrib/tsearch/gistidx.c       43
-rw-r--r--  contrib/tsearch/tsearch.sql.in   4
4 files changed, 39 insertions, 33 deletions
diff --git a/contrib/intarray/_int.c b/contrib/intarray/_int.c
index a642998cd44..35ee7a659e2 100644
--- a/contrib/intarray/_int.c
+++ b/contrib/intarray/_int.c
@@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
+ if ( seed_1 == 0 || seed_2 == 0 ) {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
datum_l = copy_intArrayType(datum_alpha);
diff --git a/contrib/tsearch/README.tsearch b/contrib/tsearch/README.tsearch
index 96059893fa6..c63ae91edd0 100644
--- a/contrib/tsearch/README.tsearch
+++ b/contrib/tsearch/README.tsearch
@@ -198,23 +198,6 @@ Don't forget to do
make clean; make; make install
2.
-As it was mentioned above we don't use explicitly ID of lexems
-as in OpenFTS but use hash function (crc32) instead to map lexem to
-integer. Our experiments show that probability of collision is quite small:
-for english text it's about 10**(-6) and 10**(-5) for russian collection.
-Default installation doesn't check for collisions but if your application
-does need to guarantee an exact (no collisions) search, you need
-to update system table to mark index islossy:
-
- update pg_amop set amopreqcheck = true where amopclaid =
- (select oid from pg_opclass where opcname = 'gist_txtidx_ops');
-
-If you don't bother about collisions :
-
- update pg_amop set amopreqcheck = false where amopclaid =
- (select oid from pg_opclass where opcname = 'gist_txtidx_ops');
-
-3.
txtidx doesn't preserve words ordering (this is not critical for searching)
for performance reason, for example:
@@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
'two' 'page'
(1 row)
-4.
+3.
Indexed access provided by txtidx data type isn't always good
because of internal data structure we use (RD-Tree). Particularly,
queries like '!gist' will be slower than just a sequential scan,
@@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
These two queries will be processed by scanning of full index !
Very slow !
-5.
+4.
Following selects produce the same result
select title from titles where titleidx @@ 'patch&gist';
diff --git a/contrib/tsearch/gistidx.c b/contrib/tsearch/gistidx.c
index 376b21a307d..5e527036840 100644
--- a/contrib/tsearch/gistidx.c
+++ b/contrib/tsearch/gistidx.c
@@ -10,6 +10,7 @@
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
+#include "access/tuptoaster.h"
#include "txtidx.h"
#include "query.h"
@@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
return res + 1 - a;
}
+static void
+makesign( BITVECP sign, GISTTYPE *a) {
+ int4 k,len = ARRNELEM( a );
+ int4 *ptr = GETARR( a );
+ MemSet( (void*)sign, 0, sizeof(BITVEC) );
+ for(k=0;k<len;k++)
+ HASH( sign, ptr[k] );
+}
+
Datum
gtxtidx_compress(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
@@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
arr++; ptr++;
}
- if ( val != toastedval )
- pfree(val);
len = uniqueint( GETARR(res), val->size );
if ( len != val->size ) {
@@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
len = CALCGTSIZE( ARRKEY, len );
res = (GISTTYPE*)repalloc( (void*)res, len );
res->len = len;
- }
+ }
+ if ( val != toastedval )
+ pfree(val);
+
+ /* make signature, if array is too long */
+ if ( res->len > TOAST_INDEX_TARGET ) {
+ GISTTYPE *ressign;
+
+ len = CALCGTSIZE( SIGNKEY, 0 );
+ ressign = (GISTTYPE*)palloc( len );
+ ressign->len = len;
+ ressign->flag = SIGNKEY;
+ makesign( GETSIGN(ressign), res );
+ pfree(res);
+ res = ressign;
+ }
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
@@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER( penalty );
}
-static void
-makesign( BITVECP sign, GISTTYPE *a) {
- int4 k,len = ARRNELEM( a );
- int4 *ptr = GETARR( a );
- MemSet( (void*)sign, 0, sizeof(BITVEC) );
- for(k=0;k<len;k++)
- HASH( sign, ptr[k] );
-}
-
typedef struct {
bool allistrue;
BITVEC sign;
@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
right = v->spl_right;
v->spl_nright = 0;
+ if ( seed_1 == 0 || seed_2 == 0 ) {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
/* form initial .. */
if ( cache[seed_1].allistrue ) {
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
diff --git a/contrib/tsearch/tsearch.sql.in b/contrib/tsearch/tsearch.sql.in
index 17317f25584..48a5ae27e62 100644
--- a/contrib/tsearch/tsearch.sql.in
+++ b/contrib/tsearch/tsearch.sql.in
@@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
- SELECT opcl.oid, 1, false, c.opoid
+ SELECT opcl.oid, 1, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
@@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
and c.oprname = '@@';
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
- SELECT opcl.oid, 2, false, c.opoid
+ SELECT opcl.oid, 2, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')