1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-13 07:41:39 +03:00

Repair some problems in GIST-index contrib modules. Patch from

Teodor Sigaev <teodor@stack.net>.
This commit is contained in:
Tom Lane
2002-02-07 22:11:43 +00:00
parent e206ff5946
commit fe1a9c3362
4 changed files with 39 additions and 33 deletions

View File

@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
v->spl_nleft = 0; v->spl_nleft = 0;
right = v->spl_right; right = v->spl_right;
v->spl_nright = 0; v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key); datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
datum_l = copy_intArrayType(datum_alpha); datum_l = copy_intArrayType(datum_alpha);

View File

@ -198,23 +198,6 @@ Don't forget to do
make clean; make; make install make clean; make; make install
2. 2.
As mentioned above, we don't explicitly use lexeme IDs
as in OpenFTS, but instead use a hash function (crc32) to map each lexeme to
an integer. Our experiments show that the probability of collision is quite small:
about 10**(-6) for English text and 10**(-5) for a Russian collection.
The default installation doesn't check for collisions, but if your application
needs to guarantee an exact (collision-free) search, you need
to update the system table to mark the index as lossy:
update pg_amop set amopreqcheck = true where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
If you are not concerned about collisions:
update pg_amop set amopreqcheck = false where amopclaid =
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
3.
txtidx doesn't preserve words ordering (this is not critical for searching) txtidx doesn't preserve words ordering (this is not critical for searching)
for performance reason, for example: for performance reason, for example:
@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
'two' 'page' 'two' 'page'
(1 row) (1 row)
4. 3.
Indexed access provided by txtidx data type isn't always good Indexed access provided by txtidx data type isn't always good
because of internal data structure we use (RD-Tree). Particularly, because of internal data structure we use (RD-Tree). Particularly,
queries like '!gist' will be slower than just a sequential scan, queries like '!gist' will be slower than just a sequential scan,
@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
These two queries will be processed by scanning of full index ! These two queries will be processed by scanning of full index !
Very slow ! Very slow !
5. 4.
Following selects produce the same result Following selects produce the same result
select title from titles where titleidx @@ 'patch&gist'; select title from titles where titleidx @@ 'patch&gist';

View File

@ -10,6 +10,7 @@
#include "utils/array.h" #include "utils/array.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "storage/bufpage.h" #include "storage/bufpage.h"
#include "access/tuptoaster.h"
#include "txtidx.h" #include "txtidx.h"
#include "query.h" #include "query.h"
@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
return res + 1 - a; return res + 1 - a;
} }
/*
 * Fill the signature at "sign" from the array key "a".
 *
 * The whole BITVEC is cleared first; HASH() is then applied to the
 * signature once for each of the ARRNELEM(a) values stored in GETARR(a).
 */
static void
makesign( BITVECP sign, GISTTYPE *a) {
	int4	nelem = ARRNELEM( a );
	int4   *values = GETARR( a );
	int4	i;

	/* start from an all-zero signature */
	MemSet( (void*)sign, 0, sizeof(BITVEC) );

	/* index form keeps the macro argument free of side effects */
	for ( i = 0; i < nelem; i++ ) {
		HASH( sign, values[i] );
	}
}
Datum Datum
gtxtidx_compress(PG_FUNCTION_ARGS) { gtxtidx_compress(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0); GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len ); *arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
arr++; ptr++; arr++; ptr++;
} }
if ( val != toastedval )
pfree(val);
len = uniqueint( GETARR(res), val->size ); len = uniqueint( GETARR(res), val->size );
if ( len != val->size ) { if ( len != val->size ) {
@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
len = CALCGTSIZE( ARRKEY, len ); len = CALCGTSIZE( ARRKEY, len );
res = (GISTTYPE*)repalloc( (void*)res, len ); res = (GISTTYPE*)repalloc( (void*)res, len );
res->len = len; res->len = len;
} }
if ( val != toastedval )
pfree(val);
/* make signature, if array is too long */
if ( res->len > TOAST_INDEX_TARGET ) {
GISTTYPE *ressign;
len = CALCGTSIZE( SIGNKEY, 0 );
ressign = (GISTTYPE*)palloc( len );
ressign->len = len;
ressign->flag = SIGNKEY;
makesign( GETSIGN(ressign), res );
pfree(res);
res = ressign;
}
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY)); retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res), gistentryinit(*retval, PointerGetDatum(res),
@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
PG_RETURN_POINTER( penalty ); PG_RETURN_POINTER( penalty );
} }
/*
 * Fill the signature at "sign" from the array key "a".
 *
 * The whole BITVEC is cleared first; HASH() is then applied to the
 * signature once for each of the ARRNELEM(a) values stored in GETARR(a).
 */
static void
makesign( BITVECP sign, GISTTYPE *a) {
	int4	nelem = ARRNELEM( a );
	int4   *values = GETARR( a );
	int4	i;

	/* start from an all-zero signature */
	MemSet( (void*)sign, 0, sizeof(BITVEC) );

	/* index form keeps the macro argument free of side effects */
	for ( i = 0; i < nelem; i++ ) {
		HASH( sign, values[i] );
	}
}
typedef struct { typedef struct {
bool allistrue; bool allistrue;
BITVEC sign; BITVEC sign;
@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
right = v->spl_right; right = v->spl_right;
v->spl_nright = 0; v->spl_nright = 0;
if ( seed_1 == 0 || seed_2 == 0 ) {
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */ /* form initial .. */
if ( cache[seed_1].allistrue ) { if ( cache[seed_1].allistrue ) {
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) ); datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );

View File

@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
and ( tq.typname='query_txt' or tq.typname='mquery_txt' ); and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 1, false, c.opoid SELECT opcl.oid, 1, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
and c.oprname = '@@'; and c.oprname = '@@';
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
SELECT opcl.oid, 2, false, c.opoid SELECT opcl.oid, 2, true, c.opoid
FROM pg_opclass opcl, txtidx_ops_tmp c FROM pg_opclass opcl, txtidx_ops_tmp c
WHERE WHERE
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist') opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')