mirror of
https://github.com/postgres/postgres.git
synced 2025-06-13 07:41:39 +03:00
Repair some problems in GIST-index contrib modules. Patch from
Teodor Sigaev <teodor@stack.net>.
This commit is contained in:
@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
|
|||||||
v->spl_nleft = 0;
|
v->spl_nleft = 0;
|
||||||
right = v->spl_right;
|
right = v->spl_right;
|
||||||
v->spl_nright = 0;
|
v->spl_nright = 0;
|
||||||
|
if ( seed_1 == 0 || seed_2 == 0 ) {
|
||||||
|
seed_1 = 1;
|
||||||
|
seed_2 = 2;
|
||||||
|
}
|
||||||
|
|
||||||
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
|
datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
|
||||||
datum_l = copy_intArrayType(datum_alpha);
|
datum_l = copy_intArrayType(datum_alpha);
|
||||||
|
@ -198,23 +198,6 @@ Don't forget to do
|
|||||||
make clean; make; make install
|
make clean; make; make install
|
||||||
|
|
||||||
2.
|
2.
|
||||||
As it was mentioned above we don't use explicitly ID of lexems
|
|
||||||
as in OpenFTS but use hash function (crc32) instead to map lexem to
|
|
||||||
integer. Our experiments show that probability of collision is quite small:
|
|
||||||
for english text it's about 10**(-6) and 10**(-5) for russian collection.
|
|
||||||
Default installation doesn't check for collisions but if your application
|
|
||||||
does need to guarantee an exact (no collisions) search, you need
|
|
||||||
to update system table to mark index islossy:
|
|
||||||
|
|
||||||
update pg_amop set amopreqcheck = true where amopclaid =
|
|
||||||
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
|
|
||||||
|
|
||||||
If you don't bother about collisions :
|
|
||||||
|
|
||||||
update pg_amop set amopreqcheck = false where amopclaid =
|
|
||||||
(select oid from pg_opclass where opcname = 'gist_txtidx_ops');
|
|
||||||
|
|
||||||
3.
|
|
||||||
txtidx doesn't preserve words ordering (this is not critical for searching)
|
txtidx doesn't preserve words ordering (this is not critical for searching)
|
||||||
for performance reason, for example:
|
for performance reason, for example:
|
||||||
|
|
||||||
@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
|
|||||||
'two' 'page'
|
'two' 'page'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
4.
|
3.
|
||||||
Indexed access provided by txtidx data type isn't always good
|
Indexed access provided by txtidx data type isn't always good
|
||||||
because of internal data structure we use (RD-Tree). Particularly,
|
because of internal data structure we use (RD-Tree). Particularly,
|
||||||
queries like '!gist' will be slower than just a sequential scan,
|
queries like '!gist' will be slower than just a sequential scan,
|
||||||
@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
|
|||||||
These two queries will be processed by scanning of full index !
|
These two queries will be processed by scanning of full index !
|
||||||
Very slow !
|
Very slow !
|
||||||
|
|
||||||
5.
|
4.
|
||||||
Following selects produce the same result
|
Following selects produce the same result
|
||||||
|
|
||||||
select title from titles where titleidx @@ 'patch&gist';
|
select title from titles where titleidx @@ 'patch&gist';
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "utils/array.h"
|
#include "utils/array.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "storage/bufpage.h"
|
#include "storage/bufpage.h"
|
||||||
|
#include "access/tuptoaster.h"
|
||||||
|
|
||||||
#include "txtidx.h"
|
#include "txtidx.h"
|
||||||
#include "query.h"
|
#include "query.h"
|
||||||
@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
|
|||||||
return res + 1 - a;
|
return res + 1 - a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
makesign( BITVECP sign, GISTTYPE *a) {
|
||||||
|
int4 k,len = ARRNELEM( a );
|
||||||
|
int4 *ptr = GETARR( a );
|
||||||
|
MemSet( (void*)sign, 0, sizeof(BITVEC) );
|
||||||
|
for(k=0;k<len;k++)
|
||||||
|
HASH( sign, ptr[k] );
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
gtxtidx_compress(PG_FUNCTION_ARGS) {
|
gtxtidx_compress(PG_FUNCTION_ARGS) {
|
||||||
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
|
GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
|
||||||
@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
|
|||||||
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
|
*arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
|
||||||
arr++; ptr++;
|
arr++; ptr++;
|
||||||
}
|
}
|
||||||
if ( val != toastedval )
|
|
||||||
pfree(val);
|
|
||||||
|
|
||||||
len = uniqueint( GETARR(res), val->size );
|
len = uniqueint( GETARR(res), val->size );
|
||||||
if ( len != val->size ) {
|
if ( len != val->size ) {
|
||||||
@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
|
|||||||
len = CALCGTSIZE( ARRKEY, len );
|
len = CALCGTSIZE( ARRKEY, len );
|
||||||
res = (GISTTYPE*)repalloc( (void*)res, len );
|
res = (GISTTYPE*)repalloc( (void*)res, len );
|
||||||
res->len = len;
|
res->len = len;
|
||||||
}
|
}
|
||||||
|
if ( val != toastedval )
|
||||||
|
pfree(val);
|
||||||
|
|
||||||
|
/* make signature, if array is too long */
|
||||||
|
if ( res->len > TOAST_INDEX_TARGET ) {
|
||||||
|
GISTTYPE *ressign;
|
||||||
|
|
||||||
|
len = CALCGTSIZE( SIGNKEY, 0 );
|
||||||
|
ressign = (GISTTYPE*)palloc( len );
|
||||||
|
ressign->len = len;
|
||||||
|
ressign->flag = SIGNKEY;
|
||||||
|
makesign( GETSIGN(ressign), res );
|
||||||
|
pfree(res);
|
||||||
|
res = ressign;
|
||||||
|
}
|
||||||
|
|
||||||
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
|
retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
|
||||||
gistentryinit(*retval, PointerGetDatum(res),
|
gistentryinit(*retval, PointerGetDatum(res),
|
||||||
@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
|
|||||||
PG_RETURN_POINTER( penalty );
|
PG_RETURN_POINTER( penalty );
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
makesign( BITVECP sign, GISTTYPE *a) {
|
|
||||||
int4 k,len = ARRNELEM( a );
|
|
||||||
int4 *ptr = GETARR( a );
|
|
||||||
MemSet( (void*)sign, 0, sizeof(BITVEC) );
|
|
||||||
for(k=0;k<len;k++)
|
|
||||||
HASH( sign, ptr[k] );
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
bool allistrue;
|
bool allistrue;
|
||||||
BITVEC sign;
|
BITVEC sign;
|
||||||
@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
|
|||||||
right = v->spl_right;
|
right = v->spl_right;
|
||||||
v->spl_nright = 0;
|
v->spl_nright = 0;
|
||||||
|
|
||||||
|
if ( seed_1 == 0 || seed_2 == 0 ) {
|
||||||
|
seed_1 = 1;
|
||||||
|
seed_2 = 2;
|
||||||
|
}
|
||||||
|
|
||||||
/* form initial .. */
|
/* form initial .. */
|
||||||
if ( cache[seed_1].allistrue ) {
|
if ( cache[seed_1].allistrue ) {
|
||||||
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
|
datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
|
||||||
|
@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
|
|||||||
and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
|
and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
|
||||||
|
|
||||||
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
|
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
|
||||||
SELECT opcl.oid, 1, false, c.opoid
|
SELECT opcl.oid, 1, true, c.opoid
|
||||||
FROM pg_opclass opcl, txtidx_ops_tmp c
|
FROM pg_opclass opcl, txtidx_ops_tmp c
|
||||||
WHERE
|
WHERE
|
||||||
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
|
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
|
||||||
@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
|
|||||||
and c.oprname = '@@';
|
and c.oprname = '@@';
|
||||||
|
|
||||||
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
|
INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
|
||||||
SELECT opcl.oid, 2, false, c.opoid
|
SELECT opcl.oid, 2, true, c.opoid
|
||||||
FROM pg_opclass opcl, txtidx_ops_tmp c
|
FROM pg_opclass opcl, txtidx_ops_tmp c
|
||||||
WHERE
|
WHERE
|
||||||
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
|
opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
|
||||||
|
Reference in New Issue
Block a user