1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

GIN: Generalized Inverted iNdex.

text[], int4[], Tsearch2 support for GIN.
This commit is contained in:
Teodor Sigaev
2006-05-02 11:28:56 +00:00
parent 427c6b5b98
commit 8a3631f8d8
49 changed files with 5871 additions and 50 deletions

View File

@ -1,4 +1,4 @@
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
MODULE_big = tsearch2
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
@ -7,7 +7,7 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
tsvector_op.o rank.o ts_stat.o \
query_util.o query_support.o query_rewrite.o query_gist.o \
ts_locale.o
ts_locale.o ginidx.o
SUBDIRS := snowball ispell wordparser
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)

View File

@ -3001,3 +3001,42 @@ select a is null, a from test_tsvector order by a;
t |
(514 rows)
drop index wowidx;
create index wowidx on test_tsvector using gin (a);
set enable_seqscan=off;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
count
-------
17
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
count
-------
6
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
count
-------
98
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
count
-------
23
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
count
-------
39
(1 row)

145
contrib/tsearch2/ginidx.c Normal file
View File

@ -0,0 +1,145 @@
#include "postgres.h"
#include <float.h>
#include "access/gist.h"
#include "access/itup.h"
#include "access/tuptoaster.h"
#include "storage/bufpage.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "tsvector.h"
#include "query.h"
#include "query_cleanup.h"
PG_FUNCTION_INFO_V1(gin_extract_tsvector);
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
Datum
gin_extract_tsvector(PG_FUNCTION_ARGS) {
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
Datum *entries = NULL;
*nentries = 0;
if ( vector->size > 0 ) {
int i;
WordEntry *we = ARRPTR( vector );
*nentries = (uint32)vector->size;
entries = (Datum*)palloc( sizeof(Datum) * vector->size );
for(i=0;i<vector->size;i++) {
text *txt = (text*)palloc( VARHDRSZ + we->len );
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
memcpy( VARDATA(txt), STRPTR( vector ) + we->pos, we->len );
entries[i] = PointerGetDatum( txt );
we++;
}
}
PG_FREE_IF_COPY(vector, 0);
PG_RETURN_POINTER(entries);
}
PG_FUNCTION_INFO_V1(gin_extract_tsquery);
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
Datum
gin_extract_tsquery(PG_FUNCTION_ARGS) {
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
StrategyNumber strategy = DatumGetUInt16( PG_GETARG_DATUM(2) );
Datum *entries = NULL;
*nentries = 0;
if ( query->size > 0 ) {
int4 i, j=0, len;
ITEM *item;
item = clean_NOT_v2(GETQUERY(query), &len);
if ( !item )
elog(ERROR,"Query requires full scan, GIN doesn't support it");
item = GETQUERY(query);
for(i=0; i<query->size; i++)
if ( item[i].type == VAL )
(*nentries)++;
entries = (Datum*)palloc( sizeof(Datum) * (*nentries) );
for(i=0; i<query->size; i++)
if ( item[i].type == VAL ) {
text *txt;
txt = (text*)palloc( VARHDRSZ + item[i].length );
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
memcpy( VARDATA(txt), GETOPERAND( query ) + item[i].distance, item[i].length );
entries[j++] = PointerGetDatum( txt );
if ( strategy == 1 && item[i].weight != 0 )
elog(ERROR,"With class of lexeme restrictions use @@@ operation");
}
}
PG_FREE_IF_COPY(query, 0);
PG_RETURN_POINTER(entries);
}
/*
 * State passed to checkcondition_gin() through the opaque pointer argument
 * of TS_execute().
 */
typedef struct {
/* first ITEM of the query; an item's index is computed as (item - frst) */
ITEM *frst;
/* one flag per query ITEM, indexed by the item's position in the query */
bool *mapped_check;
} GinChkVal;
/*
 * checkcondition_gin
 *
 * Callback given to TS_execute(): reports the flag recorded for query item
 * "val".  "checkval" must point to a GinChkVal whose frst member is the
 * start of the ITEM array that "val" belongs to.
 */
static bool
checkcondition_gin(void *checkval, ITEM * val)
{
	GinChkVal  *state = (GinChkVal *) checkval;
	bool	   *flag = state->mapped_check + (val - state->frst);

	return *flag;
}
PG_FUNCTION_INFO_V1(gin_ts_consistent);
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
Datum
gin_ts_consistent(PG_FUNCTION_ARGS) {
bool *check = (bool*)PG_GETARG_POINTER(0);
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
bool res = FALSE;
if ( query->size > 0 ) {
int4 i, j=0;
ITEM *item;
GinChkVal gcv;
gcv.frst = item = GETQUERY(query);
gcv.mapped_check= (bool*)palloc( sizeof(bool) * query->size );
for(i=0; i<query->size; i++)
if ( item[i].type == VAL )
gcv.mapped_check[ i ] = check[ j++ ];
res = TS_execute(
GETQUERY(query),
&gcv,
true,
checkcondition_gin
);
}
PG_FREE_IF_COPY(query, 2);
PG_RETURN_BOOL(res);
}

View File

@ -363,3 +363,14 @@ select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
insert into test_tsvector values (null, null);
select a is null, a from test_tsvector order by a;
drop index wowidx;
create index wowidx on test_tsvector using gin (a);
set enable_seqscan=off;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';

View File

@ -1146,8 +1146,54 @@ AS
FUNCTION 7 gtsq_same (gtsq, gtsq, internal),
STORAGE gtsq;
--GIN support functions
--key extraction for an indexed tsvector value; the internal argument is a
--pointer through which the C function reports how many keys it produced
CREATE FUNCTION gin_extract_tsvector(tsvector, internal)
	RETURNS internal
	AS 'MODULE_PATHNAME'
	LANGUAGE C
	RETURNS NULL ON NULL INPUT;
--key extraction for a tsquery; the two internal arguments carry the
--key-count output pointer and the operator strategy number
CREATE FUNCTION gin_extract_tsquery(tsquery, internal, internal)
	RETURNS internal
	AS 'MODULE_PATHNAME'
	LANGUAGE C
	RETURNS NULL ON NULL INPUT;
--match decision for a tsquery: the first internal argument is the array of
--per-key flags, the second is not read by the C function
CREATE FUNCTION gin_ts_consistent(internal, internal, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE C
	RETURNS NULL ON NULL INPUT;
--tsvector @@@ tsquery, evaluated by exectsq; its commutator is the
--tsquery @@@ tsvector form of the same operator name
CREATE OPERATOR @@@ (
	LEFTARG    = tsvector,
	RIGHTARG   = tsquery,
	PROCEDURE  = exectsq,
	COMMUTATOR = '@@@',
	RESTRICT   = contsel,
	JOIN       = contjoinsel
);
--tsquery @@@ tsvector, evaluated by rexectsq; its commutator is the
--tsvector @@@ tsquery form of the same operator name
CREATE OPERATOR @@@ (
	LEFTARG    = tsquery,
	RIGHTARG   = tsvector,
	PROCEDURE  = rexectsq,
	COMMUTATOR = '@@@',
	RESTRICT   = contsel,
	JOIN       = contjoinsel
);
--default GIN operator class for tsvector; index keys are stored as text
--and compared with bttextcmp.
--strategy 1 (@@) is rejected by gin_extract_tsquery when the query carries
--lexeme weight restrictions; strategy 2 (@@@) accepts such queries and is
--marked RECHECK.
CREATE OPERATOR CLASS gin_tsvector_ops
	DEFAULT FOR TYPE tsvector USING gin
AS
	OPERATOR 1 @@ (tsvector, tsquery),
	OPERATOR 2 @@@ (tsvector, tsquery) RECHECK,
	FUNCTION 1 bttextcmp(text, text),
	FUNCTION 2 gin_extract_tsvector(tsvector, internal),
	FUNCTION 3 gin_extract_tsquery(tsquery, internal, internal),
	FUNCTION 4 gin_ts_consistent(internal, internal, tsquery),
	STORAGE text;
--example of ISpell dictionary
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
--example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
END;