mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
GIN: Generalized Inverted iNdex.
text[], int4[], Tsearch2 support for GIN.
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.13 2006/01/27 16:32:31 teodor Exp $
|
||||
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.14 2006/05/02 11:28:54 teodor Exp $
|
||||
|
||||
MODULE_big = tsearch2
|
||||
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
|
||||
@ -7,7 +7,7 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
|
||||
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
|
||||
tsvector_op.o rank.o ts_stat.o \
|
||||
query_util.o query_support.o query_rewrite.o query_gist.o \
|
||||
ts_locale.o
|
||||
ts_locale.o ginidx.o
|
||||
|
||||
SUBDIRS := snowball ispell wordparser
|
||||
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)
|
||||
|
@ -3001,3 +3001,42 @@ select a is null, a from test_tsvector order by a;
|
||||
t |
|
||||
(514 rows)
|
||||
|
||||
drop index wowidx;
|
||||
create index wowidx on test_tsvector using gin (a);
|
||||
set enable_seqscan=off;
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
||||
count
|
||||
-------
|
||||
158
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
||||
count
|
||||
-------
|
||||
17
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
||||
count
|
||||
-------
|
||||
6
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
||||
count
|
||||
-------
|
||||
98
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
||||
count
|
||||
-------
|
||||
23
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
||||
count
|
||||
-------
|
||||
39
|
||||
(1 row)
|
||||
|
||||
|
145
contrib/tsearch2/ginidx.c
Normal file
145
contrib/tsearch2/ginidx.c
Normal file
@ -0,0 +1,145 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "access/tuptoaster.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "tsvector.h"
|
||||
#include "query.h"
|
||||
#include "query_cleanup.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsvector);
|
||||
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_extract_tsvector(PG_FUNCTION_ARGS) {
|
||||
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if ( vector->size > 0 ) {
|
||||
int i;
|
||||
WordEntry *we = ARRPTR( vector );
|
||||
|
||||
*nentries = (uint32)vector->size;
|
||||
entries = (Datum*)palloc( sizeof(Datum) * vector->size );
|
||||
|
||||
for(i=0;i<vector->size;i++) {
|
||||
text *txt = (text*)palloc( VARHDRSZ + we->len );
|
||||
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
|
||||
memcpy( VARDATA(txt), STRPTR( vector ) + we->pos, we->len );
|
||||
|
||||
entries[i] = PointerGetDatum( txt );
|
||||
|
||||
we++;
|
||||
}
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(vector, 0);
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsquery);
|
||||
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_extract_tsquery(PG_FUNCTION_ARGS) {
|
||||
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
|
||||
StrategyNumber strategy = DatumGetUInt16( PG_GETARG_DATUM(2) );
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if ( query->size > 0 ) {
|
||||
int4 i, j=0, len;
|
||||
ITEM *item;
|
||||
|
||||
item = clean_NOT_v2(GETQUERY(query), &len);
|
||||
if ( !item )
|
||||
elog(ERROR,"Query requires full scan, GIN doesn't support it");
|
||||
|
||||
item = GETQUERY(query);
|
||||
|
||||
for(i=0; i<query->size; i++)
|
||||
if ( item[i].type == VAL )
|
||||
(*nentries)++;
|
||||
|
||||
entries = (Datum*)palloc( sizeof(Datum) * (*nentries) );
|
||||
|
||||
for(i=0; i<query->size; i++)
|
||||
if ( item[i].type == VAL ) {
|
||||
text *txt;
|
||||
|
||||
txt = (text*)palloc( VARHDRSZ + item[i].length );
|
||||
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
|
||||
memcpy( VARDATA(txt), GETOPERAND( query ) + item[i].distance, item[i].length );
|
||||
|
||||
entries[j++] = PointerGetDatum( txt );
|
||||
|
||||
if ( strategy == 1 && item[i].weight != 0 )
|
||||
elog(ERROR,"With class of lexeme restrictions use @@@ operation");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
ITEM *frst;
|
||||
bool *mapped_check;
|
||||
} GinChkVal;
|
||||
|
||||
static bool
|
||||
checkcondition_gin(void *checkval, ITEM * val) {
|
||||
GinChkVal *gcv = (GinChkVal*)checkval;
|
||||
|
||||
return gcv->mapped_check[ val - gcv->frst ];
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_ts_consistent);
|
||||
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_ts_consistent(PG_FUNCTION_ARGS) {
|
||||
bool *check = (bool*)PG_GETARG_POINTER(0);
|
||||
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
bool res = FALSE;
|
||||
|
||||
if ( query->size > 0 ) {
|
||||
int4 i, j=0;
|
||||
ITEM *item;
|
||||
GinChkVal gcv;
|
||||
|
||||
gcv.frst = item = GETQUERY(query);
|
||||
gcv.mapped_check= (bool*)palloc( sizeof(bool) * query->size );
|
||||
|
||||
for(i=0; i<query->size; i++)
|
||||
if ( item[i].type == VAL )
|
||||
gcv.mapped_check[ i ] = check[ j++ ];
|
||||
|
||||
|
||||
res = TS_execute(
|
||||
GETQUERY(query),
|
||||
&gcv,
|
||||
true,
|
||||
checkcondition_gin
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
|
||||
|
@ -363,3 +363,14 @@ select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
|
||||
insert into test_tsvector values (null, null);
|
||||
select a is null, a from test_tsvector order by a;
|
||||
|
||||
drop index wowidx;
|
||||
create index wowidx on test_tsvector using gin (a);
|
||||
set enable_seqscan=off;
|
||||
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
|
||||
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
|
||||
|
||||
|
@ -1146,8 +1146,54 @@ AS
|
||||
FUNCTION 7 gtsq_same (gtsq, gtsq, internal),
|
||||
STORAGE gtsq;
|
||||
|
||||
-- GIN support functions
--
-- gin_extract_tsvector: produces the index keys for one tsvector.
-- The second argument receives the number of keys.
CREATE FUNCTION gin_extract_tsvector(tsvector, internal)
	RETURNS internal
	AS 'MODULE_PATHNAME'
	LANGUAGE C RETURNS NULL ON NULL INPUT;
|
||||
|
||||
-- gin_extract_tsquery: produces the keys to search for from a tsquery.
-- The second argument receives the number of keys, the third carries the
-- strategy number of the operator being used.
CREATE FUNCTION gin_extract_tsquery(tsquery, internal, internal)
	RETURNS internal
	AS 'MODULE_PATHNAME'
	LANGUAGE C RETURNS NULL ON NULL INPUT;
|
||||
|
||||
-- gin_ts_consistent: given the per-key match flags (first argument) and the
-- tsquery (third argument), reports whether the query can be satisfied.
CREATE FUNCTION gin_ts_consistent(internal, internal, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE C RETURNS NULL ON NULL INPUT;
|
||||
|
||||
-- tsvector @@@ tsquery: match operator evaluated by exectsq.
-- It is registered as strategy 2 of the GIN operator class, the strategy
-- under which gin_extract_tsquery accepts operands with a weight class.
CREATE OPERATOR @@@ (
	LEFTARG    = tsvector,
	RIGHTARG   = tsquery,
	PROCEDURE  = exectsq,
	COMMUTATOR = '@@@',
	RESTRICT   = contsel,
	JOIN       = contjoinsel
);
|
||||
-- tsquery @@@ tsvector: commutator of the operator above, evaluated by
-- rexectsq with the argument types swapped.
CREATE OPERATOR @@@ (
	LEFTARG    = tsquery,
	RIGHTARG   = tsvector,
	PROCEDURE  = rexectsq,
	COMMUTATOR = '@@@',
	RESTRICT   = contsel,
	JOIN       = contjoinsel
);
|
||||
|
||||
-- Default GIN operator class for tsvector.
--
-- Keys are stored as text and compared with bttextcmp.
--   strategy 1 (@@)  : gin_extract_tsquery raises an error under this
--                      strategy when a query operand has a weight class.
--   strategy 2 (@@@) : accepts such operands; declared RECHECK so that
--                      the operator itself is re-applied to each row the
--                      index returns.
CREATE OPERATOR CLASS gin_tsvector_ops
DEFAULT FOR TYPE tsvector USING gin
AS
	OPERATOR	1	@@ (tsvector, tsquery),
	OPERATOR	2	@@@ (tsvector, tsquery) RECHECK,
	FUNCTION	1	bttextcmp(text, text),
	FUNCTION	2	gin_extract_tsvector(tsvector,internal),
	FUNCTION	3	gin_extract_tsquery(tsquery,internal,internal),
	FUNCTION	4	gin_ts_consistent(internal,internal,tsquery),
	STORAGE		text;
|
||||
|
||||
|
||||
--example of ISpell dictionary
|
||||
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
|
||||
--example of synonym dict
|
||||
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5;
|
||||
|
||||
END;
|
||||
|
Reference in New Issue
Block a user