mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
pgindent run.
This commit is contained in:
@ -4,80 +4,99 @@
|
||||
#include "ts_cfg.h"
|
||||
#include "dict.h"
|
||||
|
||||
text*
|
||||
char2text(char* in) {
|
||||
text *
|
||||
char2text(char *in)
|
||||
{
|
||||
return charl2text(in, strlen(in));
|
||||
}
|
||||
|
||||
text* charl2text(char* in, int len) {
|
||||
text *out=(text*)palloc(len+VARHDRSZ);
|
||||
text *
|
||||
charl2text(char *in, int len)
|
||||
{
|
||||
text *out = (text *) palloc(len + VARHDRSZ);
|
||||
|
||||
memcpy(VARDATA(out), in, len);
|
||||
VARATT_SIZEP(out) = len+VARHDRSZ;
|
||||
VARATT_SIZEP(out) = len + VARHDRSZ;
|
||||
return out;
|
||||
}
|
||||
|
||||
char
|
||||
*text2char(text* in) {
|
||||
char *out=palloc( VARSIZE(in) );
|
||||
memcpy(out, VARDATA(in), VARSIZE(in)-VARHDRSZ);
|
||||
out[ VARSIZE(in)-VARHDRSZ ] ='\0';
|
||||
char
|
||||
*
|
||||
text2char(text *in)
|
||||
{
|
||||
char *out = palloc(VARSIZE(in));
|
||||
|
||||
memcpy(out, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
||||
out[VARSIZE(in) - VARHDRSZ] = '\0';
|
||||
return out;
|
||||
}
|
||||
|
||||
char
|
||||
*pnstrdup(char* in, int len) {
|
||||
char *out=palloc( len+1 );
|
||||
char
|
||||
*
|
||||
pnstrdup(char *in, int len)
|
||||
{
|
||||
char *out = palloc(len + 1);
|
||||
|
||||
memcpy(out, in, len);
|
||||
out[len]='\0';
|
||||
out[len] = '\0';
|
||||
return out;
|
||||
}
|
||||
|
||||
text
|
||||
*ptextdup(text* in) {
|
||||
text *out=(text*)palloc( VARSIZE(in) );
|
||||
memcpy(out,in,VARSIZE(in));
|
||||
text
|
||||
*
|
||||
ptextdup(text *in)
|
||||
{
|
||||
text *out = (text *) palloc(VARSIZE(in));
|
||||
|
||||
memcpy(out, in, VARSIZE(in));
|
||||
return out;
|
||||
}
|
||||
|
||||
text
|
||||
*mtextdup(text* in) {
|
||||
text *out=(text*)malloc( VARSIZE(in) );
|
||||
if ( !out )
|
||||
text
|
||||
*
|
||||
mtextdup(text *in)
|
||||
{
|
||||
text *out = (text *) malloc(VARSIZE(in));
|
||||
|
||||
if (!out)
|
||||
ts_error(ERROR, "No memory");
|
||||
memcpy(out,in,VARSIZE(in));
|
||||
memcpy(out, in, VARSIZE(in));
|
||||
return out;
|
||||
}
|
||||
|
||||
void
|
||||
ts_error(int state, const char *format, ...) {
|
||||
va_list args;
|
||||
int tlen = 128, len=0;
|
||||
char *buf;
|
||||
|
||||
void
|
||||
ts_error(int state, const char *format,...)
|
||||
{
|
||||
va_list args;
|
||||
int tlen = 128,
|
||||
len = 0;
|
||||
char *buf;
|
||||
|
||||
reset_cfg();
|
||||
reset_dict();
|
||||
reset_prs();
|
||||
|
||||
va_start(args, format);
|
||||
buf = palloc(tlen);
|
||||
len = vsnprintf(buf, tlen-1, format, args);
|
||||
if ( len >= tlen ) {
|
||||
tlen=len+1;
|
||||
buf = repalloc( buf, tlen );
|
||||
vsnprintf(buf, tlen-1, format, args);
|
||||
len = vsnprintf(buf, tlen - 1, format, args);
|
||||
if (len >= tlen)
|
||||
{
|
||||
tlen = len + 1;
|
||||
buf = repalloc(buf, tlen);
|
||||
vsnprintf(buf, tlen - 1, format, args);
|
||||
}
|
||||
va_end(args);
|
||||
|
||||
|
||||
/* ?? internal error ?? */
|
||||
elog(state, "%s", buf);
|
||||
pfree(buf);
|
||||
}
|
||||
|
||||
int
|
||||
text_cmp(text *a, text *b) {
|
||||
if ( VARSIZE(a) == VARSIZE(b) )
|
||||
return strncmp( VARDATA(a), VARDATA(b), VARSIZE(a)-VARHDRSZ );
|
||||
return (int)VARSIZE(a) - (int)VARSIZE(b);
|
||||
int
|
||||
text_cmp(text *a, text *b)
|
||||
{
|
||||
if (VARSIZE(a) == VARSIZE(b))
|
||||
return strncmp(VARDATA(a), VARDATA(b), VARSIZE(a) - VARHDRSZ);
|
||||
return (int) VARSIZE(a) - (int) VARSIZE(b);
|
||||
|
||||
}
|
||||
|
||||
|
@ -7,18 +7,18 @@
|
||||
#define PG_NARGS() (fcinfo->nargs)
|
||||
#endif
|
||||
|
||||
text* char2text(char* in);
|
||||
text* charl2text(char* in, int len);
|
||||
char *text2char(text* in);
|
||||
char *pnstrdup(char* in, int len);
|
||||
text *ptextdup(text* in);
|
||||
text *mtextdup(text* in);
|
||||
text *char2text(char *in);
|
||||
text *charl2text(char *in, int len);
|
||||
char *text2char(text *in);
|
||||
char *pnstrdup(char *in, int len);
|
||||
text *ptextdup(text *in);
|
||||
text *mtextdup(text *in);
|
||||
|
||||
int text_cmp(text *a, text *b);
|
||||
int text_cmp(text *a, text *b);
|
||||
|
||||
#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
|
||||
#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
|
||||
|
||||
void ts_error(int state, const char *format, ...);
|
||||
void ts_error(int state, const char *format,...);
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* interface functions to dictionary
|
||||
/*
|
||||
* interface functions to dictionary
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <errno.h>
|
||||
@ -19,260 +19,285 @@
|
||||
|
||||
/*********top interface**********/
|
||||
|
||||
static void *plan_getdict=NULL;
|
||||
static void *plan_getdict = NULL;
|
||||
|
||||
void
|
||||
init_dict(Oid id, DictInfo *dict) {
|
||||
Oid arg[1]={ OIDOID };
|
||||
bool isnull;
|
||||
Datum pars[1]={ ObjectIdGetDatum(id) };
|
||||
int stat;
|
||||
init_dict(Oid id, DictInfo * dict)
|
||||
{
|
||||
Oid arg[1] = {OIDOID};
|
||||
bool isnull;
|
||||
Datum pars[1] = {ObjectIdGetDatum(id)};
|
||||
int stat;
|
||||
|
||||
memset(dict,0,sizeof(DictInfo));
|
||||
memset(dict, 0, sizeof(DictInfo));
|
||||
SPI_connect();
|
||||
if ( !plan_getdict ) {
|
||||
plan_getdict = SPI_saveplan( SPI_prepare( "select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1" , 1, arg ) );
|
||||
if ( !plan_getdict )
|
||||
if (!plan_getdict)
|
||||
{
|
||||
plan_getdict = SPI_saveplan(SPI_prepare("select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1", 1, arg));
|
||||
if (!plan_getdict)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
stat = SPI_execp(plan_getdict, pars, " ", 1);
|
||||
if ( stat < 0 )
|
||||
ts_error (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 ) {
|
||||
Datum opt;
|
||||
Oid oid=InvalidOid;
|
||||
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
if ( !(isnull || oid==InvalidOid) ) {
|
||||
opt=SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
|
||||
dict->dictionary=(void*)DatumGetPointer(OidFunctionCall1(oid, opt));
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
Datum opt;
|
||||
Oid oid = InvalidOid;
|
||||
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (!(isnull || oid == InvalidOid))
|
||||
{
|
||||
opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
|
||||
dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
|
||||
}
|
||||
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
|
||||
if ( isnull || oid==InvalidOid )
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
|
||||
if (isnull || oid == InvalidOid)
|
||||
ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
|
||||
fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
|
||||
dict->dict_id=id;
|
||||
} else
|
||||
dict->dict_id = id;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "No dictionary with id %d", id);
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
DictInfo *last_dict;
|
||||
int len;
|
||||
int reallen;
|
||||
DictInfo *list;
|
||||
typedef struct
|
||||
{
|
||||
DictInfo *last_dict;
|
||||
int len;
|
||||
int reallen;
|
||||
DictInfo *list;
|
||||
SNMap name2id_map;
|
||||
} DictList;
|
||||
} DictList;
|
||||
|
||||
static DictList DList = {NULL,0,0,NULL,{0,0,NULL}};
|
||||
static DictList DList = {NULL, 0, 0, NULL, {0, 0, NULL}};
|
||||
|
||||
void
|
||||
reset_dict(void) {
|
||||
freeSNMap( &(DList.name2id_map) );
|
||||
reset_dict(void)
|
||||
{
|
||||
freeSNMap(&(DList.name2id_map));
|
||||
/* XXX need to free DList.list[*].dictionary */
|
||||
if ( DList.list )
|
||||
if (DList.list)
|
||||
free(DList.list);
|
||||
memset(&DList,0,sizeof(DictList));
|
||||
memset(&DList, 0, sizeof(DictList));
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
comparedict(const void *a, const void *b) {
|
||||
return ((DictInfo*)a)->dict_id - ((DictInfo*)b)->dict_id;
|
||||
comparedict(const void *a, const void *b)
|
||||
{
|
||||
return ((DictInfo *) a)->dict_id - ((DictInfo *) b)->dict_id;
|
||||
}
|
||||
|
||||
DictInfo *
|
||||
finddict(Oid id) {
|
||||
finddict(Oid id)
|
||||
{
|
||||
/* last used dict */
|
||||
if ( DList.last_dict && DList.last_dict->dict_id==id )
|
||||
if (DList.last_dict && DList.last_dict->dict_id == id)
|
||||
return DList.last_dict;
|
||||
|
||||
|
||||
/* already used dict */
|
||||
if ( DList.len != 0 ) {
|
||||
DictInfo key;
|
||||
key.dict_id=id;
|
||||
if (DList.len != 0)
|
||||
{
|
||||
DictInfo key;
|
||||
|
||||
key.dict_id = id;
|
||||
DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict);
|
||||
if ( DList.last_dict != NULL )
|
||||
if (DList.last_dict != NULL)
|
||||
return DList.last_dict;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
if ( DList.len==DList.reallen ) {
|
||||
DictInfo *tmp;
|
||||
int reallen = ( DList.reallen ) ? 2*DList.reallen : 16;
|
||||
tmp=(DictInfo*)realloc(DList.list,sizeof(DictInfo)*reallen);
|
||||
if ( !tmp )
|
||||
ts_error(ERROR,"No memory");
|
||||
DList.reallen=reallen;
|
||||
DList.list=tmp;
|
||||
if (DList.len == DList.reallen)
|
||||
{
|
||||
DictInfo *tmp;
|
||||
int reallen = (DList.reallen) ? 2 * DList.reallen : 16;
|
||||
|
||||
tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
|
||||
if (!tmp)
|
||||
ts_error(ERROR, "No memory");
|
||||
DList.reallen = reallen;
|
||||
DList.list = tmp;
|
||||
}
|
||||
DList.last_dict=&(DList.list[DList.len]);
|
||||
DList.last_dict = &(DList.list[DList.len]);
|
||||
init_dict(id, DList.last_dict);
|
||||
|
||||
DList.len++;
|
||||
qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
|
||||
return finddict(id); /* qsort changed order!! */;
|
||||
return finddict(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
|
||||
static void *plan_name2id=NULL;
|
||||
static void *plan_name2id = NULL;
|
||||
|
||||
Oid
|
||||
name2id_dict(text *name) {
|
||||
Oid arg[1]={ TEXTOID };
|
||||
bool isnull;
|
||||
Datum pars[1]={ PointerGetDatum(name) };
|
||||
int stat;
|
||||
Oid id=findSNMap_t( &(DList.name2id_map), name );
|
||||
name2id_dict(text *name)
|
||||
{
|
||||
Oid arg[1] = {TEXTOID};
|
||||
bool isnull;
|
||||
Datum pars[1] = {PointerGetDatum(name)};
|
||||
int stat;
|
||||
Oid id = findSNMap_t(&(DList.name2id_map), name);
|
||||
|
||||
if ( id )
|
||||
if (id)
|
||||
return id;
|
||||
|
||||
|
||||
SPI_connect();
|
||||
if ( !plan_name2id ) {
|
||||
plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_dict where dict_name = $1" , 1, arg ) );
|
||||
if ( !plan_name2id )
|
||||
if (!plan_name2id)
|
||||
{
|
||||
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_dict where dict_name = $1", 1, arg));
|
||||
if (!plan_name2id)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
stat = SPI_execp(plan_name2id, pars, " ", 1);
|
||||
if ( stat < 0 )
|
||||
ts_error (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 )
|
||||
id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
else
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
else
|
||||
ts_error(ERROR, "No dictionary with name '%s'", text2char(name));
|
||||
SPI_finish();
|
||||
addSNMap_t( &(DList.name2id_map), name, id );
|
||||
addSNMap_t(&(DList.name2id_map), name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
/******sql-level interface******/
|
||||
PG_FUNCTION_INFO_V1(lexize);
|
||||
Datum lexize(PG_FUNCTION_ARGS);
|
||||
Datum lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
lexize(PG_FUNCTION_ARGS) {
|
||||
text *in=PG_GETARG_TEXT_P(1);
|
||||
DictInfo *dict = finddict( PG_GETARG_OID(0) );
|
||||
char **res, **ptr;
|
||||
Datum *da;
|
||||
ArrayType *a;
|
||||
lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
DictInfo *dict = finddict(PG_GETARG_OID(0));
|
||||
char **res,
|
||||
**ptr;
|
||||
Datum *da;
|
||||
ArrayType *a;
|
||||
|
||||
|
||||
ptr = res = (char**)DatumGetPointer(
|
||||
FunctionCall3(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in)-VARHDRSZ)
|
||||
)
|
||||
);
|
||||
ptr = res = (char **) DatumGetPointer(
|
||||
FunctionCall3(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ)
|
||||
)
|
||||
);
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
if ( !res ) {
|
||||
if (PG_NARGS() > 2)
|
||||
if (!res)
|
||||
{
|
||||
if (PG_NARGS() > 2)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
while(*ptr) ptr++;
|
||||
da = (Datum*)palloc(sizeof(Datum)*(ptr-res+1));
|
||||
ptr=res;
|
||||
while(*ptr) {
|
||||
da[ ptr-res ] = PointerGetDatum( char2text(*ptr) );
|
||||
while (*ptr)
|
||||
ptr++;
|
||||
da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1));
|
||||
ptr = res;
|
||||
while (*ptr)
|
||||
{
|
||||
da[ptr - res] = PointerGetDatum(char2text(*ptr));
|
||||
ptr++;
|
||||
}
|
||||
|
||||
a = construct_array(
|
||||
da,
|
||||
ptr-res,
|
||||
TEXTOID,
|
||||
-1,
|
||||
false,
|
||||
'i'
|
||||
);
|
||||
da,
|
||||
ptr - res,
|
||||
TEXTOID,
|
||||
-1,
|
||||
false,
|
||||
'i'
|
||||
);
|
||||
|
||||
ptr=res;
|
||||
while(*ptr) {
|
||||
pfree( DatumGetPointer(da[ ptr-res ]) );
|
||||
pfree( *ptr );
|
||||
ptr = res;
|
||||
while (*ptr)
|
||||
{
|
||||
pfree(DatumGetPointer(da[ptr - res]));
|
||||
pfree(*ptr);
|
||||
ptr++;
|
||||
}
|
||||
pfree(res);
|
||||
pfree(da);
|
||||
|
||||
PG_RETURN_POINTER(a);
|
||||
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(lexize_byname);
|
||||
Datum lexize_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
lexize_byname(PG_FUNCTION_ARGS) {
|
||||
text *dictname=PG_GETARG_TEXT_P(0);
|
||||
Datum res;
|
||||
Datum lexize_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
lexize_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *dictname = PG_GETARG_TEXT_P(0);
|
||||
Datum res;
|
||||
|
||||
strdup("simple");
|
||||
res=DirectFunctionCall3(
|
||||
lexize,
|
||||
ObjectIdGetDatum(name2id_dict(dictname)),
|
||||
PG_GETARG_DATUM(1),
|
||||
(Datum)0
|
||||
);
|
||||
res = DirectFunctionCall3(
|
||||
lexize,
|
||||
ObjectIdGetDatum(name2id_dict(dictname)),
|
||||
PG_GETARG_DATUM(1),
|
||||
(Datum) 0
|
||||
);
|
||||
PG_FREE_IF_COPY(dictname, 0);
|
||||
if (res)
|
||||
PG_RETURN_DATUM(res);
|
||||
else
|
||||
if (res)
|
||||
PG_RETURN_DATUM(res);
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
static Oid currect_dictionary_id=0;
|
||||
static Oid currect_dictionary_id = 0;
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curdict);
|
||||
Datum set_curdict(PG_FUNCTION_ARGS);
|
||||
Datum set_curdict(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curdict(PG_FUNCTION_ARGS) {
|
||||
set_curdict(PG_FUNCTION_ARGS)
|
||||
{
|
||||
finddict(PG_GETARG_OID(0));
|
||||
currect_dictionary_id=PG_GETARG_OID(0);
|
||||
currect_dictionary_id = PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curdict_byname);
|
||||
Datum set_curdict_byname(PG_FUNCTION_ARGS);
|
||||
Datum set_curdict_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curdict_byname(PG_FUNCTION_ARGS) {
|
||||
text *dictname=PG_GETARG_TEXT_P(0);
|
||||
set_curdict_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *dictname = PG_GETARG_TEXT_P(0);
|
||||
|
||||
DirectFunctionCall1(
|
||||
set_curdict,
|
||||
ObjectIdGetDatum( name2id_dict(dictname) )
|
||||
);
|
||||
set_curdict,
|
||||
ObjectIdGetDatum(name2id_dict(dictname))
|
||||
);
|
||||
PG_FREE_IF_COPY(dictname, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(lexize_bycurrent);
|
||||
Datum lexize_bycurrent(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
lexize_bycurrent(PG_FUNCTION_ARGS) {
|
||||
Datum res;
|
||||
if ( currect_dictionary_id == 0 )
|
||||
Datum lexize_bycurrent(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
lexize_bycurrent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum res;
|
||||
|
||||
if (currect_dictionary_id == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("no currect dictionary"),
|
||||
errhint("Execute select set_curdict().")));
|
||||
|
||||
res = DirectFunctionCall3(
|
||||
lexize,
|
||||
ObjectIdGetDatum(currect_dictionary_id),
|
||||
PG_GETARG_DATUM(0),
|
||||
(Datum)0
|
||||
);
|
||||
if (res)
|
||||
lexize,
|
||||
ObjectIdGetDatum(currect_dictionary_id),
|
||||
PG_GETARG_DATUM(0),
|
||||
(Datum) 0
|
||||
);
|
||||
if (res)
|
||||
PG_RETURN_DATUM(res);
|
||||
else
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
|
||||
|
@ -3,36 +3,39 @@
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
typedef struct {
|
||||
int len;
|
||||
char **stop;
|
||||
char* (*wordop)(char*);
|
||||
} StopList;
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
char **stop;
|
||||
char *(*wordop) (char *);
|
||||
} StopList;
|
||||
|
||||
void sortstoplist(StopList *s);
|
||||
void freestoplist(StopList *s);
|
||||
void readstoplist(text *in, StopList *s);
|
||||
bool searchstoplist(StopList *s, char *key);
|
||||
char* lowerstr(char *str);
|
||||
void sortstoplist(StopList * s);
|
||||
void freestoplist(StopList * s);
|
||||
void readstoplist(text *in, StopList * s);
|
||||
bool searchstoplist(StopList * s, char *key);
|
||||
char *lowerstr(char *str);
|
||||
|
||||
typedef struct {
|
||||
Oid dict_id;
|
||||
FmgrInfo lexize_info;
|
||||
void *dictionary;
|
||||
} DictInfo;
|
||||
typedef struct
|
||||
{
|
||||
Oid dict_id;
|
||||
FmgrInfo lexize_info;
|
||||
void *dictionary;
|
||||
} DictInfo;
|
||||
|
||||
void init_dict(Oid id, DictInfo *dict);
|
||||
DictInfo* finddict(Oid id);
|
||||
Oid name2id_dict(text *name);
|
||||
void reset_dict(void);
|
||||
void init_dict(Oid id, DictInfo * dict);
|
||||
DictInfo *finddict(Oid id);
|
||||
Oid name2id_dict(text *name);
|
||||
void reset_dict(void);
|
||||
|
||||
|
||||
/* simple parser of cfg string */
|
||||
typedef struct {
|
||||
char *key;
|
||||
char *value;
|
||||
} Map;
|
||||
typedef struct
|
||||
{
|
||||
char *key;
|
||||
char *value;
|
||||
} Map;
|
||||
|
||||
void parse_cfgdict(text *in, Map **m);
|
||||
void parse_cfgdict(text *in, Map ** m);
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* example of dictionary
|
||||
/*
|
||||
* example of dictionary
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <errno.h>
|
||||
@ -11,30 +11,35 @@
|
||||
#include "dict.h"
|
||||
#include "common.h"
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
StopList stoplist;
|
||||
} DictExample;
|
||||
} DictExample;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dex_init);
|
||||
Datum dex_init(PG_FUNCTION_ARGS);
|
||||
Datum dex_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(dex_lexize);
|
||||
Datum dex_lexize(PG_FUNCTION_ARGS);
|
||||
Datum dex_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
dex_init(PG_FUNCTION_ARGS) {
|
||||
DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
|
||||
Datum
|
||||
dex_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictExample *d = (DictExample *) malloc(sizeof(DictExample));
|
||||
|
||||
if ( !d )
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d,0,sizeof(DictExample));
|
||||
memset(d, 0, sizeof(DictExample));
|
||||
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
d->stoplist.wordop=lowerstr;
|
||||
|
||||
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
@ -44,18 +49,21 @@ dex_init(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
Datum
|
||||
dex_lexize(PG_FUNCTION_ARGS) {
|
||||
DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
char **res=palloc(sizeof(char*)*2);
|
||||
dex_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
char **res = palloc(sizeof(char *) * 2);
|
||||
|
||||
if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
|
||||
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
||||
{
|
||||
pfree(txt);
|
||||
res[0]=NULL;
|
||||
} else
|
||||
res[0]=txt;
|
||||
res[1]=NULL;
|
||||
res[0] = NULL;
|
||||
}
|
||||
else
|
||||
res[0] = txt;
|
||||
res[1] = NULL;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*
|
||||
/*
|
||||
* ISpell interface
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
@ -12,96 +12,117 @@
|
||||
#include "common.h"
|
||||
#include "ispell/spell.h"
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
StopList stoplist;
|
||||
IspellDict obj;
|
||||
} DictISpell;
|
||||
} DictISpell;
|
||||
|
||||
PG_FUNCTION_INFO_V1(spell_init);
|
||||
Datum spell_init(PG_FUNCTION_ARGS);
|
||||
Datum spell_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(spell_lexize);
|
||||
Datum spell_lexize(PG_FUNCTION_ARGS);
|
||||
Datum spell_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
static void
|
||||
freeDictISpell(DictISpell *d) {
|
||||
freeDictISpell(DictISpell * d)
|
||||
{
|
||||
FreeIspell(&(d->obj));
|
||||
freestoplist(&(d->stoplist));
|
||||
free(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
spell_init(PG_FUNCTION_ARGS) {
|
||||
DictISpell *d;
|
||||
Map *cfg, *pcfg;
|
||||
text *in;
|
||||
bool affloaded=false, dictloaded=false, stoploaded=false;
|
||||
Datum
|
||||
spell_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictISpell *d;
|
||||
Map *cfg,
|
||||
*pcfg;
|
||||
text *in;
|
||||
bool affloaded = false,
|
||||
dictloaded = false,
|
||||
stoploaded = false;
|
||||
|
||||
if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
|
||||
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("ISpell confguration error")));
|
||||
|
||||
d = (DictISpell*)malloc( sizeof(DictISpell) );
|
||||
if ( !d )
|
||||
|
||||
d = (DictISpell *) malloc(sizeof(DictISpell));
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d,0,sizeof(DictISpell));
|
||||
d->stoplist.wordop=lowerstr;
|
||||
memset(d, 0, sizeof(DictISpell));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
in = PG_GETARG_TEXT_P(0);
|
||||
parse_cfgdict(in,&cfg);
|
||||
parse_cfgdict(in, &cfg);
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
pcfg=cfg;
|
||||
while(pcfg->key) {
|
||||
if ( strcasecmp("DictFile", pcfg->key) == 0 ) {
|
||||
if ( dictloaded ) {
|
||||
pcfg = cfg;
|
||||
while (pcfg->key)
|
||||
{
|
||||
if (strcasecmp("DictFile", pcfg->key) == 0)
|
||||
{
|
||||
if (dictloaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("dictionary already loaded")));
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("dictionary already loaded")));
|
||||
}
|
||||
if ( ImportDictionary(&(d->obj), pcfg->value) ) {
|
||||
if (ImportDictionary(&(d->obj), pcfg->value))
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not load dictionary file \"%s\"",
|
||||
pcfg->value)));
|
||||
}
|
||||
dictloaded=true;
|
||||
} else if ( strcasecmp("AffFile", pcfg->key) == 0 ) {
|
||||
if ( affloaded ) {
|
||||
dictloaded = true;
|
||||
}
|
||||
else if (strcasecmp("AffFile", pcfg->key) == 0)
|
||||
{
|
||||
if (affloaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("affixes already loaded")));
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("affixes already loaded")));
|
||||
}
|
||||
if ( ImportAffixes(&(d->obj), pcfg->value) ) {
|
||||
if (ImportAffixes(&(d->obj), pcfg->value))
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not load affix file \"%s\"",
|
||||
pcfg->value)));
|
||||
}
|
||||
affloaded=true;
|
||||
} else if ( strcasecmp("StopFile", pcfg->key) == 0 ) {
|
||||
text *tmp=char2text(pcfg->value);
|
||||
if ( stoploaded ) {
|
||||
affloaded = true;
|
||||
}
|
||||
else if (strcasecmp("StopFile", pcfg->key) == 0)
|
||||
{
|
||||
text *tmp = char2text(pcfg->value);
|
||||
|
||||
if (stoploaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("stop words already loaded")));
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("stop words already loaded")));
|
||||
}
|
||||
readstoplist(tmp, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
pfree(tmp);
|
||||
stoploaded=true;
|
||||
} else {
|
||||
stoploaded = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("unrecognized option: %s => %s",
|
||||
pcfg->key, pcfg->value)));
|
||||
pcfg->key, pcfg->value)));
|
||||
}
|
||||
pfree(pcfg->key);
|
||||
pfree(pcfg->value);
|
||||
@ -109,15 +130,20 @@ spell_init(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
pfree(cfg);
|
||||
|
||||
if ( affloaded && dictloaded ) {
|
||||
if (affloaded && dictloaded)
|
||||
{
|
||||
SortDictionary(&(d->obj));
|
||||
SortAffixes(&(d->obj));
|
||||
} else if ( !affloaded ) {
|
||||
}
|
||||
else if (!affloaded)
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("no affixes")));
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
freeDictISpell(d);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
@ -128,37 +154,43 @@ spell_init(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
Datum
|
||||
spell_lexize(PG_FUNCTION_ARGS) {
|
||||
DictISpell *d = (DictISpell*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
char *txt;
|
||||
char **res;
|
||||
char **ptr, **cptr;
|
||||
spell_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *txt;
|
||||
char **res;
|
||||
char **ptr,
|
||||
**cptr;
|
||||
|
||||
if ( !PG_GETARG_INT32(2) )
|
||||
if (!PG_GETARG_INT32(2))
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
res=palloc(sizeof(char*)*2);
|
||||
res = palloc(sizeof(char *) * 2);
|
||||
txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
res=NormalizeWord(&(d->obj), txt);
|
||||
res = NormalizeWord(&(d->obj), txt);
|
||||
pfree(txt);
|
||||
|
||||
if ( res==NULL )
|
||||
if (res == NULL)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
ptr=cptr=res;
|
||||
while(*ptr) {
|
||||
if ( searchstoplist(&(d->stoplist),*ptr) ) {
|
||||
ptr = cptr = res;
|
||||
while (*ptr)
|
||||
{
|
||||
if (searchstoplist(&(d->stoplist), *ptr))
|
||||
{
|
||||
pfree(*ptr);
|
||||
*ptr=NULL;
|
||||
*ptr = NULL;
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*cptr = *ptr;
|
||||
cptr++;
|
||||
ptr++;
|
||||
} else {
|
||||
*cptr=*ptr;
|
||||
cptr++; ptr++;
|
||||
}
|
||||
}
|
||||
*cptr=NULL;
|
||||
*cptr = NULL;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
/*
|
||||
* example of Snowball dictionary
|
||||
* http://snowball.tartarus.org/
|
||||
* http://snowball.tartarus.org/
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
@ -14,103 +14,118 @@
|
||||
#include "snowball/english_stem.h"
|
||||
#include "snowball/russian_stem.h"
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
struct SN_env *z;
|
||||
StopList stoplist;
|
||||
int (*stem)(struct SN_env * z);
|
||||
} DictSnowball;
|
||||
int (*stem) (struct SN_env * z);
|
||||
} DictSnowball;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_en_init);
|
||||
Datum snb_en_init(PG_FUNCTION_ARGS);
|
||||
Datum snb_en_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_ru_init);
|
||||
Datum snb_ru_init(PG_FUNCTION_ARGS);
|
||||
Datum snb_ru_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(snb_lexize);
|
||||
Datum snb_lexize(PG_FUNCTION_ARGS);
|
||||
Datum snb_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
snb_en_init(PG_FUNCTION_ARGS) {
|
||||
DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
|
||||
Datum
|
||||
snb_en_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
||||
|
||||
if ( !d )
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d,0,sizeof(DictSnowball));
|
||||
d->stoplist.wordop=lowerstr;
|
||||
|
||||
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
memset(d, 0, sizeof(DictSnowball));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
d->z = english_create_env();
|
||||
if (!d->z) {
|
||||
if (!d->z)
|
||||
{
|
||||
freestoplist(&(d->stoplist));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
d->stem=english_stem;
|
||||
d->stem = english_stem;
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
snb_ru_init(PG_FUNCTION_ARGS) {
|
||||
DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
|
||||
Datum
|
||||
snb_ru_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
|
||||
|
||||
if ( !d )
|
||||
if (!d)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset(d,0,sizeof(DictSnowball));
|
||||
d->stoplist.wordop=lowerstr;
|
||||
|
||||
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
memset(d, 0, sizeof(DictSnowball));
|
||||
d->stoplist.wordop = lowerstr;
|
||||
|
||||
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(0);
|
||||
|
||||
readstoplist(in, &(d->stoplist));
|
||||
sortstoplist(&(d->stoplist));
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
}
|
||||
|
||||
d->z = russian_create_env();
|
||||
if (!d->z) {
|
||||
if (!d->z)
|
||||
{
|
||||
freestoplist(&(d->stoplist));
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
d->stem=russian_stem;
|
||||
d->stem = russian_stem;
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
snb_lexize(PG_FUNCTION_ARGS) {
|
||||
DictSnowball *d = (DictSnowball*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
char **res=palloc(sizeof(char*)*2);
|
||||
snb_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
char **res = palloc(sizeof(char *) * 2);
|
||||
|
||||
if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
|
||||
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
||||
{
|
||||
pfree(txt);
|
||||
res[0]=NULL;
|
||||
} else {
|
||||
SN_set_current(d->z, strlen(txt), txt);
|
||||
(d->stem)(d->z);
|
||||
if ( d->z->p && d->z->l ) {
|
||||
txt=repalloc(txt, d->z->l+1);
|
||||
memcpy( txt, d->z->p, d->z->l);
|
||||
txt[d->z->l]='\0';
|
||||
}
|
||||
res[0]=txt;
|
||||
res[0] = NULL;
|
||||
}
|
||||
res[1]=NULL;
|
||||
else
|
||||
{
|
||||
SN_set_current(d->z, strlen(txt), txt);
|
||||
(d->stem) (d->z);
|
||||
if (d->z->p && d->z->l)
|
||||
{
|
||||
txt = repalloc(txt, d->z->l + 1);
|
||||
memcpy(txt, d->z->p, d->z->l);
|
||||
txt[d->z->l] = '\0';
|
||||
}
|
||||
res[0] = txt;
|
||||
}
|
||||
res[1] = NULL;
|
||||
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*
|
||||
/*
|
||||
* ISpell interface
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
@ -13,93 +13,106 @@
|
||||
#include "common.h"
|
||||
|
||||
#define SYNBUFLEN 4096
|
||||
typedef struct {
|
||||
char *in;
|
||||
char *out;
|
||||
} Syn;
|
||||
typedef struct
|
||||
{
|
||||
char *in;
|
||||
char *out;
|
||||
} Syn;
|
||||
|
||||
typedef struct {
|
||||
int len;
|
||||
Syn *syn;
|
||||
} DictSyn;
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
Syn *syn;
|
||||
} DictSyn;
|
||||
|
||||
PG_FUNCTION_INFO_V1(syn_init);
|
||||
Datum syn_init(PG_FUNCTION_ARGS);
|
||||
Datum syn_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(syn_lexize);
|
||||
Datum syn_lexize(PG_FUNCTION_ARGS);
|
||||
Datum syn_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
static char *
|
||||
findwrd(char *in, char **end) {
|
||||
char *start;
|
||||
findwrd(char *in, char **end)
|
||||
{
|
||||
char *start;
|
||||
|
||||
*end=NULL;
|
||||
while(*in && isspace(*in))
|
||||
*end = NULL;
|
||||
while (*in && isspace(*in))
|
||||
in++;
|
||||
|
||||
if ( !in )
|
||||
if (!in)
|
||||
return NULL;
|
||||
start=in;
|
||||
start = in;
|
||||
|
||||
while(*in && !isspace(*in))
|
||||
while (*in && !isspace(*in))
|
||||
in++;
|
||||
|
||||
*end=in;
|
||||
*end = in;
|
||||
return start;
|
||||
}
|
||||
|
||||
static int
|
||||
compareSyn(const void *a, const void *b) {
|
||||
return strcmp( ((Syn*)a)->in, ((Syn*)b)->in );
|
||||
compareSyn(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
syn_init(PG_FUNCTION_ARGS) {
|
||||
text *in;
|
||||
DictSyn *d;
|
||||
int cur=0;
|
||||
FILE *fin;
|
||||
char *filename;
|
||||
char buf[SYNBUFLEN];
|
||||
char *starti,*starto,*end=NULL;
|
||||
int slen;
|
||||
Datum
|
||||
syn_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in;
|
||||
DictSyn *d;
|
||||
int cur = 0;
|
||||
FILE *fin;
|
||||
char *filename;
|
||||
char buf[SYNBUFLEN];
|
||||
char *starti,
|
||||
*starto,
|
||||
*end = NULL;
|
||||
int slen;
|
||||
|
||||
if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
|
||||
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("NULL config")));
|
||||
|
||||
in = PG_GETARG_TEXT_P(0);
|
||||
if ( VARSIZE(in) - VARHDRSZ == 0 )
|
||||
if (VARSIZE(in) - VARHDRSZ == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("VOID config")));
|
||||
|
||||
filename=text2char(in);
|
||||
filename = text2char(in);
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
if ( (fin=fopen(filename,"r")) == NULL )
|
||||
if ((fin = fopen(filename, "r")) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m",
|
||||
filename)));
|
||||
filename)));
|
||||
|
||||
d = (DictSyn*)malloc( sizeof(DictSyn) );
|
||||
if ( !d ) {
|
||||
d = (DictSyn *) malloc(sizeof(DictSyn));
|
||||
if (!d)
|
||||
{
|
||||
fclose(fin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
memset(d,0,sizeof(DictSyn));
|
||||
memset(d, 0, sizeof(DictSyn));
|
||||
|
||||
while( fgets(buf,SYNBUFLEN,fin) ) {
|
||||
slen = strlen(buf)-1;
|
||||
while (fgets(buf, SYNBUFLEN, fin))
|
||||
{
|
||||
slen = strlen(buf) - 1;
|
||||
buf[slen] = '\0';
|
||||
if ( *buf=='\0' ) continue;
|
||||
if (cur==d->len) {
|
||||
d->len = (d->len) ? 2*d->len : 16;
|
||||
d->syn=(Syn*)realloc( d->syn, sizeof(Syn)*d->len );
|
||||
if ( !d->syn ) {
|
||||
if (*buf == '\0')
|
||||
continue;
|
||||
if (cur == d->len)
|
||||
{
|
||||
d->len = (d->len) ? 2 * d->len : 16;
|
||||
d->syn = (Syn *) realloc(d->syn, sizeof(Syn) * d->len);
|
||||
if (!d->syn)
|
||||
{
|
||||
fclose(fin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
@ -107,64 +120,66 @@ syn_init(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
}
|
||||
|
||||
starti=findwrd(buf,&end);
|
||||
if ( !starti )
|
||||
starti = findwrd(buf, &end);
|
||||
if (!starti)
|
||||
continue;
|
||||
*end='\0';
|
||||
if ( end >= buf+slen )
|
||||
*end = '\0';
|
||||
if (end >= buf + slen)
|
||||
continue;
|
||||
|
||||
starto= findwrd(end+1, &end);
|
||||
if ( !starto )
|
||||
starto = findwrd(end + 1, &end);
|
||||
if (!starto)
|
||||
continue;
|
||||
*end='\0';
|
||||
*end = '\0';
|
||||
|
||||
d->syn[cur].in=strdup(lowerstr(starti));
|
||||
d->syn[cur].out=strdup(lowerstr(starto));
|
||||
if ( !(d->syn[cur].in && d->syn[cur].out) ) {
|
||||
d->syn[cur].in = strdup(lowerstr(starti));
|
||||
d->syn[cur].out = strdup(lowerstr(starto));
|
||||
if (!(d->syn[cur].in && d->syn[cur].out))
|
||||
{
|
||||
fclose(fin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
|
||||
cur++;
|
||||
cur++;
|
||||
}
|
||||
|
||||
fclose(fin);
|
||||
|
||||
d->len=cur;
|
||||
if ( cur>1 )
|
||||
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
||||
|
||||
fclose(fin);
|
||||
|
||||
d->len = cur;
|
||||
if (cur > 1)
|
||||
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
||||
|
||||
pfree(filename);
|
||||
PG_RETURN_POINTER(d);
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
syn_lexize(PG_FUNCTION_ARGS) {
|
||||
DictSyn *d = (DictSyn*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
Syn key,*found;
|
||||
char **res=NULL;
|
||||
syn_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
Syn key,
|
||||
*found;
|
||||
char **res = NULL;
|
||||
|
||||
if ( !PG_GETARG_INT32(2) )
|
||||
if (!PG_GETARG_INT32(2))
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
key.out=NULL;
|
||||
key.in=lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
|
||||
key.out = NULL;
|
||||
key.in = lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
|
||||
|
||||
found=(Syn*)bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
||||
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
||||
pfree(key.in);
|
||||
|
||||
if ( !found )
|
||||
if (!found)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
res=palloc(sizeof(char*)*2);
|
||||
res = palloc(sizeof(char *) * 2);
|
||||
|
||||
res[0]=pstrdup(found->out);
|
||||
res[1]=NULL;
|
||||
res[0] = pstrdup(found->out);
|
||||
res[1] = NULL;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,48 +4,53 @@
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
|
||||
typedef struct spell_struct {
|
||||
char * word;
|
||||
char flag[10];
|
||||
} SPELL;
|
||||
typedef struct spell_struct
|
||||
{
|
||||
char *word;
|
||||
char flag[10];
|
||||
} SPELL;
|
||||
|
||||
typedef struct aff_struct {
|
||||
char flag;
|
||||
char type;
|
||||
char mask[33];
|
||||
char find[16];
|
||||
char repl[16];
|
||||
regex_t reg;
|
||||
size_t replen;
|
||||
char compile;
|
||||
} AFFIX;
|
||||
typedef struct aff_struct
|
||||
{
|
||||
char flag;
|
||||
char type;
|
||||
char mask[33];
|
||||
char find[16];
|
||||
char repl[16];
|
||||
regex_t reg;
|
||||
size_t replen;
|
||||
char compile;
|
||||
} AFFIX;
|
||||
|
||||
typedef struct Tree_struct {
|
||||
int Left[256], Right[256];
|
||||
} Tree_struct;
|
||||
typedef struct Tree_struct
|
||||
{
|
||||
int Left[256],
|
||||
Right[256];
|
||||
} Tree_struct;
|
||||
|
||||
typedef struct {
|
||||
int maffixes;
|
||||
int naffixes;
|
||||
AFFIX * Affix;
|
||||
typedef struct
|
||||
{
|
||||
int maffixes;
|
||||
int naffixes;
|
||||
AFFIX *Affix;
|
||||
|
||||
int nspell;
|
||||
int mspell;
|
||||
SPELL *Spell;
|
||||
Tree_struct SpellTree;
|
||||
Tree_struct PrefixTree;
|
||||
Tree_struct SuffixTree;
|
||||
int nspell;
|
||||
int mspell;
|
||||
SPELL *Spell;
|
||||
Tree_struct SpellTree;
|
||||
Tree_struct PrefixTree;
|
||||
Tree_struct SuffixTree;
|
||||
|
||||
} IspellDict;
|
||||
} IspellDict;
|
||||
|
||||
char ** NormalizeWord(IspellDict * Conf,char *word);
|
||||
int ImportAffixes(IspellDict * Conf, const char *filename);
|
||||
int ImportDictionary(IspellDict * Conf,const char *filename);
|
||||
char **NormalizeWord(IspellDict * Conf, char *word);
|
||||
int ImportAffixes(IspellDict * Conf, const char *filename);
|
||||
int ImportDictionary(IspellDict * Conf, const char *filename);
|
||||
|
||||
int AddSpell(IspellDict * Conf,const char * word,const char *flag);
|
||||
int AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
|
||||
void SortDictionary(IspellDict * Conf);
|
||||
void SortAffixes(IspellDict * Conf);
|
||||
void FreeIspell (IspellDict *Conf);
|
||||
int AddSpell(IspellDict * Conf, const char *word, const char *flag);
|
||||
int AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type);
|
||||
void SortDictionary(IspellDict * Conf);
|
||||
void SortAffixes(IspellDict * Conf);
|
||||
void FreeIspell(IspellDict * Conf);
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Simple config parser
|
||||
/*
|
||||
* Simple config parser
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
@ -16,126 +16,164 @@
|
||||
#define CS_WAITEQ 2
|
||||
#define CS_WAITVALUE 3
|
||||
#define CS_INVALUE 4
|
||||
#define CS_IN2VALUE 5
|
||||
#define CS_IN2VALUE 5
|
||||
#define CS_WAITDELIM 6
|
||||
#define CS_INESC 7
|
||||
#define CS_IN2ESC 8
|
||||
|
||||
static char *
|
||||
nstrdup(char *ptr, int len) {
|
||||
char *res=palloc(len+1), *cptr;
|
||||
memcpy(res,ptr,len);
|
||||
res[len]='\0';
|
||||
nstrdup(char *ptr, int len)
|
||||
{
|
||||
char *res = palloc(len + 1),
|
||||
*cptr;
|
||||
|
||||
memcpy(res, ptr, len);
|
||||
res[len] = '\0';
|
||||
cptr = ptr = res;
|
||||
while(*ptr) {
|
||||
if ( *ptr == '\\' )
|
||||
while (*ptr)
|
||||
{
|
||||
if (*ptr == '\\')
|
||||
ptr++;
|
||||
*cptr=*ptr; ptr++; cptr++;
|
||||
*cptr = *ptr;
|
||||
ptr++;
|
||||
cptr++;
|
||||
}
|
||||
*cptr='\0';
|
||||
*cptr = '\0';
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void
|
||||
parse_cfgdict(text *in, Map **m) {
|
||||
Map *mptr;
|
||||
char *ptr=VARDATA(in), *begin=NULL;
|
||||
char num=0;
|
||||
int state=CS_WAITKEY;
|
||||
parse_cfgdict(text *in, Map ** m)
|
||||
{
|
||||
Map *mptr;
|
||||
char *ptr = VARDATA(in),
|
||||
*begin = NULL;
|
||||
char num = 0;
|
||||
int state = CS_WAITKEY;
|
||||
|
||||
while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
|
||||
if ( *ptr==',' ) num++;
|
||||
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
|
||||
{
|
||||
if (*ptr == ',')
|
||||
num++;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
*m=mptr=(Map*)palloc( sizeof(Map)*(num+2) );
|
||||
memset(mptr, 0, sizeof(Map)*(num+2) );
|
||||
ptr=VARDATA(in);
|
||||
while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) {
|
||||
if (state==CS_WAITKEY) {
|
||||
if (isalpha(*ptr)) {
|
||||
begin=ptr;
|
||||
state=CS_INKEY;
|
||||
} else if ( !isspace(*ptr) )
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr-VARDATA(in)), *ptr)));
|
||||
} else if (state==CS_INKEY) {
|
||||
if ( isspace(*ptr) ) {
|
||||
mptr->key=nstrdup(begin, ptr-begin);
|
||||
state=CS_WAITEQ;
|
||||
} else if ( *ptr=='=' ) {
|
||||
mptr->key=nstrdup(begin, ptr-begin);
|
||||
state=CS_WAITVALUE;
|
||||
} else if ( !isalpha(*ptr) )
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr-VARDATA(in)), *ptr)));
|
||||
} else if ( state==CS_WAITEQ ) {
|
||||
if ( *ptr=='=' )
|
||||
state=CS_WAITVALUE;
|
||||
else if ( !isspace(*ptr) )
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr-VARDATA(in)), *ptr)));
|
||||
} else if ( state==CS_WAITVALUE ) {
|
||||
if ( *ptr=='"' ) {
|
||||
begin=ptr+1;
|
||||
state=CS_INVALUE;
|
||||
} else if ( !isspace(*ptr) ) {
|
||||
begin=ptr;
|
||||
state=CS_IN2VALUE;
|
||||
*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
|
||||
memset(mptr, 0, sizeof(Map) * (num + 2));
|
||||
ptr = VARDATA(in);
|
||||
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
|
||||
{
|
||||
if (state == CS_WAITKEY)
|
||||
{
|
||||
if (isalpha(*ptr))
|
||||
{
|
||||
begin = ptr;
|
||||
state = CS_INKEY;
|
||||
}
|
||||
} else if ( state==CS_INVALUE ) {
|
||||
if ( *ptr=='"' ) {
|
||||
mptr->value = nstrdup(begin, ptr-begin);
|
||||
mptr++;
|
||||
state=CS_WAITDELIM;
|
||||
} else if ( *ptr=='\\' )
|
||||
state=CS_INESC;
|
||||
} else if ( state==CS_IN2VALUE ) {
|
||||
if ( isspace(*ptr) || *ptr==',' ) {
|
||||
mptr->value = nstrdup(begin, ptr-begin);
|
||||
mptr++;
|
||||
state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM;
|
||||
} else if ( *ptr=='\\' )
|
||||
state=CS_INESC;
|
||||
} else if ( state==CS_WAITDELIM ) {
|
||||
if ( *ptr==',' )
|
||||
state=CS_WAITKEY;
|
||||
else if ( !isspace(*ptr) )
|
||||
else if (!isspace(*ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr-VARDATA(in)), *ptr)));
|
||||
} else if ( state == CS_INESC ) {
|
||||
state=CS_INVALUE;
|
||||
} else if ( state == CS_IN2ESC ) {
|
||||
state=CS_IN2VALUE;
|
||||
} else
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr - VARDATA(in)), *ptr)));
|
||||
}
|
||||
else if (state == CS_INKEY)
|
||||
{
|
||||
if (isspace(*ptr))
|
||||
{
|
||||
mptr->key = nstrdup(begin, ptr - begin);
|
||||
state = CS_WAITEQ;
|
||||
}
|
||||
else if (*ptr == '=')
|
||||
{
|
||||
mptr->key = nstrdup(begin, ptr - begin);
|
||||
state = CS_WAITVALUE;
|
||||
}
|
||||
else if (!isalpha(*ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr - VARDATA(in)), *ptr)));
|
||||
}
|
||||
else if (state == CS_WAITEQ)
|
||||
{
|
||||
if (*ptr == '=')
|
||||
state = CS_WAITVALUE;
|
||||
else if (!isspace(*ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr - VARDATA(in)), *ptr)));
|
||||
}
|
||||
else if (state == CS_WAITVALUE)
|
||||
{
|
||||
if (*ptr == '"')
|
||||
{
|
||||
begin = ptr + 1;
|
||||
state = CS_INVALUE;
|
||||
}
|
||||
else if (!isspace(*ptr))
|
||||
{
|
||||
begin = ptr;
|
||||
state = CS_IN2VALUE;
|
||||
}
|
||||
}
|
||||
else if (state == CS_INVALUE)
|
||||
{
|
||||
if (*ptr == '"')
|
||||
{
|
||||
mptr->value = nstrdup(begin, ptr - begin);
|
||||
mptr++;
|
||||
state = CS_WAITDELIM;
|
||||
}
|
||||
else if (*ptr == '\\')
|
||||
state = CS_INESC;
|
||||
}
|
||||
else if (state == CS_IN2VALUE)
|
||||
{
|
||||
if (isspace(*ptr) || *ptr == ',')
|
||||
{
|
||||
mptr->value = nstrdup(begin, ptr - begin);
|
||||
mptr++;
|
||||
state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
|
||||
}
|
||||
else if (*ptr == '\\')
|
||||
state = CS_INESC;
|
||||
}
|
||||
else if (state == CS_WAITDELIM)
|
||||
{
|
||||
if (*ptr == ',')
|
||||
state = CS_WAITKEY;
|
||||
else if (!isspace(*ptr))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error"),
|
||||
errdetail("Syntax error in position %d near \"%c\"",
|
||||
(int) (ptr - VARDATA(in)), *ptr)));
|
||||
}
|
||||
else if (state == CS_INESC)
|
||||
state = CS_INVALUE;
|
||||
else if (state == CS_IN2ESC)
|
||||
state = CS_IN2VALUE;
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("bad parser state"),
|
||||
errdetail("%d at position %d near \"%c\"",
|
||||
state, (int) (ptr-VARDATA(in)), *ptr)));
|
||||
state, (int) (ptr - VARDATA(in)), *ptr)));
|
||||
ptr++;
|
||||
}
|
||||
|
||||
if (state==CS_IN2VALUE) {
|
||||
mptr->value = nstrdup(begin, ptr-begin);
|
||||
if (state == CS_IN2VALUE)
|
||||
{
|
||||
mptr->value = nstrdup(begin, ptr - begin);
|
||||
mptr++;
|
||||
} else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) )
|
||||
}
|
||||
else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("unexpected end of line")));
|
||||
}
|
||||
|
||||
|
||||
|
@ -99,28 +99,40 @@ typedef struct
|
||||
TI_IN_STATE valstate;
|
||||
|
||||
/* tscfg */
|
||||
int cfg_id;
|
||||
int cfg_id;
|
||||
} QPRS_STATE;
|
||||
|
||||
static char*
|
||||
get_weight(char *buf, int2 *weight) {
|
||||
static char *
|
||||
get_weight(char *buf, int2 *weight)
|
||||
{
|
||||
*weight = 0;
|
||||
|
||||
if ( *buf != ':' )
|
||||
if (*buf != ':')
|
||||
return buf;
|
||||
|
||||
buf++;
|
||||
while( *buf ) {
|
||||
switch(tolower(*buf)) {
|
||||
case 'a': *weight |= 1<<3; break;
|
||||
case 'b': *weight |= 1<<2; break;
|
||||
case 'c': *weight |= 1<<1; break;
|
||||
case 'd': *weight |= 1; break;
|
||||
default: return buf;
|
||||
while (*buf)
|
||||
{
|
||||
switch (tolower(*buf))
|
||||
{
|
||||
case 'a':
|
||||
*weight |= 1 << 3;
|
||||
break;
|
||||
case 'b':
|
||||
*weight |= 1 << 2;
|
||||
break;
|
||||
case 'c':
|
||||
*weight |= 1 << 1;
|
||||
break;
|
||||
case 'd':
|
||||
*weight |= 1;
|
||||
break;
|
||||
default:
|
||||
return buf;
|
||||
}
|
||||
buf++;
|
||||
}
|
||||
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -146,11 +158,15 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
|
||||
state->count++;
|
||||
(state->buf)++;
|
||||
return OPEN;
|
||||
} else if ( *(state->buf) == ':' ) {
|
||||
}
|
||||
else if (*(state->buf) == ':')
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("error at start of operand")));
|
||||
} else if (*(state->buf) != ' ') {
|
||||
}
|
||||
else if (*(state->buf) != ' ')
|
||||
{
|
||||
state->valstate.prsbuf = state->buf;
|
||||
state->state = WAITOPERATOR;
|
||||
if (gettoken_tsvector(&(state->valstate)))
|
||||
@ -257,7 +273,7 @@ static void
|
||||
pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
|
||||
{
|
||||
int4 count = 0;
|
||||
PRSTEXT prs;
|
||||
PRSTEXT prs;
|
||||
|
||||
prs.lenwords = 32;
|
||||
prs.curwords = 0;
|
||||
@ -266,16 +282,17 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
|
||||
|
||||
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
|
||||
|
||||
for(count=0;count<prs.curwords;count++) {
|
||||
for (count = 0; count < prs.curwords; count++)
|
||||
{
|
||||
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
|
||||
pfree( prs.words[count].word );
|
||||
pfree(prs.words[count].word);
|
||||
if (count)
|
||||
pushquery(state, OPR, (int4) '&', 0, 0, 0 );
|
||||
}
|
||||
pushquery(state, OPR, (int4) '&', 0, 0, 0);
|
||||
}
|
||||
pfree(prs.words);
|
||||
|
||||
/* XXX */
|
||||
if ( prs.curwords==0 )
|
||||
if (prs.curwords == 0)
|
||||
pushval_asis(state, VALTRUE, 0, 0, 0);
|
||||
}
|
||||
|
||||
@ -381,15 +398,18 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
|
||||
* check weight info
|
||||
*/
|
||||
static bool
|
||||
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) {
|
||||
WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16));
|
||||
uint16 len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) );
|
||||
while (len--) {
|
||||
if ( item->weight & ( 1<<ptr->weight ) )
|
||||
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
|
||||
{
|
||||
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
|
||||
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
|
||||
|
||||
while (len--)
|
||||
{
|
||||
if (item->weight & (1 << ptr->weight))
|
||||
return true;
|
||||
ptr++;
|
||||
}
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -410,8 +430,8 @@ checkcondition_str(void *checkval, ITEM * val)
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
|
||||
if (difference == 0)
|
||||
return ( val->weight && StopMiddle->haspos ) ?
|
||||
checkclass_str((CHKVAL *) checkval,StopMiddle, val) : true;
|
||||
return (val->weight && StopMiddle->haspos) ?
|
||||
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
@ -468,7 +488,7 @@ rexectsq(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
exectsq(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
||||
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
|
||||
CHKVAL chkval;
|
||||
bool result;
|
||||
@ -485,10 +505,10 @@ exectsq(PG_FUNCTION_ARGS)
|
||||
chkval.values = STRPTR(val);
|
||||
chkval.operand = GETOPERAND(query);
|
||||
result = TS_execute(
|
||||
GETQUERY(query),
|
||||
&chkval,
|
||||
true,
|
||||
checkcondition_str
|
||||
GETQUERY(query),
|
||||
&chkval,
|
||||
true,
|
||||
checkcondition_str
|
||||
);
|
||||
|
||||
PG_FREE_IF_COPY(val, 0);
|
||||
@ -534,7 +554,7 @@ findoprnd(ITEM * ptr, int4 *pos)
|
||||
* input
|
||||
*/
|
||||
static QUERYTYPE *
|
||||
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
|
||||
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
|
||||
{
|
||||
QPRS_STATE state;
|
||||
int4 i;
|
||||
@ -555,7 +575,7 @@ queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int c
|
||||
state.count = 0;
|
||||
state.num = 0;
|
||||
state.str = NULL;
|
||||
state.cfg_id=cfg_id;
|
||||
state.cfg_id = cfg_id;
|
||||
|
||||
/* init value parser's state */
|
||||
state.valstate.oprisdelim = true;
|
||||
@ -678,12 +698,30 @@ infix(INFIX * in, bool first)
|
||||
}
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
if ( in->curpol->weight ) {
|
||||
*(in->cur) = ':'; in->cur++;
|
||||
if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
|
||||
if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
|
||||
if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
|
||||
if ( in->curpol->weight & 1 ) { *(in->cur) = 'D'; in->cur++; }
|
||||
if (in->curpol->weight)
|
||||
{
|
||||
*(in->cur) = ':';
|
||||
in->cur++;
|
||||
if (in->curpol->weight & (1 << 3))
|
||||
{
|
||||
*(in->cur) = 'A';
|
||||
in->cur++;
|
||||
}
|
||||
if (in->curpol->weight & (1 << 2))
|
||||
{
|
||||
*(in->cur) = 'B';
|
||||
in->cur++;
|
||||
}
|
||||
if (in->curpol->weight & (1 << 1))
|
||||
{
|
||||
*(in->cur) = 'C';
|
||||
in->cur++;
|
||||
}
|
||||
if (in->curpol->weight & 1)
|
||||
{
|
||||
*(in->cur) = 'D';
|
||||
in->cur++;
|
||||
}
|
||||
}
|
||||
*(in->cur) = '\0';
|
||||
in->curpol++;
|
||||
@ -827,15 +865,16 @@ tsquerytree(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
Datum
|
||||
to_tsquery(PG_FUNCTION_ARGS) {
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
char *str;
|
||||
to_tsquery(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
char *str;
|
||||
QUERYTYPE *query;
|
||||
ITEM *res;
|
||||
int4 len;
|
||||
|
||||
str=text2char(in);
|
||||
PG_FREE_IF_COPY(in,1);
|
||||
str = text2char(in);
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
|
||||
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
|
||||
res = clean_fakeval_v2(GETQUERY(query), &len);
|
||||
@ -851,25 +890,25 @@ to_tsquery(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
Datum
|
||||
to_tsquery_name(PG_FUNCTION_ARGS) {
|
||||
text *name=PG_GETARG_TEXT_P(0);
|
||||
Datum res= DirectFunctionCall2(
|
||||
to_tsquery,
|
||||
Int32GetDatum( name2id_cfg(name) ),
|
||||
PG_GETARG_DATUM(1)
|
||||
to_tsquery_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
Datum res = DirectFunctionCall2(
|
||||
to_tsquery,
|
||||
Int32GetDatum(name2id_cfg(name)),
|
||||
PG_GETARG_DATUM(1)
|
||||
);
|
||||
|
||||
PG_FREE_IF_COPY(name,1);
|
||||
|
||||
PG_FREE_IF_COPY(name, 1);
|
||||
PG_RETURN_DATUM(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
to_tsquery_current(PG_FUNCTION_ARGS) {
|
||||
PG_RETURN_DATUM( DirectFunctionCall2(
|
||||
to_tsquery,
|
||||
Int32GetDatum( get_currcfg() ),
|
||||
PG_GETARG_DATUM(0)
|
||||
));
|
||||
to_tsquery_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall2(
|
||||
to_tsquery,
|
||||
Int32GetDatum(get_currcfg()),
|
||||
PG_GETARG_DATUM(0)
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
|
@ -16,10 +16,10 @@ typedef struct ITEM
|
||||
int2 left;
|
||||
int4 val;
|
||||
/* user-friendly value, must correlate with WordEntry */
|
||||
uint32
|
||||
unused:1,
|
||||
length:11,
|
||||
distance:20;
|
||||
uint32
|
||||
unused:1,
|
||||
length:11,
|
||||
distance:20;
|
||||
} ITEM;
|
||||
|
||||
/*
|
||||
@ -50,6 +50,6 @@ typedef struct
|
||||
#define VALFALSE 7
|
||||
|
||||
bool TS_execute(ITEM * curitem, void *checkval,
|
||||
bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
|
||||
bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
|
||||
|
||||
#endif
|
||||
|
@ -37,29 +37,35 @@ Datum rank_cd_def(PG_FUNCTION_ARGS);
|
||||
PG_FUNCTION_INFO_V1(get_covers);
|
||||
Datum get_covers(PG_FUNCTION_ARGS);
|
||||
|
||||
static float weights[]={0.1, 0.2, 0.4, 1.0};
|
||||
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
||||
|
||||
#define wpos(wep) ( w[ ((WordEntryPos*)(wep))->weight ] )
|
||||
|
||||
#define DEF_NORM_METHOD 0
|
||||
#define DEF_NORM_METHOD 0
|
||||
|
||||
/*
|
||||
* Returns a weight of a word collocation
|
||||
*/
|
||||
static float4 word_distance ( int4 w ) {
|
||||
if ( w>100 )
|
||||
return 1e-30;
|
||||
static float4
|
||||
word_distance(int4 w)
|
||||
{
|
||||
if (w > 100)
|
||||
return 1e-30;
|
||||
|
||||
return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
|
||||
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
|
||||
}
|
||||
|
||||
static int
|
||||
cnt_length( tsvector *t ) {
|
||||
WordEntry *ptr=ARRPTR(t), *end=(WordEntry*)STRPTR(t);
|
||||
int len = 0, clen;
|
||||
cnt_length(tsvector * t)
|
||||
{
|
||||
WordEntry *ptr = ARRPTR(t),
|
||||
*end = (WordEntry *) STRPTR(t);
|
||||
int len = 0,
|
||||
clen;
|
||||
|
||||
while(ptr < end) {
|
||||
if ( (clen=POSDATALEN(t, ptr)) == 0 )
|
||||
while (ptr < end)
|
||||
{
|
||||
if ((clen = POSDATALEN(t, ptr)) == 0)
|
||||
len += 1;
|
||||
else
|
||||
len += clen;
|
||||
@ -70,191 +76,225 @@ cnt_length( tsvector *t ) {
|
||||
}
|
||||
|
||||
static int4
|
||||
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
|
||||
{
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
}
|
||||
|
||||
static WordEntry*
|
||||
find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry*)STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
static WordEntry *
|
||||
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
|
||||
{
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static WordEntryPos POSNULL[]={
|
||||
{0,0},
|
||||
{0,MAXENTRYPOS-1}
|
||||
static WordEntryPos POSNULL[] = {
|
||||
{0, 0},
|
||||
{0, MAXENTRYPOS - 1}
|
||||
};
|
||||
|
||||
static float
|
||||
calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
|
||||
uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
|
||||
int i,k,l,p;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post,*ct;
|
||||
int4 dimt,lenct,dist;
|
||||
float res=-1.0;
|
||||
ITEM *item=GETQUERY(q);
|
||||
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
|
||||
{
|
||||
uint16 **pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
||||
int i,
|
||||
k,
|
||||
l,
|
||||
p;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post,
|
||||
*ct;
|
||||
int4 dimt,
|
||||
lenct,
|
||||
dist;
|
||||
float res = -1.0;
|
||||
ITEM *item = GETQUERY(q);
|
||||
|
||||
memset(pos,0,sizeof(uint16**) * q->size);
|
||||
*(uint16*)POSNULL = lengthof(POSNULL)-1;
|
||||
memset(pos, 0, sizeof(uint16 **) * q->size);
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
|
||||
for(i=0; i<q->size; i++) {
|
||||
|
||||
if ( item[i].type != VAL )
|
||||
for (i = 0; i < q->size; i++)
|
||||
{
|
||||
|
||||
if (item[i].type != VAL)
|
||||
continue;
|
||||
|
||||
entry=find_wordentry(t,q,&(item[i]));
|
||||
if ( !entry )
|
||||
entry = find_wordentry(t, q, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if ( entry->haspos )
|
||||
pos[i] = (uint16*)_POSDATAPTR(t,entry);
|
||||
if (entry->haspos)
|
||||
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
|
||||
else
|
||||
pos[i] = (uint16*)POSNULL;
|
||||
pos[i] = (uint16 *) POSNULL;
|
||||
|
||||
|
||||
dimt = *(uint16*)(pos[i]);
|
||||
post = (WordEntryPos*)(pos[i]+1);
|
||||
for( k=0; k<i; k++ ) {
|
||||
if ( !pos[k] ) continue;
|
||||
lenct = *(uint16*)(pos[k]);
|
||||
ct = (WordEntryPos*)(pos[k]+1);
|
||||
for(l=0; l<dimt; l++) {
|
||||
for(p=0; p<lenct; p++) {
|
||||
dist = abs( post[l].pos - ct[p].pos );
|
||||
if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
|
||||
float curw;
|
||||
if ( !dist ) dist=MAXENTRYPOS;
|
||||
curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
|
||||
res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
|
||||
dimt = *(uint16 *) (pos[i]);
|
||||
post = (WordEntryPos *) (pos[i] + 1);
|
||||
for (k = 0; k < i; k++)
|
||||
{
|
||||
if (!pos[k])
|
||||
continue;
|
||||
lenct = *(uint16 *) (pos[k]);
|
||||
ct = (WordEntryPos *) (pos[k] + 1);
|
||||
for (l = 0; l < dimt; l++)
|
||||
{
|
||||
for (p = 0; p < lenct; p++)
|
||||
{
|
||||
dist = abs(post[l].pos - ct[p].pos);
|
||||
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
|
||||
{
|
||||
float curw;
|
||||
|
||||
if (!dist)
|
||||
dist = MAXENTRYPOS;
|
||||
curw = sqrt(wpos(&(post[l])) * wpos(&(ct[p])) * word_distance(dist));
|
||||
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pfree(pos);
|
||||
return res;
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,j,i;
|
||||
float res=-1.0;
|
||||
ITEM *item=GETQUERY(q);
|
||||
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
|
||||
{
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
float res = -1.0;
|
||||
ITEM *item = GETQUERY(q);
|
||||
|
||||
*(uint16*)POSNULL = lengthof(POSNULL)-1;
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
|
||||
for(i=0; i<q->size; i++) {
|
||||
if ( item[i].type != VAL )
|
||||
for (i = 0; i < q->size; i++)
|
||||
{
|
||||
if (item[i].type != VAL)
|
||||
continue;
|
||||
|
||||
entry=find_wordentry(t,q,&(item[i]));
|
||||
if ( !entry )
|
||||
entry = find_wordentry(t, q, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if ( entry->haspos ) {
|
||||
dimt = POSDATALEN(t,entry);
|
||||
post = POSDATAPTR(t,entry);
|
||||
} else {
|
||||
dimt = *(uint16*)POSNULL;
|
||||
post = POSNULL+1;
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(t, entry);
|
||||
post = POSDATAPTR(t, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
for(j=0;j<dimt;j++) {
|
||||
if ( res < 0 )
|
||||
res = wpos( &(post[j]) );
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (res < 0)
|
||||
res = wpos(&(post[j]));
|
||||
else
|
||||
res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
|
||||
res = 1.0 - (1.0 - res) * (1.0 - wpos(&(post[j])));
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
|
||||
ITEM *item = GETQUERY(q);
|
||||
float res=0.0;
|
||||
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
|
||||
{
|
||||
ITEM *item = GETQUERY(q);
|
||||
float res = 0.0;
|
||||
|
||||
if (!t->size || !q->size)
|
||||
return 0.0;
|
||||
|
||||
res = ( item->type != VAL && item->val == (int4) '&' ) ?
|
||||
calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
|
||||
res = (item->type != VAL && item->val == (int4) '&') ?
|
||||
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
||||
|
||||
if ( res < 0 )
|
||||
if (res < 0)
|
||||
res = 1e-20;
|
||||
|
||||
switch(method) {
|
||||
case 0: break;
|
||||
case 1: res /= log((float)cnt_length(t)); break;
|
||||
case 2: res /= (float)cnt_length(t); break;
|
||||
switch (method)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
res /= log((float) cnt_length(t));
|
||||
break;
|
||||
case 2:
|
||||
res /= (float) cnt_length(t);
|
||||
break;
|
||||
default:
|
||||
/* internal error */
|
||||
elog(ERROR,"unrecognized normalization method: %d", method);
|
||||
}
|
||||
/* internal error */
|
||||
elog(ERROR, "unrecognized normalization method: %d", method);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Datum
|
||||
rank(PG_FUNCTION_ARGS) {
|
||||
rank(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
int method=DEF_NORM_METHOD;
|
||||
float res=0.0;
|
||||
float ws[ lengthof(weights) ];
|
||||
int i;
|
||||
int method = DEF_NORM_METHOD;
|
||||
float res = 0.0;
|
||||
float ws[lengthof(weights)];
|
||||
int i;
|
||||
|
||||
if ( ARR_NDIM(win) != 1 )
|
||||
if (ARR_NDIM(win) != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight must be one-dimensional")));
|
||||
|
||||
if ( ARRNELEMS(win) < lengthof(weights) )
|
||||
if (ARRNELEMS(win) < lengthof(weights))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight is too short")));
|
||||
|
||||
for(i=0;i<lengthof(weights);i++) {
|
||||
ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
|
||||
if ( ws[ i ] > 1.0 )
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
ws[i] = (((float4 *) ARR_DATA_PTR(win))[i] >= 0) ? ((float4 *) ARR_DATA_PTR(win))[i] : weights[i];
|
||||
if (ws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
}
|
||||
}
|
||||
|
||||
if ( PG_NARGS() == 4 )
|
||||
method=PG_GETARG_INT32(3);
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
res = calc_rank(ws, txt, query, method);
|
||||
|
||||
res=calc_rank(ws, txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
@ -262,108 +302,127 @@ rank(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
Datum
|
||||
rank_def(PG_FUNCTION_ARGS) {
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
rank_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
float res=0.0;
|
||||
int method=DEF_NORM_METHOD;
|
||||
float res = 0.0;
|
||||
int method = DEF_NORM_METHOD;
|
||||
|
||||
if ( PG_NARGS() == 3 )
|
||||
method=PG_GETARG_INT32(2);
|
||||
if (PG_NARGS() == 3)
|
||||
method = PG_GETARG_INT32(2);
|
||||
|
||||
res = calc_rank(weights, txt, query, method);
|
||||
|
||||
res=calc_rank(weights, txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
ITEM *item;
|
||||
int32 pos;
|
||||
} DocRepresentation;
|
||||
typedef struct
|
||||
{
|
||||
ITEM *item;
|
||||
int32 pos;
|
||||
} DocRepresentation;
|
||||
|
||||
static int
|
||||
compareDocR(const void *a, const void *b) {
|
||||
if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
|
||||
compareDocR(const void *a, const void *b)
|
||||
{
|
||||
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
|
||||
return 1;
|
||||
return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
|
||||
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
DocRepresentation *doc;
|
||||
int len;
|
||||
int len;
|
||||
} ChkDocR;
|
||||
|
||||
static bool
|
||||
checkcondition_DR(void *checkval, ITEM *val) {
|
||||
DocRepresentation *ptr = ((ChkDocR*)checkval)->doc;
|
||||
checkcondition_DR(void *checkval, ITEM * val)
|
||||
{
|
||||
DocRepresentation *ptr = ((ChkDocR *) checkval)->doc;
|
||||
|
||||
while( ptr - ((ChkDocR*)checkval)->doc < ((ChkDocR*)checkval)->len ) {
|
||||
if ( val == ptr->item )
|
||||
while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
|
||||
{
|
||||
if (val == ptr->item)
|
||||
return true;
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
|
||||
int i;
|
||||
DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff;
|
||||
ITEM *item=GETQUERY(query);
|
||||
int lastpos=*pos;
|
||||
int oldq=*q;
|
||||
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
|
||||
{
|
||||
int i;
|
||||
DocRepresentation *ptr,
|
||||
*f = (DocRepresentation *) 0xffffffff;
|
||||
ITEM *item = GETQUERY(query);
|
||||
int lastpos = *pos;
|
||||
int oldq = *q;
|
||||
|
||||
*p=0x7fffffff;
|
||||
*q=0;
|
||||
*p = 0x7fffffff;
|
||||
*q = 0;
|
||||
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item->type != VAL ) {
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
ptr = doc + *pos;
|
||||
|
||||
while(ptr-doc<len) {
|
||||
if ( ptr->item == item ) {
|
||||
if ( ptr->pos > *q ) {
|
||||
while (ptr - doc < len)
|
||||
{
|
||||
if (ptr->item == item)
|
||||
{
|
||||
if (ptr->pos > *q)
|
||||
{
|
||||
*q = ptr->pos;
|
||||
lastpos= ptr - doc;
|
||||
}
|
||||
lastpos = ptr - doc;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
item++;
|
||||
}
|
||||
|
||||
if (*q==0 )
|
||||
if (*q == 0)
|
||||
return false;
|
||||
|
||||
if (*q==oldq) { /* already check this pos */
|
||||
if (*q == oldq)
|
||||
{ /* already check this pos */
|
||||
(*pos)++;
|
||||
return Cover(doc, len, query, pos,p,q);
|
||||
}
|
||||
return Cover(doc, len, query, pos, p, q);
|
||||
}
|
||||
|
||||
item=GETQUERY(query);
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item->type != VAL ) {
|
||||
item = GETQUERY(query);
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
ptr = doc + lastpos;
|
||||
|
||||
while(ptr>=doc+*pos) {
|
||||
if ( ptr->item == item ) {
|
||||
if ( ptr->pos < *p ) {
|
||||
while (ptr >= doc + *pos)
|
||||
{
|
||||
if (ptr->item == item)
|
||||
{
|
||||
if (ptr->pos < *p)
|
||||
{
|
||||
*p = ptr->pos;
|
||||
f=ptr;
|
||||
f = ptr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -371,106 +430,135 @@ Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *
|
||||
}
|
||||
item++;
|
||||
}
|
||||
|
||||
if ( *p<=*q ) {
|
||||
ChkDocR ch = { f, (doc + lastpos)-f+1 };
|
||||
*pos = f-doc+1;
|
||||
if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
|
||||
/*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
|
||||
|
||||
if (*p <= *q)
|
||||
{
|
||||
ChkDocR ch = {f, (doc + lastpos) - f + 1};
|
||||
|
||||
*pos = f - doc + 1;
|
||||
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
|
||||
{
|
||||
/*
|
||||
* elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p,
|
||||
* *q);
|
||||
*/
|
||||
return true;
|
||||
} else
|
||||
return Cover(doc, len, query, pos,p,q);
|
||||
}
|
||||
else
|
||||
return Cover(doc, len, query, pos, p, q);
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static DocRepresentation*
|
||||
get_docrep(tsvector *txt, QUERYTYPE *query, int *doclen) {
|
||||
ITEM *item=GETQUERY(query);
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,j,i;
|
||||
int len=query->size*4,cur=0;
|
||||
static DocRepresentation *
|
||||
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
|
||||
{
|
||||
ITEM *item = GETQUERY(query);
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
int len = query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
|
||||
*(uint16*)POSNULL = lengthof(POSNULL)-1;
|
||||
doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item[i].type != VAL )
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item[i].type != VAL)
|
||||
continue;
|
||||
|
||||
entry=find_wordentry(txt,query,&(item[i]));
|
||||
if ( !entry )
|
||||
entry = find_wordentry(txt, query, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if ( entry->haspos ) {
|
||||
dimt = POSDATALEN(txt,entry);
|
||||
post = POSDATAPTR(txt,entry);
|
||||
} else {
|
||||
dimt = *(uint16*)POSNULL;
|
||||
post = POSNULL+1;
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(txt, entry);
|
||||
post = POSDATAPTR(txt, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
while( cur+dimt >= len ) {
|
||||
len*=2;
|
||||
doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
|
||||
while (cur + dimt >= len)
|
||||
{
|
||||
len *= 2;
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
for(j=0;j<dimt;j++) {
|
||||
doc[cur].item=&(item[i]);
|
||||
doc[cur].pos=post[j].pos;
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
doc[cur].item = &(item[i]);
|
||||
doc[cur].pos = post[j].pos;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
*doclen=cur;
|
||||
|
||||
if ( cur>0 ) {
|
||||
if ( cur>1 )
|
||||
*doclen = cur;
|
||||
|
||||
if (cur > 0)
|
||||
{
|
||||
if (cur > 1)
|
||||
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
pfree(doc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
rank_cd(PG_FUNCTION_ARGS) {
|
||||
int K = PG_GETARG_INT32(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
rank_cd(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int K = PG_GETARG_INT32(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
int method=DEF_NORM_METHOD;
|
||||
DocRepresentation *doc;
|
||||
float res=0.0;
|
||||
int p=0,q=0,len,cur;
|
||||
int method = DEF_NORM_METHOD;
|
||||
DocRepresentation *doc;
|
||||
float res = 0.0;
|
||||
int p = 0,
|
||||
q = 0,
|
||||
len,
|
||||
cur;
|
||||
|
||||
doc = get_docrep(txt, query, &len);
|
||||
if ( !doc ) {
|
||||
if (!doc)
|
||||
{
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(0.0);
|
||||
}
|
||||
|
||||
cur=0;
|
||||
if (K<=0)
|
||||
K=4;
|
||||
while( Cover(doc, len, query, &cur, &p, &q) )
|
||||
res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
|
||||
cur = 0;
|
||||
if (K <= 0)
|
||||
K = 4;
|
||||
while (Cover(doc, len, query, &cur, &p, &q))
|
||||
res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
|
||||
|
||||
if ( PG_NARGS() == 4 )
|
||||
method=PG_GETARG_INT32(3);
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
switch(method) {
|
||||
case 0: break;
|
||||
case 1: res /= log((float)cnt_length(txt)); break;
|
||||
case 2: res /= (float)cnt_length(txt); break;
|
||||
switch (method)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
res /= log((float) cnt_length(txt));
|
||||
break;
|
||||
case 2:
|
||||
res /= (float) cnt_length(txt);
|
||||
break;
|
||||
default:
|
||||
/* internal error */
|
||||
elog(ERROR,"unrecognized normalization method: %d", method);
|
||||
}
|
||||
/* internal error */
|
||||
elog(ERROR, "unrecognized normalization method: %d", method);
|
||||
}
|
||||
|
||||
pfree(doc);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
@ -481,120 +569,141 @@ rank_cd(PG_FUNCTION_ARGS) {
|
||||
|
||||
|
||||
Datum
|
||||
rank_cd_def(PG_FUNCTION_ARGS) {
|
||||
PG_RETURN_DATUM( DirectFunctionCall4(
|
||||
rank_cd,
|
||||
Int32GetDatum(-1),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
|
||||
));
|
||||
rank_cd_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall4(
|
||||
rank_cd,
|
||||
Int32GetDatum(-1),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
(PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
|
||||
));
|
||||
}
|
||||
|
||||
/**************debug*************/
|
||||
|
||||
typedef struct {
|
||||
char *w;
|
||||
int2 len;
|
||||
int2 pos;
|
||||
int2 start;
|
||||
int2 finish;
|
||||
} DocWord;
|
||||
typedef struct
|
||||
{
|
||||
char *w;
|
||||
int2 len;
|
||||
int2 pos;
|
||||
int2 start;
|
||||
int2 finish;
|
||||
} DocWord;
|
||||
|
||||
static int
|
||||
compareDocWord(const void *a, const void *b) {
|
||||
if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
|
||||
compareDocWord(const void *a, const void *b)
|
||||
{
|
||||
if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
|
||||
return 1;
|
||||
return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
|
||||
return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
get_covers(PG_FUNCTION_ARGS) {
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
Datum
|
||||
get_covers(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
WordEntry *pptr=ARRPTR(txt);
|
||||
int i,dlen=0,j,cur=0,len=0,rlen;
|
||||
DocWord *dw,*dwptr;
|
||||
text *out;
|
||||
char *cptr;
|
||||
WordEntry *pptr = ARRPTR(txt);
|
||||
int i,
|
||||
dlen = 0,
|
||||
j,
|
||||
cur = 0,
|
||||
len = 0,
|
||||
rlen;
|
||||
DocWord *dw,
|
||||
*dwptr;
|
||||
text *out;
|
||||
char *cptr;
|
||||
DocRepresentation *doc;
|
||||
int pos=0,p,q,olddwpos=0;
|
||||
int ncover=1;
|
||||
int pos = 0,
|
||||
p,
|
||||
q,
|
||||
olddwpos = 0;
|
||||
int ncover = 1;
|
||||
|
||||
doc = get_docrep(txt, query, &rlen);
|
||||
|
||||
if ( !doc ) {
|
||||
out=palloc(VARHDRSZ);
|
||||
if (!doc)
|
||||
{
|
||||
out = palloc(VARHDRSZ);
|
||||
VARATT_SIZEP(out) = VARHDRSZ;
|
||||
PG_FREE_IF_COPY(txt,0);
|
||||
PG_FREE_IF_COPY(query,1);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
for(i=0;i<txt->size;i++) {
|
||||
for (i = 0; i < txt->size; i++)
|
||||
{
|
||||
if (!pptr[i].haspos)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("no pos info")));
|
||||
dlen += POSDATALEN(txt,&(pptr[i]));
|
||||
dlen += POSDATALEN(txt, &(pptr[i]));
|
||||
}
|
||||
|
||||
dwptr=dw=palloc(sizeof(DocWord)*dlen);
|
||||
memset(dw,0,sizeof(DocWord)*dlen);
|
||||
dwptr = dw = palloc(sizeof(DocWord) * dlen);
|
||||
memset(dw, 0, sizeof(DocWord) * dlen);
|
||||
|
||||
for(i=0;i<txt->size;i++) {
|
||||
WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i]));
|
||||
for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
|
||||
dw[cur].w=STRPTR(txt)+pptr[i].pos;
|
||||
dw[cur].len=pptr[i].len;
|
||||
dw[cur].pos=posdata[j].pos;
|
||||
for (i = 0; i < txt->size; i++)
|
||||
{
|
||||
WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
|
||||
|
||||
for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
|
||||
{
|
||||
dw[cur].w = STRPTR(txt) + pptr[i].pos;
|
||||
dw[cur].len = pptr[i].len;
|
||||
dw[cur].pos = posdata[j].pos;
|
||||
cur++;
|
||||
}
|
||||
len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
|
||||
len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
|
||||
}
|
||||
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
|
||||
|
||||
while( Cover(doc, rlen, query, &pos, &p, &q) ) {
|
||||
dwptr=dw+olddwpos;
|
||||
while(dwptr->pos < p && dwptr-dw<dlen)
|
||||
while (Cover(doc, rlen, query, &pos, &p, &q))
|
||||
{
|
||||
dwptr = dw + olddwpos;
|
||||
while (dwptr->pos < p && dwptr - dw < dlen)
|
||||
dwptr++;
|
||||
olddwpos=dwptr-dw;
|
||||
dwptr->start=ncover;
|
||||
while(dwptr->pos < q+1 && dwptr-dw<dlen)
|
||||
olddwpos = dwptr - dw;
|
||||
dwptr->start = ncover;
|
||||
while (dwptr->pos < q + 1 && dwptr - dw < dlen)
|
||||
dwptr++;
|
||||
(dwptr-1)->finish=ncover;
|
||||
len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
|
||||
ncover++;
|
||||
}
|
||||
|
||||
out=palloc(VARHDRSZ+len);
|
||||
cptr=((char*)out)+VARHDRSZ;
|
||||
dwptr=dw;
|
||||
(dwptr - 1)->finish = ncover;
|
||||
len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
|
||||
ncover++;
|
||||
}
|
||||
|
||||
while( dwptr-dw < dlen) {
|
||||
if ( dwptr->start ) {
|
||||
sprintf(cptr,"{%d ",dwptr->start);
|
||||
cptr=strchr(cptr,'\0');
|
||||
out = palloc(VARHDRSZ + len);
|
||||
cptr = ((char *) out) + VARHDRSZ;
|
||||
dwptr = dw;
|
||||
|
||||
while (dwptr - dw < dlen)
|
||||
{
|
||||
if (dwptr->start)
|
||||
{
|
||||
sprintf(cptr, "{%d ", dwptr->start);
|
||||
cptr = strchr(cptr, '\0');
|
||||
}
|
||||
memcpy(cptr,dwptr->w,dwptr->len);
|
||||
cptr+=dwptr->len;
|
||||
*cptr=' ';
|
||||
memcpy(cptr, dwptr->w, dwptr->len);
|
||||
cptr += dwptr->len;
|
||||
*cptr = ' ';
|
||||
cptr++;
|
||||
if ( dwptr->finish ) {
|
||||
sprintf(cptr,"}%d ",dwptr->finish);
|
||||
cptr=strchr(cptr,'\0');
|
||||
if (dwptr->finish)
|
||||
{
|
||||
sprintf(cptr, "}%d ", dwptr->finish);
|
||||
cptr = strchr(cptr, '\0');
|
||||
}
|
||||
dwptr++;
|
||||
}
|
||||
}
|
||||
|
||||
VARATT_SIZEP(out) = cptr - ((char *) out);
|
||||
|
||||
VARATT_SIZEP(out) = cptr - ((char*)out);
|
||||
|
||||
pfree(dw);
|
||||
pfree(doc);
|
||||
|
||||
PG_FREE_IF_COPY(txt,0);
|
||||
PG_FREE_IF_COPY(query,1);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*
|
||||
/*
|
||||
* simple but fast map from str to Oid
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
@ -11,69 +11,85 @@
|
||||
#include "common.h"
|
||||
|
||||
static int
|
||||
compareSNMapEntry(const void *a, const void *b) {
|
||||
return strcmp( ((SNMapEntry*)a)->key, ((SNMapEntry*)b)->key );
|
||||
compareSNMapEntry(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(((SNMapEntry *) a)->key, ((SNMapEntry *) b)->key);
|
||||
}
|
||||
|
||||
void
|
||||
addSNMap( SNMap *map, char *key, Oid value ) {
|
||||
if (map->len>=map->reallen) {
|
||||
void
|
||||
addSNMap(SNMap * map, char *key, Oid value)
|
||||
{
|
||||
if (map->len >= map->reallen)
|
||||
{
|
||||
SNMapEntry *tmp;
|
||||
int len = (map->reallen) ? 2*map->reallen : 16;
|
||||
tmp=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * len);
|
||||
if ( !tmp )
|
||||
int len = (map->reallen) ? 2 * map->reallen : 16;
|
||||
|
||||
tmp = (SNMapEntry *) realloc(map->list, sizeof(SNMapEntry) * len);
|
||||
if (!tmp)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
map->reallen=len;
|
||||
map->list=tmp;
|
||||
map->reallen = len;
|
||||
map->list = tmp;
|
||||
}
|
||||
map->list[ map->len ].key = strdup(key);
|
||||
if ( ! map->list[ map->len ].key )
|
||||
map->list[map->len].key = strdup(key);
|
||||
if (!map->list[map->len].key)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
map->list[ map->len ].value=value;
|
||||
map->list[map->len].value = value;
|
||||
map->len++;
|
||||
if ( map->len>1 ) qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
|
||||
if (map->len > 1)
|
||||
qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
|
||||
}
|
||||
|
||||
void
|
||||
addSNMap_t( SNMap *map, text *key, Oid value ) {
|
||||
char *k=text2char( key );
|
||||
void
|
||||
addSNMap_t(SNMap * map, text *key, Oid value)
|
||||
{
|
||||
char *k = text2char(key);
|
||||
|
||||
addSNMap(map, k, value);
|
||||
pfree(k);
|
||||
}
|
||||
|
||||
Oid
|
||||
findSNMap( SNMap *map, char *key ) {
|
||||
Oid
|
||||
findSNMap(SNMap * map, char *key)
|
||||
{
|
||||
SNMapEntry *ptr;
|
||||
SNMapEntry ks = {key, 0};
|
||||
if ( map->len==0 || !map->list )
|
||||
return 0;
|
||||
ptr = (SNMapEntry*) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
|
||||
SNMapEntry ks = {key, 0};
|
||||
|
||||
if (map->len == 0 || !map->list)
|
||||
return 0;
|
||||
ptr = (SNMapEntry *) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
|
||||
return (ptr) ? ptr->value : 0;
|
||||
}
|
||||
|
||||
Oid
|
||||
findSNMap_t( SNMap *map, text *key ) {
|
||||
char *k=text2char(key);
|
||||
int res;
|
||||
res= findSNMap(map, k);
|
||||
Oid
|
||||
findSNMap_t(SNMap * map, text *key)
|
||||
{
|
||||
char *k = text2char(key);
|
||||
int res;
|
||||
|
||||
res = findSNMap(map, k);
|
||||
pfree(k);
|
||||
return res;
|
||||
}
|
||||
|
||||
void freeSNMap( SNMap *map ) {
|
||||
SNMapEntry *entry=map->list;
|
||||
if ( map->list ) {
|
||||
while( map->len ) {
|
||||
if ( entry->key ) free(entry->key);
|
||||
entry++; map->len--;
|
||||
void
|
||||
freeSNMap(SNMap * map)
|
||||
{
|
||||
SNMapEntry *entry = map->list;
|
||||
|
||||
if (map->list)
|
||||
{
|
||||
while (map->len)
|
||||
{
|
||||
if (entry->key)
|
||||
free(entry->key);
|
||||
entry++;
|
||||
map->len--;
|
||||
}
|
||||
free( map->list );
|
||||
free(map->list);
|
||||
}
|
||||
memset(map,0,sizeof(SNMap));
|
||||
memset(map, 0, sizeof(SNMap));
|
||||
}
|
||||
|
||||
|
||||
|
@ -3,21 +3,23 @@
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
typedef struct {
|
||||
char *key;
|
||||
Oid value;
|
||||
} SNMapEntry;
|
||||
typedef struct
|
||||
{
|
||||
char *key;
|
||||
Oid value;
|
||||
} SNMapEntry;
|
||||
|
||||
typedef struct {
|
||||
int len;
|
||||
int reallen;
|
||||
SNMapEntry *list;
|
||||
} SNMap;
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
int reallen;
|
||||
SNMapEntry *list;
|
||||
} SNMap;
|
||||
|
||||
void addSNMap( SNMap *map, char *key, Oid value );
|
||||
void addSNMap_t( SNMap *map, text *key, Oid value );
|
||||
Oid findSNMap( SNMap *map, char *key );
|
||||
Oid findSNMap_t( SNMap *map, text *key );
|
||||
void freeSNMap( SNMap *map );
|
||||
void addSNMap(SNMap * map, char *key, Oid value);
|
||||
void addSNMap_t(SNMap * map, text *key, Oid value);
|
||||
Oid findSNMap(SNMap * map, char *key);
|
||||
Oid findSNMap_t(SNMap * map, text *key);
|
||||
void freeSNMap(SNMap * map);
|
||||
|
||||
#endif
|
||||
|
@ -2,48 +2,64 @@
|
||||
|
||||
#include "header.h"
|
||||
|
||||
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
|
||||
{ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
|
||||
z->p = create_s();
|
||||
if (S_size)
|
||||
{ z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
|
||||
{ int i;
|
||||
for (i = 0; i < S_size; i++) z->S[i] = create_s();
|
||||
}
|
||||
z->S_size = S_size;
|
||||
}
|
||||
|
||||
if (I_size)
|
||||
{ z->I = (int *) calloc(I_size, sizeof(int));
|
||||
z->I_size = I_size;
|
||||
}
|
||||
|
||||
if (B_size)
|
||||
{ z->B = (symbol *) calloc(B_size, sizeof(symbol));
|
||||
z->B_size = B_size;
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
extern void SN_close_env(struct SN_env * z)
|
||||
extern struct SN_env *
|
||||
SN_create_env(int S_size, int I_size, int B_size)
|
||||
{
|
||||
if (z->S_size)
|
||||
{
|
||||
{ int i;
|
||||
for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
|
||||
}
|
||||
free(z->S);
|
||||
}
|
||||
if (z->I_size) free(z->I);
|
||||
if (z->B_size) free(z->B);
|
||||
if (z->p) lose_s(z->p);
|
||||
free(z);
|
||||
struct SN_env *z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
|
||||
|
||||
z->p = create_s();
|
||||
if (S_size)
|
||||
{
|
||||
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < S_size; i++)
|
||||
z->S[i] = create_s();
|
||||
}
|
||||
z->S_size = S_size;
|
||||
}
|
||||
|
||||
if (I_size)
|
||||
{
|
||||
z->I = (int *) calloc(I_size, sizeof(int));
|
||||
z->I_size = I_size;
|
||||
}
|
||||
|
||||
if (B_size)
|
||||
{
|
||||
z->B = (symbol *) calloc(B_size, sizeof(symbol));
|
||||
z->B_size = B_size;
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
|
||||
extern void
|
||||
SN_close_env(struct SN_env * z)
|
||||
{
|
||||
replace_s(z, 0, z->l, size, s);
|
||||
z->c = 0;
|
||||
if (z->S_size)
|
||||
{
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < z->S_size; i++)
|
||||
lose_s(z->S[i]);
|
||||
}
|
||||
free(z->S);
|
||||
}
|
||||
if (z->I_size)
|
||||
free(z->I);
|
||||
if (z->B_size)
|
||||
free(z->B);
|
||||
if (z->p)
|
||||
lose_s(z->p);
|
||||
free(z);
|
||||
}
|
||||
|
||||
extern void
|
||||
SN_set_current(struct SN_env * z, int size, const symbol * s)
|
||||
{
|
||||
replace_s(z, 0, z->l, size, s);
|
||||
z->c = 0;
|
||||
}
|
||||
|
@ -11,17 +11,24 @@ typedef unsigned char symbol;
|
||||
|
||||
*/
|
||||
|
||||
struct SN_env {
|
||||
symbol * p;
|
||||
int c; int a; int l; int lb; int bra; int ket;
|
||||
int S_size; int I_size; int B_size;
|
||||
symbol * * S;
|
||||
int * I;
|
||||
symbol * B;
|
||||
struct SN_env
|
||||
{
|
||||
symbol *p;
|
||||
int c;
|
||||
int a;
|
||||
int l;
|
||||
int lb;
|
||||
int bra;
|
||||
int ket;
|
||||
int S_size;
|
||||
int I_size;
|
||||
int B_size;
|
||||
symbol **S;
|
||||
int *I;
|
||||
symbol *B;
|
||||
};
|
||||
|
||||
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
|
||||
extern struct SN_env *SN_create_env(int S_size, int I_size, int B_size);
|
||||
extern void SN_close_env(struct SN_env * z);
|
||||
|
||||
extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,7 @@
|
||||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
extern struct SN_env * english_create_env(void);
|
||||
extern struct SN_env *english_create_env(void);
|
||||
extern void english_close_env(struct SN_env * z);
|
||||
|
||||
extern int english_stem(struct SN_env * z);
|
||||
|
||||
extern int english_stem(struct SN_env * z);
|
||||
|
@ -2,41 +2,42 @@
|
||||
|
||||
#define HEAD 2*sizeof(int)
|
||||
|
||||
#define SIZE(p) ((int *)(p))[-1]
|
||||
#define SIZE(p) ((int *)(p))[-1]
|
||||
#define SET_SIZE(p, n) ((int *)(p))[-1] = n
|
||||
#define CAPACITY(p) ((int *)(p))[-2]
|
||||
|
||||
struct among
|
||||
{ int s_size; /* number of chars in string */
|
||||
symbol * s; /* search string */
|
||||
int substring_i;/* index to longest matching substring */
|
||||
int result; /* result of the lookup */
|
||||
int (* function)(struct SN_env *);
|
||||
{
|
||||
int s_size; /* number of chars in string */
|
||||
symbol *s; /* search string */
|
||||
int substring_i; /* index to longest matching substring */
|
||||
int result; /* result of the lookup */
|
||||
int (*function) (struct SN_env *);
|
||||
};
|
||||
|
||||
extern symbol * create_s(void);
|
||||
extern symbol *create_s(void);
|
||||
extern void lose_s(symbol * p);
|
||||
|
||||
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
|
||||
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
|
||||
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
|
||||
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
|
||||
extern int in_grouping(struct SN_env * z, unsigned char *s, int min, int max);
|
||||
extern int in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max);
|
||||
extern int out_grouping(struct SN_env * z, unsigned char *s, int min, int max);
|
||||
extern int out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max);
|
||||
|
||||
extern int in_range(struct SN_env * z, int min, int max);
|
||||
extern int in_range_b(struct SN_env * z, int min, int max);
|
||||
extern int out_range(struct SN_env * z, int min, int max);
|
||||
extern int out_range_b(struct SN_env * z, int min, int max);
|
||||
extern int in_range(struct SN_env * z, int min, int max);
|
||||
extern int in_range_b(struct SN_env * z, int min, int max);
|
||||
extern int out_range(struct SN_env * z, int min, int max);
|
||||
extern int out_range_b(struct SN_env * z, int min, int max);
|
||||
|
||||
extern int eq_s(struct SN_env * z, int s_size, symbol * s);
|
||||
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
|
||||
extern int eq_v(struct SN_env * z, symbol * p);
|
||||
extern int eq_v_b(struct SN_env * z, symbol * p);
|
||||
extern int eq_s(struct SN_env * z, int s_size, symbol * s);
|
||||
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
|
||||
extern int eq_v(struct SN_env * z, symbol * p);
|
||||
extern int eq_v_b(struct SN_env * z, symbol * p);
|
||||
|
||||
extern int find_among(struct SN_env * z, struct among * v, int v_size);
|
||||
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
|
||||
extern int find_among(struct SN_env * z, struct among * v, int v_size);
|
||||
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
|
||||
|
||||
extern symbol * increase_size(symbol * p, int n);
|
||||
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
|
||||
extern symbol *increase_size(symbol * p, int n);
|
||||
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
|
||||
extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
|
||||
extern void slice_from_v(struct SN_env * z, symbol * p);
|
||||
extern void slice_del(struct SN_env * z);
|
||||
@ -44,8 +45,7 @@ extern void slice_del(struct SN_env * z);
|
||||
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
|
||||
extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
|
||||
|
||||
extern symbol * slice_to(struct SN_env * z, symbol * p);
|
||||
extern symbol * assign_to(struct SN_env * z, symbol * p);
|
||||
extern symbol *slice_to(struct SN_env * z, symbol * p);
|
||||
extern symbol *assign_to(struct SN_env * z, symbol * p);
|
||||
|
||||
extern void debug(struct SN_env * z, int number, int line_count);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,7 @@
|
||||
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
extern struct SN_env * russian_create_env(void);
|
||||
extern struct SN_env *russian_create_env(void);
|
||||
extern void russian_close_env(struct SN_env * z);
|
||||
|
||||
extern int russian_stem(struct SN_env * z);
|
||||
|
||||
extern int russian_stem(struct SN_env * z);
|
||||
|
@ -9,320 +9,507 @@
|
||||
|
||||
#define CREATE_SIZE 1
|
||||
|
||||
extern symbol * create_s(void)
|
||||
{ symbol * p = (symbol *) (HEAD + (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)));
|
||||
CAPACITY(p) = CREATE_SIZE;
|
||||
SET_SIZE(p, CREATE_SIZE);
|
||||
return p;
|
||||
}
|
||||
|
||||
extern void lose_s(symbol * p) { free((char *) p - HEAD); }
|
||||
|
||||
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
|
||||
{ if (z->c >= z->l) return 0;
|
||||
{ int ch = z->p[z->c];
|
||||
if
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
}
|
||||
z->c++; return 1;
|
||||
}
|
||||
|
||||
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
|
||||
{ if (z->c <= z->lb) return 0;
|
||||
{ int ch = z->p[z->c - 1];
|
||||
if
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
}
|
||||
z->c--; return 1;
|
||||
}
|
||||
|
||||
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
|
||||
{ if (z->c >= z->l) return 0;
|
||||
{ int ch = z->p[z->c];
|
||||
unless
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
}
|
||||
z->c++; return 1;
|
||||
}
|
||||
|
||||
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
|
||||
{ if (z->c <= z->lb) return 0;
|
||||
{ int ch = z->p[z->c - 1];
|
||||
unless
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
}
|
||||
z->c--; return 1;
|
||||
}
|
||||
|
||||
|
||||
extern int in_range(struct SN_env * z, int min, int max)
|
||||
{ if (z->c >= z->l) return 0;
|
||||
{ int ch = z->p[z->c];
|
||||
if
|
||||
(ch > max || ch < min) return 0;
|
||||
}
|
||||
z->c++; return 1;
|
||||
}
|
||||
|
||||
extern int in_range_b(struct SN_env * z, int min, int max)
|
||||
{ if (z->c <= z->lb) return 0;
|
||||
{ int ch = z->p[z->c - 1];
|
||||
if
|
||||
(ch > max || ch < min) return 0;
|
||||
}
|
||||
z->c--; return 1;
|
||||
}
|
||||
|
||||
extern int out_range(struct SN_env * z, int min, int max)
|
||||
{ if (z->c >= z->l) return 0;
|
||||
{ int ch = z->p[z->c];
|
||||
unless
|
||||
(ch > max || ch < min) return 0;
|
||||
}
|
||||
z->c++; return 1;
|
||||
}
|
||||
|
||||
extern int out_range_b(struct SN_env * z, int min, int max)
|
||||
{ if (z->c <= z->lb) return 0;
|
||||
{ int ch = z->p[z->c - 1];
|
||||
unless
|
||||
(ch > max || ch < min) return 0;
|
||||
}
|
||||
z->c--; return 1;
|
||||
}
|
||||
|
||||
extern int eq_s(struct SN_env * z, int s_size, symbol * s)
|
||||
{ if (z->l - z->c < s_size ||
|
||||
memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
|
||||
z->c += s_size; return 1;
|
||||
}
|
||||
|
||||
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
|
||||
{ if (z->c - z->lb < s_size ||
|
||||
memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
|
||||
z->c -= s_size; return 1;
|
||||
}
|
||||
|
||||
extern int eq_v(struct SN_env * z, symbol * p)
|
||||
{ return eq_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int eq_v_b(struct SN_env * z, symbol * p)
|
||||
{ return eq_s_b(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int find_among(struct SN_env * z, struct among * v, int v_size)
|
||||
extern symbol *
|
||||
create_s(void)
|
||||
{
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
symbol *p = (symbol *) (HEAD + (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)));
|
||||
|
||||
int c = z->c; int l = z->l;
|
||||
symbol * q = z->p + c;
|
||||
CAPACITY(p) = CREATE_SIZE;
|
||||
SET_SIZE(p, CREATE_SIZE);
|
||||
return p;
|
||||
}
|
||||
|
||||
struct among * w;
|
||||
extern void lose_s(symbol * p)
|
||||
{
|
||||
free((char *) p - HEAD);
|
||||
}
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
extern int
|
||||
in_grouping(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
if (z->c >= z->l)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c];
|
||||
|
||||
int first_key_inspected = 0;
|
||||
if
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 0;
|
||||
}
|
||||
z->c++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
while(1)
|
||||
{ int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j; /* smaller */
|
||||
w = v + k;
|
||||
{ int i; for (i = common; i < w->s_size; i++)
|
||||
{ if (c + common == l) { diff = -1; break; }
|
||||
diff = q[common] - w->s[i];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0) { j = k; common_j = common; }
|
||||
else { i = k; common_i = common; }
|
||||
if (j - i <= 1)
|
||||
{ if (i > 0) break; /* v->s has been inspected */
|
||||
if (j == i) break; /* only one item in v */
|
||||
extern int
|
||||
in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
if (z->c <= z->lb)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c - 1];
|
||||
|
||||
/* - but now we need to go round once more to get
|
||||
v->s inspected. This looks messy, but is actually
|
||||
the optimal approach. */
|
||||
if
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
||||
return 0;
|
||||
}
|
||||
z->c--;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while(1)
|
||||
{ w = v + i;
|
||||
if (common_i >= w->s_size)
|
||||
{ z->c = c + w->s_size;
|
||||
if (w->function == 0) return w->result;
|
||||
{ int res = w->function(z);
|
||||
z->c = c + w->s_size;
|
||||
if (res) return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0) return 0;
|
||||
}
|
||||
extern int
|
||||
out_grouping(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
if (z->c >= z->l)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c];
|
||||
|
||||
unless
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
}
|
||||
z->c++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max)
|
||||
{
|
||||
if (z->c <= z->lb)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c - 1];
|
||||
|
||||
unless
|
||||
(ch > max || (ch -= min) < 0 ||
|
||||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
||||
}
|
||||
z->c--;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
in_range(struct SN_env * z, int min, int max)
|
||||
{
|
||||
if (z->c >= z->l)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c];
|
||||
|
||||
if
|
||||
(ch > max || ch < min)
|
||||
return 0;
|
||||
}
|
||||
z->c++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
in_range_b(struct SN_env * z, int min, int max)
|
||||
{
|
||||
if (z->c <= z->lb)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c - 1];
|
||||
|
||||
if
|
||||
(ch > max || ch < min)
|
||||
return 0;
|
||||
}
|
||||
z->c--;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_range(struct SN_env * z, int min, int max)
|
||||
{
|
||||
if (z->c >= z->l)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c];
|
||||
|
||||
unless
|
||||
(ch > max || ch < min) return 0;
|
||||
}
|
||||
z->c++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
out_range_b(struct SN_env * z, int min, int max)
|
||||
{
|
||||
if (z->c <= z->lb)
|
||||
return 0;
|
||||
{
|
||||
int ch = z->p[z->c - 1];
|
||||
|
||||
unless
|
||||
(ch > max || ch < min) return 0;
|
||||
}
|
||||
z->c--;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_s(struct SN_env * z, int s_size, symbol * s)
|
||||
{
|
||||
if (z->l - z->c < s_size ||
|
||||
memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0)
|
||||
return 0;
|
||||
z->c += s_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_s_b(struct SN_env * z, int s_size, symbol * s)
|
||||
{
|
||||
if (z->c - z->lb < s_size ||
|
||||
memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0)
|
||||
return 0;
|
||||
z->c -= s_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_v(struct SN_env * z, symbol * p)
|
||||
{
|
||||
return eq_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int
|
||||
eq_v_b(struct SN_env * z, symbol * p)
|
||||
{
|
||||
return eq_s_b(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern int
|
||||
find_among(struct SN_env * z, struct among * v, int v_size)
|
||||
{
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = z->c;
|
||||
int l = z->l;
|
||||
symbol *q = z->p + c;
|
||||
|
||||
struct among *w;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
int first_key_inspected = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j; /* smaller */
|
||||
|
||||
w = v + k;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = common; i < w->s_size; i++)
|
||||
{
|
||||
if (c + common == l)
|
||||
{
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = q[common] - w->s[i];
|
||||
if (diff != 0)
|
||||
break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0)
|
||||
{
|
||||
j = k;
|
||||
common_j = common;
|
||||
}
|
||||
else
|
||||
{
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1)
|
||||
{
|
||||
if (i > 0)
|
||||
break; /* v->s has been inspected */
|
||||
if (j == i)
|
||||
break; /* only one item in v */
|
||||
|
||||
/*
|
||||
* - but now we need to go round once more to get v->s
|
||||
* inspected. This looks messy, but is actually the optimal
|
||||
* approach.
|
||||
*/
|
||||
|
||||
if (first_key_inspected)
|
||||
break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
w = v + i;
|
||||
if (common_i >= w->s_size)
|
||||
{
|
||||
z->c = c + w->s_size;
|
||||
if (w->function == 0)
|
||||
return w->result;
|
||||
{
|
||||
int res = w->function(z);
|
||||
|
||||
z->c = c + w->s_size;
|
||||
if (res)
|
||||
return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* find_among_b is for backwards processing. Same comments apply */
|
||||
|
||||
extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
|
||||
extern int
|
||||
find_among_b(struct SN_env * z, struct among * v, int v_size)
|
||||
{
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
int i = 0;
|
||||
int j = v_size;
|
||||
|
||||
int c = z->c; int lb = z->lb;
|
||||
symbol * q = z->p + c - 1;
|
||||
int c = z->c;
|
||||
int lb = z->lb;
|
||||
symbol *q = z->p + c - 1;
|
||||
|
||||
struct among * w;
|
||||
struct among *w;
|
||||
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
int common_i = 0;
|
||||
int common_j = 0;
|
||||
|
||||
int first_key_inspected = 0;
|
||||
int first_key_inspected = 0;
|
||||
|
||||
while(1)
|
||||
{ int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j;
|
||||
w = v + k;
|
||||
{ int i; for (i = w->s_size - 1 - common; i >= 0; i--)
|
||||
{ if (c - common == lb) { diff = -1; break; }
|
||||
diff = q[- common] - w->s[i];
|
||||
if (diff != 0) break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0) { j = k; common_j = common; }
|
||||
else { i = k; common_i = common; }
|
||||
if (j - i <= 1)
|
||||
{ if (i > 0) break;
|
||||
if (j == i) break;
|
||||
if (first_key_inspected) break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while(1)
|
||||
{ w = v + i;
|
||||
if (common_i >= w->s_size)
|
||||
{ z->c = c - w->s_size;
|
||||
if (w->function == 0) return w->result;
|
||||
{ int res = w->function(z);
|
||||
z->c = c - w->s_size;
|
||||
if (res) return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0) return 0;
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
int k = i + ((j - i) >> 1);
|
||||
int diff = 0;
|
||||
int common = common_i < common_j ? common_i : common_j;
|
||||
|
||||
w = v + k;
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = w->s_size - 1 - common; i >= 0; i--)
|
||||
{
|
||||
if (c - common == lb)
|
||||
{
|
||||
diff = -1;
|
||||
break;
|
||||
}
|
||||
diff = q[-common] - w->s[i];
|
||||
if (diff != 0)
|
||||
break;
|
||||
common++;
|
||||
}
|
||||
}
|
||||
if (diff < 0)
|
||||
{
|
||||
j = k;
|
||||
common_j = common;
|
||||
}
|
||||
else
|
||||
{
|
||||
i = k;
|
||||
common_i = common;
|
||||
}
|
||||
if (j - i <= 1)
|
||||
{
|
||||
if (i > 0)
|
||||
break;
|
||||
if (j == i)
|
||||
break;
|
||||
if (first_key_inspected)
|
||||
break;
|
||||
first_key_inspected = 1;
|
||||
}
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
w = v + i;
|
||||
if (common_i >= w->s_size)
|
||||
{
|
||||
z->c = c - w->s_size;
|
||||
if (w->function == 0)
|
||||
return w->result;
|
||||
{
|
||||
int res = w->function(z);
|
||||
|
||||
z->c = c - w->s_size;
|
||||
if (res)
|
||||
return w->result;
|
||||
}
|
||||
}
|
||||
i = w->substring_i;
|
||||
if (i < 0)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern symbol * increase_size(symbol * p, int n)
|
||||
{ int new_size = n + 20;
|
||||
symbol * q = (symbol *) (HEAD + (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol)));
|
||||
CAPACITY(q) = new_size;
|
||||
memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
|
||||
extern symbol *
|
||||
increase_size(symbol * p, int n)
|
||||
{
|
||||
int new_size = n + 20;
|
||||
symbol *q = (symbol *) (HEAD + (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol)));
|
||||
|
||||
CAPACITY(q) = new_size;
|
||||
memmove(q, p, CAPACITY(p) * sizeof(symbol));
|
||||
lose_s(p);
|
||||
return q;
|
||||
}
|
||||
|
||||
/* to replace symbols between c_bra and c_ket in z->p by the
|
||||
s_size symbols at s
|
||||
*/
|
||||
|
||||
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
|
||||
{ int adjustment = s_size - (c_ket - c_bra);
|
||||
int len = SIZE(z->p);
|
||||
if (adjustment != 0)
|
||||
{ if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
|
||||
memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
|
||||
SET_SIZE(z->p, adjustment + len);
|
||||
z->l += adjustment;
|
||||
if (z->c >= c_ket) z->c += adjustment; else
|
||||
if (z->c > c_bra) z->c = c_bra;
|
||||
}
|
||||
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
||||
return adjustment;
|
||||
}
|
||||
|
||||
static void slice_check(struct SN_env * z)
|
||||
extern int
|
||||
replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
|
||||
{
|
||||
if (!(0 <= z->bra &&
|
||||
z->bra <= z->ket &&
|
||||
z->ket <= z->l &&
|
||||
z->l <= SIZE(z->p))) /* this line could be removed */
|
||||
{
|
||||
fprintf(stderr, "faulty slice operation:\n");
|
||||
debug(z, -1, 0);
|
||||
exit(1);
|
||||
}
|
||||
int adjustment = s_size - (c_ket - c_bra);
|
||||
int len = SIZE(z->p);
|
||||
|
||||
if (adjustment != 0)
|
||||
{
|
||||
if (adjustment + len > CAPACITY(z->p))
|
||||
z->p = increase_size(z->p, adjustment + len);
|
||||
memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
|
||||
SET_SIZE(z->p, adjustment + len);
|
||||
z->l += adjustment;
|
||||
if (z->c >= c_ket)
|
||||
z->c += adjustment;
|
||||
else if (z->c > c_bra)
|
||||
z->c = c_bra;
|
||||
}
|
||||
unless(s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
||||
return adjustment;
|
||||
}
|
||||
|
||||
extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
|
||||
{ slice_check(z);
|
||||
replace_s(z, z->bra, z->ket, s_size, s);
|
||||
static void
|
||||
slice_check(struct SN_env * z)
|
||||
{
|
||||
if (!(0 <= z->bra &&
|
||||
z->bra <= z->ket &&
|
||||
z->ket <= z->l &&
|
||||
z->l <= SIZE(z->p))) /* this line could be removed */
|
||||
{
|
||||
fprintf(stderr, "faulty slice operation:\n");
|
||||
debug(z, -1, 0);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
extern void slice_from_v(struct SN_env * z, symbol * p)
|
||||
{ slice_from_s(z, SIZE(p), p);
|
||||
extern void
|
||||
slice_from_s(struct SN_env * z, int s_size, symbol * s)
|
||||
{
|
||||
slice_check(z);
|
||||
replace_s(z, z->bra, z->ket, s_size, s);
|
||||
}
|
||||
|
||||
extern void slice_del(struct SN_env * z)
|
||||
{ slice_from_s(z, 0, 0);
|
||||
extern void
|
||||
slice_from_v(struct SN_env * z, symbol * p)
|
||||
{
|
||||
slice_from_s(z, SIZE(p), p);
|
||||
}
|
||||
|
||||
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
|
||||
{ int adjustment = replace_s(z, bra, ket, s_size, s);
|
||||
if (bra <= z->bra) z->bra += adjustment;
|
||||
if (bra <= z->ket) z->ket += adjustment;
|
||||
extern void
|
||||
slice_del(struct SN_env * z)
|
||||
{
|
||||
slice_from_s(z, 0, 0);
|
||||
}
|
||||
|
||||
extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
|
||||
{ int adjustment = replace_s(z, bra, ket, SIZE(p), p);
|
||||
if (bra <= z->bra) z->bra += adjustment;
|
||||
if (bra <= z->ket) z->ket += adjustment;
|
||||
extern void
|
||||
insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
|
||||
{
|
||||
int adjustment = replace_s(z, bra, ket, s_size, s);
|
||||
|
||||
if (bra <= z->bra)
|
||||
z->bra += adjustment;
|
||||
if (bra <= z->ket)
|
||||
z->ket += adjustment;
|
||||
}
|
||||
|
||||
extern symbol * slice_to(struct SN_env * z, symbol * p)
|
||||
{ slice_check(z);
|
||||
{ int len = z->ket - z->bra;
|
||||
if (CAPACITY(p) < len) p = increase_size(p, len);
|
||||
memmove(p, z->p + z->bra, len * sizeof(symbol));
|
||||
SET_SIZE(p, len);
|
||||
}
|
||||
return p;
|
||||
extern void
|
||||
insert_v(struct SN_env * z, int bra, int ket, symbol * p)
|
||||
{
|
||||
int adjustment = replace_s(z, bra, ket, SIZE(p), p);
|
||||
|
||||
if (bra <= z->bra)
|
||||
z->bra += adjustment;
|
||||
if (bra <= z->ket)
|
||||
z->ket += adjustment;
|
||||
}
|
||||
|
||||
extern symbol * assign_to(struct SN_env * z, symbol * p)
|
||||
{ int len = z->l;
|
||||
if (CAPACITY(p) < len) p = increase_size(p, len);
|
||||
memmove(p, z->p, len * sizeof(symbol));
|
||||
SET_SIZE(p, len);
|
||||
return p;
|
||||
extern symbol *
|
||||
slice_to(struct SN_env * z, symbol * p)
|
||||
{
|
||||
slice_check(z);
|
||||
{
|
||||
int len = z->ket - z->bra;
|
||||
|
||||
if (CAPACITY(p) < len)
|
||||
p = increase_size(p, len);
|
||||
memmove(p, z->p + z->bra, len * sizeof(symbol));
|
||||
SET_SIZE(p, len);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern void debug(struct SN_env * z, int number, int line_count)
|
||||
{ int i;
|
||||
int limit = SIZE(z->p);
|
||||
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
|
||||
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
|
||||
for (i = 0; i <= limit; i++)
|
||||
{ if (z->lb == i) printf("{");
|
||||
if (z->bra == i) printf("[");
|
||||
if (z->c == i) printf("|");
|
||||
if (z->ket == i) printf("]");
|
||||
if (z->l == i) printf("}");
|
||||
if (i < limit)
|
||||
{ int ch = z->p[i];
|
||||
if (ch == 0) ch = '#';
|
||||
printf("%c", ch);
|
||||
}
|
||||
}
|
||||
printf("'\n");
|
||||
extern symbol *
|
||||
assign_to(struct SN_env * z, symbol * p)
|
||||
{
|
||||
int len = z->l;
|
||||
|
||||
if (CAPACITY(p) < len)
|
||||
p = increase_size(p, len);
|
||||
memmove(p, z->p, len * sizeof(symbol));
|
||||
SET_SIZE(p, len);
|
||||
return p;
|
||||
}
|
||||
|
||||
extern void
|
||||
debug(struct SN_env * z, int number, int line_count)
|
||||
{
|
||||
int i;
|
||||
int limit = SIZE(z->p);
|
||||
|
||||
/* if (number >= 0) printf("%3d (line %4d): '", number, line_count); */
|
||||
if (number >= 0)
|
||||
printf("%3d (line %4d): [%d]'", number, line_count, limit);
|
||||
for (i = 0; i <= limit; i++)
|
||||
{
|
||||
if (z->lb == i)
|
||||
printf("{");
|
||||
if (z->bra == i)
|
||||
printf("[");
|
||||
if (z->c == i)
|
||||
printf("|");
|
||||
if (z->ket == i)
|
||||
printf("]");
|
||||
if (z->l == i)
|
||||
printf("}");
|
||||
if (i < limit)
|
||||
{
|
||||
int ch = z->p[i];
|
||||
|
||||
if (ch == 0)
|
||||
ch = '#';
|
||||
printf("%c", ch);
|
||||
}
|
||||
}
|
||||
printf("'\n");
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*
|
||||
/*
|
||||
* stopword library
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
@ -13,97 +13,114 @@
|
||||
|
||||
#define STOPBUFLEN 4096
|
||||
|
||||
char*
|
||||
lowerstr(char *str) {
|
||||
char *ptr=str;
|
||||
while(*ptr) {
|
||||
*ptr = tolower(*(unsigned char*)ptr);
|
||||
char *
|
||||
lowerstr(char *str)
|
||||
{
|
||||
char *ptr = str;
|
||||
|
||||
while (*ptr)
|
||||
{
|
||||
*ptr = tolower(*(unsigned char *) ptr);
|
||||
ptr++;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
void
|
||||
freestoplist(StopList *s) {
|
||||
char **ptr=s->stop;
|
||||
if ( ptr )
|
||||
while( *ptr && s->len >0 ) {
|
||||
freestoplist(StopList * s)
|
||||
{
|
||||
char **ptr = s->stop;
|
||||
|
||||
if (ptr)
|
||||
while (*ptr && s->len > 0)
|
||||
{
|
||||
free(*ptr);
|
||||
ptr++; s->len--;
|
||||
free(s->stop);
|
||||
}
|
||||
memset(s,0,sizeof(StopList));
|
||||
ptr++;
|
||||
s->len--;
|
||||
free(s->stop);
|
||||
}
|
||||
memset(s, 0, sizeof(StopList));
|
||||
}
|
||||
|
||||
void
|
||||
readstoplist(text *in, StopList *s) {
|
||||
char **stop=NULL;
|
||||
s->len=0;
|
||||
if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
|
||||
char *filename=text2char(in);
|
||||
FILE *hin=NULL;
|
||||
char buf[STOPBUFLEN];
|
||||
int reallen=0;
|
||||
readstoplist(text *in, StopList * s)
|
||||
{
|
||||
char **stop = NULL;
|
||||
|
||||
if ( (hin=fopen(filename,"r")) == NULL )
|
||||
s->len = 0;
|
||||
if (in && VARSIZE(in) - VARHDRSZ > 0)
|
||||
{
|
||||
char *filename = text2char(in);
|
||||
FILE *hin = NULL;
|
||||
char buf[STOPBUFLEN];
|
||||
int reallen = 0;
|
||||
|
||||
if ((hin = fopen(filename, "r")) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not open file \"%s\": %m",
|
||||
filename)));
|
||||
filename)));
|
||||
|
||||
while( fgets(buf,STOPBUFLEN,hin) ) {
|
||||
buf[strlen(buf)-1] = '\0';
|
||||
if ( *buf=='\0' ) continue;
|
||||
while (fgets(buf, STOPBUFLEN, hin))
|
||||
{
|
||||
buf[strlen(buf) - 1] = '\0';
|
||||
if (*buf == '\0')
|
||||
continue;
|
||||
|
||||
if ( s->len>= reallen ) {
|
||||
char **tmp;
|
||||
reallen=(reallen) ? reallen*2 : 16;
|
||||
tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
|
||||
if (!tmp) {
|
||||
if (s->len >= reallen)
|
||||
{
|
||||
char **tmp;
|
||||
|
||||
reallen = (reallen) ? reallen * 2 : 16;
|
||||
tmp = (char **) realloc((void *) stop, sizeof(char *) * reallen);
|
||||
if (!tmp)
|
||||
{
|
||||
freestoplist(s);
|
||||
fclose(hin);
|
||||
fclose(hin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
stop=tmp;
|
||||
stop = tmp;
|
||||
}
|
||||
|
||||
stop[s->len]=strdup(buf);
|
||||
if ( !stop[s->len] ) {
|
||||
|
||||
stop[s->len] = strdup(buf);
|
||||
if (!stop[s->len])
|
||||
{
|
||||
freestoplist(s);
|
||||
fclose(hin);
|
||||
fclose(hin);
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
}
|
||||
if ( s->wordop )
|
||||
stop[s->len]=(s->wordop)(stop[s->len]);
|
||||
if (s->wordop)
|
||||
stop[s->len] = (s->wordop) (stop[s->len]);
|
||||
|
||||
(s->len)++;
|
||||
(s->len)++;
|
||||
}
|
||||
fclose(hin);
|
||||
pfree(filename);
|
||||
pfree(filename);
|
||||
}
|
||||
s->stop=stop;
|
||||
}
|
||||
s->stop = stop;
|
||||
}
|
||||
|
||||
static int
|
||||
comparestr(const void *a, const void *b) {
|
||||
return strcmp( *(char**)a, *(char**)b );
|
||||
comparestr(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(*(char **) a, *(char **) b);
|
||||
}
|
||||
|
||||
void
|
||||
sortstoplist(StopList *s) {
|
||||
if (s->stop && s->len>0)
|
||||
qsort(s->stop, s->len, sizeof(char*), comparestr);
|
||||
sortstoplist(StopList * s)
|
||||
{
|
||||
if (s->stop && s->len > 0)
|
||||
qsort(s->stop, s->len, sizeof(char *), comparestr);
|
||||
}
|
||||
|
||||
bool
|
||||
searchstoplist(StopList *s, char *key) {
|
||||
if ( s->wordop )
|
||||
key=(*(s->wordop))(key);
|
||||
return ( s->stop && s->len>0 && bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) ) ? true : false;
|
||||
searchstoplist(StopList * s, char *key)
|
||||
{
|
||||
if (s->wordop)
|
||||
key = (*(s->wordop)) (key);
|
||||
return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* interface functions to tscfg
|
||||
/*
|
||||
* interface functions to tscfg
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <errno.h>
|
||||
@ -23,263 +23,299 @@
|
||||
|
||||
/*********top interface**********/
|
||||
|
||||
static void *plan_getcfg_bylocale=NULL;
|
||||
static void *plan_getcfg=NULL;
|
||||
static void *plan_getmap=NULL;
|
||||
static void *plan_name2id=NULL;
|
||||
static Oid current_cfg_id=0;
|
||||
static void *plan_getcfg_bylocale = NULL;
|
||||
static void *plan_getcfg = NULL;
|
||||
static void *plan_getmap = NULL;
|
||||
static void *plan_name2id = NULL;
|
||||
static Oid current_cfg_id = 0;
|
||||
|
||||
void
|
||||
init_cfg(Oid id, TSCfgInfo *cfg) {
|
||||
Oid arg[2]={ OIDOID, OIDOID };
|
||||
bool isnull;
|
||||
Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
|
||||
int stat,i,j;
|
||||
text *ptr;
|
||||
text *prsname=NULL;
|
||||
MemoryContext oldcontext;
|
||||
init_cfg(Oid id, TSCfgInfo * cfg)
|
||||
{
|
||||
Oid arg[2] = {OIDOID, OIDOID};
|
||||
bool isnull;
|
||||
Datum pars[2] = {ObjectIdGetDatum(id), ObjectIdGetDatum(id)};
|
||||
int stat,
|
||||
i,
|
||||
j;
|
||||
text *ptr;
|
||||
text *prsname = NULL;
|
||||
MemoryContext oldcontext;
|
||||
|
||||
memset(cfg,0,sizeof(TSCfgInfo));
|
||||
memset(cfg, 0, sizeof(TSCfgInfo));
|
||||
SPI_connect();
|
||||
if ( !plan_getcfg ) {
|
||||
plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
|
||||
if ( !plan_getcfg )
|
||||
if (!plan_getcfg)
|
||||
{
|
||||
plan_getcfg = SPI_saveplan(SPI_prepare("select prs_name from pg_ts_cfg where oid = $1", 1, arg));
|
||||
if (!plan_getcfg)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
stat = SPI_execp(plan_getcfg, pars, " ", 1);
|
||||
if ( stat < 0 )
|
||||
ts_error (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 ) {
|
||||
prsname = (text*) DatumGetPointer(
|
||||
SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
|
||||
);
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
prsname = (text *) DatumGetPointer(
|
||||
SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
|
||||
);
|
||||
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
||||
prsname = ptextdup( prsname );
|
||||
prsname = ptextdup(prsname);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
cfg->id=id;
|
||||
} else
|
||||
|
||||
cfg->id = id;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "No tsearch cfg with id %d", id);
|
||||
|
||||
arg[0]=TEXTOID;
|
||||
if ( !plan_getmap ) {
|
||||
plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
|
||||
if ( !plan_getmap )
|
||||
arg[0] = TEXTOID;
|
||||
if (!plan_getmap)
|
||||
{
|
||||
plan_getmap = SPI_saveplan(SPI_prepare("select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;", 2, arg));
|
||||
if (!plan_getmap)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
pars[0]=PointerGetDatum( prsname );
|
||||
pars[0] = PointerGetDatum(prsname);
|
||||
stat = SPI_execp(plan_getmap, pars, " ", 0);
|
||||
if ( stat < 0 )
|
||||
ts_error (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed <= 0 )
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed <= 0)
|
||||
ts_error(ERROR, "No parser with id %d", id);
|
||||
|
||||
for(i=0;i<SPI_processed;i++) {
|
||||
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
|
||||
ArrayType *a;
|
||||
for (i = 0; i < SPI_processed; i++)
|
||||
{
|
||||
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
ArrayType *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
|
||||
ArrayType *a;
|
||||
|
||||
if ( !cfg->map ) {
|
||||
cfg->len=lexid+1;
|
||||
cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
|
||||
if ( !cfg->map )
|
||||
if (!cfg->map)
|
||||
{
|
||||
cfg->len = lexid + 1;
|
||||
cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len);
|
||||
if (!cfg->map)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
|
||||
memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len);
|
||||
}
|
||||
|
||||
if (isnull)
|
||||
continue;
|
||||
|
||||
a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
|
||||
|
||||
if ( ARR_NDIM(a) != 1 )
|
||||
ts_error(ERROR,"Wrong dimension");
|
||||
if ( ARRNELEMS(a) < 1 )
|
||||
a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a)));
|
||||
|
||||
if (ARR_NDIM(a) != 1)
|
||||
ts_error(ERROR, "Wrong dimension");
|
||||
if (ARRNELEMS(a) < 1)
|
||||
continue;
|
||||
|
||||
cfg->map[lexid].len=ARRNELEMS(a);
|
||||
cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
|
||||
memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
|
||||
ptr=(text*)ARR_DATA_PTR(a);
|
||||
cfg->map[lexid].len = ARRNELEMS(a);
|
||||
cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len);
|
||||
memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len);
|
||||
ptr = (text *) ARR_DATA_PTR(a);
|
||||
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
||||
for(j=0;j<cfg->map[lexid].len;j++) {
|
||||
for (j = 0; j < cfg->map[lexid].len; j++)
|
||||
{
|
||||
cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
|
||||
ptr=NEXTVAL(ptr);
|
||||
}
|
||||
ptr = NEXTVAL(ptr);
|
||||
}
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
if ( a != toasted_a )
|
||||
if (a != toasted_a)
|
||||
pfree(a);
|
||||
}
|
||||
|
||||
|
||||
SPI_finish();
|
||||
cfg->prs_id = name2id_prs( prsname );
|
||||
cfg->prs_id = name2id_prs(prsname);
|
||||
pfree(prsname);
|
||||
for(i=0;i<cfg->len;i++) {
|
||||
for(j=0;j<cfg->map[i].len;j++) {
|
||||
ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
|
||||
cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
|
||||
for (i = 0; i < cfg->len; i++)
|
||||
{
|
||||
for (j = 0; j < cfg->map[i].len; j++)
|
||||
{
|
||||
ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]);
|
||||
cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr));
|
||||
pfree(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
TSCfgInfo *last_cfg;
|
||||
int len;
|
||||
int reallen;
|
||||
TSCfgInfo *list;
|
||||
typedef struct
|
||||
{
|
||||
TSCfgInfo *last_cfg;
|
||||
int len;
|
||||
int reallen;
|
||||
TSCfgInfo *list;
|
||||
SNMap name2id_map;
|
||||
} CFGList;
|
||||
} CFGList;
|
||||
|
||||
static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
|
||||
static CFGList CList = {NULL, 0, 0, NULL, {0, 0, NULL}};
|
||||
|
||||
void
|
||||
reset_cfg(void) {
|
||||
freeSNMap( &(CList.name2id_map) );
|
||||
if ( CList.list ) {
|
||||
int i,j;
|
||||
for(i=0;i<CList.len;i++)
|
||||
if ( CList.list[i].map ) {
|
||||
for(j=0;j<CList.list[i].len;j++)
|
||||
if ( CList.list[i].map[j].dict_id )
|
||||
reset_cfg(void)
|
||||
{
|
||||
freeSNMap(&(CList.name2id_map));
|
||||
if (CList.list)
|
||||
{
|
||||
int i,
|
||||
j;
|
||||
|
||||
for (i = 0; i < CList.len; i++)
|
||||
if (CList.list[i].map)
|
||||
{
|
||||
for (j = 0; j < CList.list[i].len; j++)
|
||||
if (CList.list[i].map[j].dict_id)
|
||||
free(CList.list[i].map[j].dict_id);
|
||||
free( CList.list[i].map );
|
||||
free(CList.list[i].map);
|
||||
}
|
||||
free(CList.list);
|
||||
free(CList.list);
|
||||
}
|
||||
memset(&CList,0,sizeof(CFGList));
|
||||
memset(&CList, 0, sizeof(CFGList));
|
||||
}
|
||||
|
||||
static int
|
||||
comparecfg(const void *a, const void *b) {
|
||||
return ((TSCfgInfo*)a)->id - ((TSCfgInfo*)b)->id;
|
||||
comparecfg(const void *a, const void *b)
|
||||
{
|
||||
return ((TSCfgInfo *) a)->id - ((TSCfgInfo *) b)->id;
|
||||
}
|
||||
|
||||
TSCfgInfo *
|
||||
findcfg(Oid id) {
|
||||
findcfg(Oid id)
|
||||
{
|
||||
/* last used cfg */
|
||||
if ( CList.last_cfg && CList.last_cfg->id==id )
|
||||
if (CList.last_cfg && CList.last_cfg->id == id)
|
||||
return CList.last_cfg;
|
||||
|
||||
/* already used cfg */
|
||||
if ( CList.len != 0 ) {
|
||||
TSCfgInfo key;
|
||||
key.id=id;
|
||||
if (CList.len != 0)
|
||||
{
|
||||
TSCfgInfo key;
|
||||
|
||||
key.id = id;
|
||||
CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
|
||||
if ( CList.last_cfg != NULL )
|
||||
if (CList.last_cfg != NULL)
|
||||
return CList.last_cfg;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
if ( CList.len==CList.reallen ) {
|
||||
TSCfgInfo *tmp;
|
||||
int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
|
||||
tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
|
||||
if ( !tmp )
|
||||
ts_error(ERROR,"No memory");
|
||||
CList.reallen=reallen;
|
||||
CList.list=tmp;
|
||||
if (CList.len == CList.reallen)
|
||||
{
|
||||
TSCfgInfo *tmp;
|
||||
int reallen = (CList.reallen) ? 2 * CList.reallen : 16;
|
||||
|
||||
tmp = (TSCfgInfo *) realloc(CList.list, sizeof(TSCfgInfo) * reallen);
|
||||
if (!tmp)
|
||||
ts_error(ERROR, "No memory");
|
||||
CList.reallen = reallen;
|
||||
CList.list = tmp;
|
||||
}
|
||||
CList.last_cfg=&(CList.list[CList.len]);
|
||||
CList.last_cfg = &(CList.list[CList.len]);
|
||||
init_cfg(id, CList.last_cfg);
|
||||
CList.len++;
|
||||
qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
|
||||
return findcfg(id); /* qsort changed order!! */;
|
||||
return findcfg(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
|
||||
|
||||
Oid
|
||||
name2id_cfg(text *name) {
|
||||
Oid arg[1]={ TEXTOID };
|
||||
bool isnull;
|
||||
Datum pars[1]={ PointerGetDatum(name) };
|
||||
int stat;
|
||||
Oid id=findSNMap_t( &(CList.name2id_map), name );
|
||||
|
||||
if ( id )
|
||||
name2id_cfg(text *name)
|
||||
{
|
||||
Oid arg[1] = {TEXTOID};
|
||||
bool isnull;
|
||||
Datum pars[1] = {PointerGetDatum(name)};
|
||||
int stat;
|
||||
Oid id = findSNMap_t(&(CList.name2id_map), name);
|
||||
|
||||
if (id)
|
||||
return id;
|
||||
|
||||
|
||||
SPI_connect();
|
||||
if ( !plan_name2id ) {
|
||||
plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
|
||||
if ( !plan_name2id )
|
||||
if (!plan_name2id)
|
||||
{
|
||||
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_cfg where ts_name = $1", 1, arg));
|
||||
if (!plan_name2id)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
stat = SPI_execp(plan_name2id, pars, " ", 1);
|
||||
if ( stat < 0 )
|
||||
if (stat < 0)
|
||||
/* internal error */
|
||||
elog (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 ) {
|
||||
id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
if ( isnull )
|
||||
elog(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (isnull)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("null id for tsearch config")));
|
||||
} else
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("no tsearch config")));
|
||||
|
||||
SPI_finish();
|
||||
addSNMap_t( &(CList.name2id_map), name, id );
|
||||
addSNMap_t(&(CList.name2id_map), name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
|
||||
int type, lenlemm, i;
|
||||
char *lemm=NULL;
|
||||
void
|
||||
parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
{
|
||||
int type,
|
||||
lenlemm,
|
||||
i;
|
||||
char *lemm = NULL;
|
||||
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||||
|
||||
prsobj->prs=(void*)DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prsobj->start_info),
|
||||
PointerGetDatum(buf),
|
||||
Int32GetDatum(buflen)
|
||||
)
|
||||
);
|
||||
prsobj->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prsobj->start_info),
|
||||
PointerGetDatum(buf),
|
||||
Int32GetDatum(buflen)
|
||||
)
|
||||
);
|
||||
|
||||
while( ( type=DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm))) ) != 0 ) {
|
||||
while ((type = DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)))) != 0)
|
||||
{
|
||||
|
||||
if ( lenlemm >= MAXSTRLEN )
|
||||
if (lenlemm >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
|
||||
if ( type >= cfg->len ) /* skip this type of lexem */
|
||||
continue;
|
||||
if (type >= cfg->len) /* skip this type of lexem */
|
||||
continue;
|
||||
|
||||
for(i=0;i<cfg->map[type].len;i++) {
|
||||
DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
|
||||
char **norms, **ptr;
|
||||
|
||||
norms = ptr = (char**)DatumGetPointer(
|
||||
FunctionCall3(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(lemm),
|
||||
PointerGetDatum(lenlemm)
|
||||
)
|
||||
);
|
||||
if ( !norms ) /* dictionary doesn't know this lexem */
|
||||
for (i = 0; i < cfg->map[type].len; i++)
|
||||
{
|
||||
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
|
||||
char **norms,
|
||||
**ptr;
|
||||
|
||||
norms = ptr = (char **) DatumGetPointer(
|
||||
FunctionCall3(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(lemm),
|
||||
PointerGetDatum(lenlemm)
|
||||
)
|
||||
);
|
||||
if (!norms) /* dictionary doesn't know this lexem */
|
||||
continue;
|
||||
|
||||
prs->pos++; /*set pos*/
|
||||
prs->pos++; /* set pos */
|
||||
|
||||
while( *ptr ) {
|
||||
if (prs->curwords == prs->lenwords) {
|
||||
while (*ptr)
|
||||
{
|
||||
if (prs->curwords == prs->lenwords)
|
||||
{
|
||||
prs->lenwords *= 2;
|
||||
prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
|
||||
}
|
||||
@ -292,191 +328,220 @@ parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
|
||||
prs->curwords++;
|
||||
}
|
||||
pfree(norms);
|
||||
break; /* lexem already normalized or is stop word*/
|
||||
break; /* lexem already normalized or is stop
|
||||
* word */
|
||||
}
|
||||
}
|
||||
|
||||
FunctionCall1(
|
||||
&(prsobj->end_info),
|
||||
PointerGetDatum(prsobj->prs)
|
||||
);
|
||||
&(prsobj->end_info),
|
||||
PointerGetDatum(prsobj->prs)
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
|
||||
while (prs->curwords >= prs->lenwords) {
|
||||
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type)
|
||||
{
|
||||
while (prs->curwords >= prs->lenwords)
|
||||
{
|
||||
prs->lenwords *= 2;
|
||||
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
|
||||
}
|
||||
memset( &(prs->words[prs->curwords]), 0, sizeof(HLWORD) );
|
||||
prs->words[prs->curwords].type = (uint8)type;
|
||||
prs->words[prs->curwords].len = buflen;
|
||||
memset(&(prs->words[prs->curwords]), 0, sizeof(HLWORD));
|
||||
prs->words[prs->curwords].type = (uint8) type;
|
||||
prs->words[prs->curwords].len = buflen;
|
||||
prs->words[prs->curwords].word = palloc(buflen);
|
||||
memcpy(prs->words[prs->curwords].word, buf, buflen);
|
||||
prs->curwords++;
|
||||
prs->curwords++;
|
||||
}
|
||||
|
||||
static void
|
||||
hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
|
||||
int i;
|
||||
ITEM *item=GETQUERY(query);
|
||||
HLWORD *word=&( prs->words[prs->curwords-1] );
|
||||
hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
|
||||
{
|
||||
int i;
|
||||
ITEM *item = GETQUERY(query);
|
||||
HLWORD *word = &(prs->words[prs->curwords - 1]);
|
||||
|
||||
while (prs->curwords + query->size >= prs->lenwords) {
|
||||
while (prs->curwords + query->size >= prs->lenwords)
|
||||
{
|
||||
prs->lenwords *= 2;
|
||||
prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
|
||||
}
|
||||
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
|
||||
if ( word->item ) {
|
||||
memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
|
||||
prs->words[prs->curwords].item=item;
|
||||
prs->words[prs->curwords].repeated=1;
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
|
||||
{
|
||||
if (word->item)
|
||||
{
|
||||
memcpy(&(prs->words[prs->curwords]), word, sizeof(HLWORD));
|
||||
prs->words[prs->curwords].item = item;
|
||||
prs->words[prs->curwords].repeated = 1;
|
||||
prs->curwords++;
|
||||
} else
|
||||
word->item=item;
|
||||
}
|
||||
else
|
||||
word->item = item;
|
||||
}
|
||||
item++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
|
||||
int type, lenlemm, i;
|
||||
char *lemm=NULL;
|
||||
void
|
||||
hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
|
||||
{
|
||||
int type,
|
||||
lenlemm,
|
||||
i;
|
||||
char *lemm = NULL;
|
||||
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||||
|
||||
prsobj->prs=(void*)DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prsobj->start_info),
|
||||
PointerGetDatum(buf),
|
||||
Int32GetDatum(buflen)
|
||||
)
|
||||
);
|
||||
prsobj->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prsobj->start_info),
|
||||
PointerGetDatum(buf),
|
||||
Int32GetDatum(buflen)
|
||||
)
|
||||
);
|
||||
|
||||
while( ( type=DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm))) ) != 0 ) {
|
||||
while ((type = DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)))) != 0)
|
||||
{
|
||||
|
||||
if ( lenlemm >= MAXSTRLEN )
|
||||
if (lenlemm >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
|
||||
hladdword(prs,lemm,lenlemm,type);
|
||||
hladdword(prs, lemm, lenlemm, type);
|
||||
|
||||
if ( type >= cfg->len )
|
||||
continue;
|
||||
if (type >= cfg->len)
|
||||
continue;
|
||||
|
||||
for(i=0;i<cfg->map[type].len;i++) {
|
||||
DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
|
||||
char **norms, **ptr;
|
||||
|
||||
norms = ptr = (char**)DatumGetPointer(
|
||||
FunctionCall3(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(lemm),
|
||||
PointerGetDatum(lenlemm)
|
||||
)
|
||||
);
|
||||
if ( !norms ) /* dictionary doesn't know this lexem */
|
||||
for (i = 0; i < cfg->map[type].len; i++)
|
||||
{
|
||||
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
|
||||
char **norms,
|
||||
**ptr;
|
||||
|
||||
norms = ptr = (char **) DatumGetPointer(
|
||||
FunctionCall3(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(lemm),
|
||||
PointerGetDatum(lenlemm)
|
||||
)
|
||||
);
|
||||
if (!norms) /* dictionary doesn't know this lexem */
|
||||
continue;
|
||||
|
||||
while( *ptr ) {
|
||||
hlfinditem(prs,query,*ptr,strlen(*ptr));
|
||||
while (*ptr)
|
||||
{
|
||||
hlfinditem(prs, query, *ptr, strlen(*ptr));
|
||||
pfree(*ptr);
|
||||
ptr++;
|
||||
}
|
||||
pfree(norms);
|
||||
break; /* lexem already normalized or is stop word*/
|
||||
break; /* lexem already normalized or is stop
|
||||
* word */
|
||||
}
|
||||
}
|
||||
|
||||
FunctionCall1(
|
||||
&(prsobj->end_info),
|
||||
PointerGetDatum(prsobj->prs)
|
||||
);
|
||||
&(prsobj->end_info),
|
||||
PointerGetDatum(prsobj->prs)
|
||||
);
|
||||
}
|
||||
|
||||
text*
|
||||
genhl(HLPRSTEXT * prs) {
|
||||
text *out;
|
||||
int len=128;
|
||||
char *ptr;
|
||||
HLWORD *wrd=prs->words;
|
||||
text *
|
||||
genhl(HLPRSTEXT * prs)
|
||||
{
|
||||
text *out;
|
||||
int len = 128;
|
||||
char *ptr;
|
||||
HLWORD *wrd = prs->words;
|
||||
|
||||
out = (text*)palloc( len );
|
||||
ptr=((char*)out) + VARHDRSZ;
|
||||
out = (text *) palloc(len);
|
||||
ptr = ((char *) out) + VARHDRSZ;
|
||||
|
||||
while( wrd - prs->words < prs->curwords ) {
|
||||
while ( wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
|
||||
int dist = ptr - ((char*)out);
|
||||
len*= 2;
|
||||
while (wrd - prs->words < prs->curwords)
|
||||
{
|
||||
while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
|
||||
{
|
||||
int dist = ptr - ((char *) out);
|
||||
|
||||
len *= 2;
|
||||
out = (text *) repalloc(out, len);
|
||||
ptr=((char*)out) + dist;
|
||||
ptr = ((char *) out) + dist;
|
||||
}
|
||||
|
||||
if ( wrd->in && !wrd->skip && !wrd->repeated ) {
|
||||
if ( wrd->replace ) {
|
||||
*ptr=' ';
|
||||
if (wrd->in && !wrd->skip && !wrd->repeated)
|
||||
{
|
||||
if (wrd->replace)
|
||||
{
|
||||
*ptr = ' ';
|
||||
ptr++;
|
||||
} else {
|
||||
if (wrd->selected) {
|
||||
memcpy(ptr,prs->startsel,prs->startsellen);
|
||||
ptr+=prs->startsellen;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (wrd->selected)
|
||||
{
|
||||
memcpy(ptr, prs->startsel, prs->startsellen);
|
||||
ptr += prs->startsellen;
|
||||
}
|
||||
memcpy(ptr,wrd->word,wrd->len);
|
||||
ptr+=wrd->len;
|
||||
if (wrd->selected) {
|
||||
memcpy(ptr,prs->stopsel,prs->stopsellen);
|
||||
ptr+=prs->stopsellen;
|
||||
memcpy(ptr, wrd->word, wrd->len);
|
||||
ptr += wrd->len;
|
||||
if (wrd->selected)
|
||||
{
|
||||
memcpy(ptr, prs->stopsel, prs->stopsellen);
|
||||
ptr += prs->stopsellen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( !wrd->repeated )
|
||||
if (!wrd->repeated)
|
||||
pfree(wrd->word);
|
||||
|
||||
wrd++;
|
||||
}
|
||||
|
||||
VARATT_SIZEP(out)=ptr - ((char*)out);
|
||||
return out;
|
||||
VARATT_SIZEP(out) = ptr - ((char *) out);
|
||||
return out;
|
||||
}
|
||||
|
||||
int
|
||||
get_currcfg(void) {
|
||||
Oid arg[1]={ TEXTOID };
|
||||
int
|
||||
get_currcfg(void)
|
||||
{
|
||||
Oid arg[1] = {TEXTOID};
|
||||
const char *curlocale;
|
||||
Datum pars[1];
|
||||
bool isnull;
|
||||
int stat;
|
||||
Datum pars[1];
|
||||
bool isnull;
|
||||
int stat;
|
||||
|
||||
if ( current_cfg_id > 0 )
|
||||
if (current_cfg_id > 0)
|
||||
return current_cfg_id;
|
||||
|
||||
SPI_connect();
|
||||
if ( !plan_getcfg_bylocale ) {
|
||||
plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
|
||||
if ( !plan_getcfg_bylocale )
|
||||
if (!plan_getcfg_bylocale)
|
||||
{
|
||||
plan_getcfg_bylocale = SPI_saveplan(SPI_prepare("select oid from pg_ts_cfg where locale = $1 ", 1, arg));
|
||||
if (!plan_getcfg_bylocale)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
curlocale = setlocale(LC_CTYPE, NULL);
|
||||
pars[0] = PointerGetDatum( char2text((char*)curlocale) );
|
||||
pars[0] = PointerGetDatum(char2text((char *) curlocale));
|
||||
stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
|
||||
|
||||
if ( stat < 0 )
|
||||
if (stat < 0)
|
||||
/* internal error */
|
||||
elog (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 )
|
||||
current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
else
|
||||
elog(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
current_cfg_id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not find tsearch config by locale")));
|
||||
@ -487,39 +552,43 @@ get_currcfg(void) {
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curcfg);
|
||||
Datum set_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum set_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curcfg(PG_FUNCTION_ARGS) {
|
||||
findcfg(PG_GETARG_OID(0));
|
||||
current_cfg_id=PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
set_curcfg(PG_FUNCTION_ARGS)
|
||||
{
|
||||
findcfg(PG_GETARG_OID(0));
|
||||
current_cfg_id = PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curcfg_byname);
|
||||
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
|
||||
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curcfg_byname(PG_FUNCTION_ARGS) {
|
||||
text *name=PG_GETARG_TEXT_P(0);
|
||||
|
||||
DirectFunctionCall1(
|
||||
set_curcfg,
|
||||
ObjectIdGetDatum( name2id_cfg(name) )
|
||||
);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
set_curcfg_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
|
||||
DirectFunctionCall1(
|
||||
set_curcfg,
|
||||
ObjectIdGetDatum(name2id_cfg(name))
|
||||
);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(show_curcfg);
|
||||
Datum show_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum show_curcfg(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
show_curcfg(PG_FUNCTION_ARGS) {
|
||||
PG_RETURN_OID( get_currcfg() );
|
||||
show_curcfg(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_OID(get_currcfg());
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(reset_tsearch);
|
||||
Datum reset_tsearch(PG_FUNCTION_ARGS);
|
||||
Datum reset_tsearch(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
reset_tsearch(PG_FUNCTION_ARGS) {
|
||||
ts_error(NOTICE,"TSearch cache cleaned");
|
||||
PG_RETURN_VOID();
|
||||
reset_tsearch(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ts_error(NOTICE, "TSearch cache cleaned");
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -3,66 +3,73 @@
|
||||
#include "postgres.h"
|
||||
#include "query.h"
|
||||
|
||||
typedef struct {
|
||||
int len;
|
||||
Datum *dict_id;
|
||||
} ListDictionary;
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
Datum *dict_id;
|
||||
} ListDictionary;
|
||||
|
||||
typedef struct {
|
||||
Oid id;
|
||||
Oid prs_id;
|
||||
int len;
|
||||
ListDictionary *map;
|
||||
typedef struct
|
||||
{
|
||||
Oid id;
|
||||
Oid prs_id;
|
||||
int len;
|
||||
ListDictionary *map;
|
||||
} TSCfgInfo;
|
||||
|
||||
Oid name2id_cfg(text *name);
|
||||
TSCfgInfo * findcfg(Oid id);
|
||||
void init_cfg(Oid id, TSCfgInfo *cfg);
|
||||
void reset_cfg(void);
|
||||
Oid name2id_cfg(text *name);
|
||||
TSCfgInfo *findcfg(Oid id);
|
||||
void init_cfg(Oid id, TSCfgInfo * cfg);
|
||||
void reset_cfg(void);
|
||||
|
||||
typedef struct {
|
||||
uint16 len;
|
||||
union {
|
||||
typedef struct
|
||||
{
|
||||
uint16 len;
|
||||
union
|
||||
{
|
||||
uint16 pos;
|
||||
uint16 *apos;
|
||||
} pos;
|
||||
char *word;
|
||||
uint32 alen;
|
||||
} WORD;
|
||||
|
||||
typedef struct {
|
||||
WORD *words;
|
||||
int4 lenwords;
|
||||
int4 curwords;
|
||||
uint16 *apos;
|
||||
} pos;
|
||||
char *word;
|
||||
uint32 alen;
|
||||
} WORD;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
WORD *words;
|
||||
int4 lenwords;
|
||||
int4 curwords;
|
||||
int4 pos;
|
||||
} PRSTEXT;
|
||||
} PRSTEXT;
|
||||
|
||||
typedef struct {
|
||||
uint16 len;
|
||||
uint8 selected:1,
|
||||
in:1,
|
||||
skip:1,
|
||||
replace:1,
|
||||
repeated:1;
|
||||
uint8 type;
|
||||
char *word;
|
||||
ITEM *item;
|
||||
} HLWORD;
|
||||
|
||||
typedef struct {
|
||||
HLWORD *words;
|
||||
int4 lenwords;
|
||||
int4 curwords;
|
||||
char *startsel;
|
||||
char *stopsel;
|
||||
int2 startsellen;
|
||||
int2 stopsellen;
|
||||
} HLPRSTEXT;
|
||||
typedef struct
|
||||
{
|
||||
uint16 len;
|
||||
uint8 selected:1,
|
||||
in:1,
|
||||
skip:1,
|
||||
replace:1,
|
||||
repeated:1;
|
||||
uint8 type;
|
||||
char *word;
|
||||
ITEM *item;
|
||||
} HLWORD;
|
||||
|
||||
void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
|
||||
text* genhl(HLPRSTEXT * prs);
|
||||
typedef struct
|
||||
{
|
||||
HLWORD *words;
|
||||
int4 lenwords;
|
||||
int4 curwords;
|
||||
char *startsel;
|
||||
char *stopsel;
|
||||
int2 startsellen;
|
||||
int2 stopsellen;
|
||||
} HLPRSTEXT;
|
||||
|
||||
void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
|
||||
int get_currcfg(void);
|
||||
void hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen);
|
||||
text *genhl(HLPRSTEXT * prs);
|
||||
|
||||
void parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen);
|
||||
int get_currcfg(void);
|
||||
|
||||
#endif
|
||||
|
@ -10,108 +10,128 @@
|
||||
#include "common.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsstat_in);
|
||||
Datum tsstat_in(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
tsstat_in(PG_FUNCTION_ARGS) {
|
||||
tsstat *stat=palloc(STATHDRSIZE);
|
||||
stat->len=STATHDRSIZE;
|
||||
stat->size=0;
|
||||
Datum tsstat_in(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
tsstat_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsstat *stat = palloc(STATHDRSIZE);
|
||||
|
||||
stat->len = STATHDRSIZE;
|
||||
stat->size = 0;
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(tsstat_out);
|
||||
Datum tsstat_out(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
tsstat_out(PG_FUNCTION_ARGS) {
|
||||
Datum tsstat_out(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
tsstat_out(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("tsstat_out not implemented")));
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
static WordEntry**
|
||||
SEI_realloc( WordEntry** in, uint32 *len ) {
|
||||
if ( *len==0 || in==NULL ) {
|
||||
*len=8;
|
||||
in=palloc( sizeof(WordEntry*)* (*len) );
|
||||
} else {
|
||||
static WordEntry **
|
||||
SEI_realloc(WordEntry ** in, uint32 *len)
|
||||
{
|
||||
if (*len == 0 || in == NULL)
|
||||
{
|
||||
*len = 8;
|
||||
in = palloc(sizeof(WordEntry *) * (*len));
|
||||
}
|
||||
else
|
||||
{
|
||||
*len *= 2;
|
||||
in=repalloc( in, sizeof(WordEntry*)* (*len) );
|
||||
in = repalloc(in, sizeof(WordEntry *) * (*len));
|
||||
}
|
||||
return in;
|
||||
}
|
||||
|
||||
static int
|
||||
compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
|
||||
if ( a->len == b->len )
|
||||
compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, tsvector * txt)
|
||||
{
|
||||
if (a->len == b->len)
|
||||
return strncmp(
|
||||
STATSTRPTR(stat) + a->pos,
|
||||
STRPTR(txt) + b->pos,
|
||||
a->len
|
||||
STATSTRPTR(stat) + a->pos,
|
||||
STRPTR(txt) + b->pos,
|
||||
a->len
|
||||
);
|
||||
return ( a->len > b->len ) ? 1 : -1;
|
||||
return (a->len > b->len) ? 1 : -1;
|
||||
}
|
||||
|
||||
static tsstat*
|
||||
formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
|
||||
tsstat *newstat;
|
||||
uint32 totallen, nentry;
|
||||
uint32 slen=0;
|
||||
WordEntry **ptr=entry;
|
||||
char *curptr;
|
||||
StatEntry *sptr,*nptr;
|
||||
static tsstat *
|
||||
formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
|
||||
{
|
||||
tsstat *newstat;
|
||||
uint32 totallen,
|
||||
nentry;
|
||||
uint32 slen = 0;
|
||||
WordEntry **ptr = entry;
|
||||
char *curptr;
|
||||
StatEntry *sptr,
|
||||
*nptr;
|
||||
|
||||
while(ptr-entry<len) {
|
||||
while (ptr - entry < len)
|
||||
{
|
||||
slen += (*ptr)->len;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
nentry=stat->size + len;
|
||||
slen+=STATSTRSIZE(stat);
|
||||
totallen=CALCSTATSIZE(nentry,slen);
|
||||
newstat=palloc(totallen);
|
||||
newstat->len=totallen;
|
||||
newstat->size=nentry;
|
||||
nentry = stat->size + len;
|
||||
slen += STATSTRSIZE(stat);
|
||||
totallen = CALCSTATSIZE(nentry, slen);
|
||||
newstat = palloc(totallen);
|
||||
newstat->len = totallen;
|
||||
newstat->size = nentry;
|
||||
|
||||
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
||||
curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
||||
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
||||
|
||||
ptr=entry;
|
||||
sptr=STATPTR(stat);
|
||||
nptr=STATPTR(newstat);
|
||||
ptr = entry;
|
||||
sptr = STATPTR(stat);
|
||||
nptr = STATPTR(newstat);
|
||||
|
||||
if ( len == 1 ) {
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
|
||||
if (len == 1)
|
||||
{
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
|
||||
while (StopLow < StopHigh) {
|
||||
sptr=StopLow + (StopHigh - StopLow) / 2;
|
||||
if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
StopHigh = sptr;
|
||||
StopHigh = sptr;
|
||||
}
|
||||
nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
|
||||
memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
|
||||
nptr->nentry=POSDATALEN(txt,*ptr);
|
||||
if ( nptr->nentry==0 )
|
||||
nptr->nentry=1;
|
||||
nptr->ndoc=1;
|
||||
nptr->len=(*ptr)->len;
|
||||
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
|
||||
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
|
||||
nptr->nentry = POSDATALEN(txt, *ptr);
|
||||
if (nptr->nentry == 0)
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
|
||||
} else {
|
||||
while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
|
||||
if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
|
||||
memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
|
||||
}
|
||||
else
|
||||
{
|
||||
while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
|
||||
{
|
||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
||||
{
|
||||
memcpy(nptr, sptr, sizeof(StatEntry));
|
||||
sptr++;
|
||||
} else {
|
||||
nptr->nentry=POSDATALEN(txt,*ptr);
|
||||
if ( nptr->nentry==0 )
|
||||
nptr->nentry=1;
|
||||
nptr->ndoc=1;
|
||||
nptr->len=(*ptr)->len;
|
||||
}
|
||||
else
|
||||
{
|
||||
nptr->nentry = POSDATALEN(txt, *ptr);
|
||||
if (nptr->nentry == 0)
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
curptr += nptr->len;
|
||||
@ -120,138 +140,168 @@ formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
|
||||
nptr++;
|
||||
}
|
||||
|
||||
memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
|
||||
|
||||
while(ptr-entry<len) {
|
||||
nptr->nentry=POSDATALEN(txt,*ptr);
|
||||
if ( nptr->nentry==0 )
|
||||
nptr->nentry=1;
|
||||
nptr->ndoc=1;
|
||||
nptr->len=(*ptr)->len;
|
||||
memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
|
||||
|
||||
while (ptr - entry < len)
|
||||
{
|
||||
nptr->nentry = POSDATALEN(txt, *ptr);
|
||||
if (nptr->nentry == 0)
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
curptr += nptr->len;
|
||||
ptr++; nptr++;
|
||||
ptr++;
|
||||
nptr++;
|
||||
}
|
||||
}
|
||||
|
||||
return newstat;
|
||||
}
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(ts_accum);
|
||||
Datum ts_accum(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_accum(PG_FUNCTION_ARGS) {
|
||||
tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
WordEntry **newentry=NULL;
|
||||
uint32 len=0, cur=0;
|
||||
StatEntry *sptr;
|
||||
WordEntry *wptr;
|
||||
Datum ts_accum(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_accum(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsstat *newstat,
|
||||
*stat = (tsstat *) PG_GETARG_POINTER(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
WordEntry **newentry = NULL;
|
||||
uint32 len = 0,
|
||||
cur = 0;
|
||||
StatEntry *sptr;
|
||||
WordEntry *wptr;
|
||||
|
||||
if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */
|
||||
stat=palloc(STATHDRSIZE);
|
||||
stat->len=STATHDRSIZE;
|
||||
stat->size=0;
|
||||
if (stat == NULL || PG_ARGISNULL(0))
|
||||
{ /* Init in first */
|
||||
stat = palloc(STATHDRSIZE);
|
||||
stat->len = STATHDRSIZE;
|
||||
stat->size = 0;
|
||||
}
|
||||
|
||||
/* simple check of correctness */
|
||||
if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
|
||||
PG_FREE_IF_COPY(txt,1);
|
||||
if (txt == NULL || PG_ARGISNULL(1) || txt->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
sptr=STATPTR(stat);
|
||||
wptr=ARRPTR(txt);
|
||||
sptr = STATPTR(stat);
|
||||
wptr = ARRPTR(txt);
|
||||
|
||||
if ( stat->size < 100*txt->size ) { /* merge */
|
||||
while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
|
||||
int cmp = compareStatWord(sptr,wptr,stat,txt);
|
||||
if ( cmp<0 ) {
|
||||
if (stat->size < 100 * txt->size)
|
||||
{ /* merge */
|
||||
while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
int cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
|
||||
if (cmp < 0)
|
||||
sptr++;
|
||||
} else if ( cmp==0 ) {
|
||||
int n=POSDATALEN(txt,wptr);
|
||||
|
||||
if (n==0) n=1;
|
||||
else if (cmp == 0)
|
||||
{
|
||||
int n = POSDATALEN(txt, wptr);
|
||||
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
sptr->ndoc++;
|
||||
sptr->nentry +=n ;
|
||||
sptr++; wptr++;
|
||||
} else {
|
||||
if ( cur==len )
|
||||
newentry=SEI_realloc(newentry, &len);
|
||||
newentry[cur]=wptr;
|
||||
wptr++; cur++;
|
||||
sptr->nentry += n;
|
||||
sptr++;
|
||||
wptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
wptr++;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
while( wptr-ARRPTR(txt) < txt->size ) {
|
||||
if ( cur==len )
|
||||
newentry=SEI_realloc(newentry, &len);
|
||||
newentry[cur]=wptr;
|
||||
wptr++; cur++;
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
wptr++;
|
||||
cur++;
|
||||
}
|
||||
} else { /* search */
|
||||
while( wptr-ARRPTR(txt) < txt->size ) {
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
|
||||
int cmp;
|
||||
}
|
||||
else
|
||||
{ /* search */
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
int cmp;
|
||||
|
||||
while (StopLow < StopHigh) {
|
||||
sptr=StopLow + (StopHigh - StopLow) / 2;
|
||||
cmp = compareStatWord(sptr,wptr,stat,txt);
|
||||
if (cmp==0) {
|
||||
int n=POSDATALEN(txt,wptr);
|
||||
if (n==0) n=1;
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
if (cmp == 0)
|
||||
{
|
||||
int n = POSDATALEN(txt, wptr);
|
||||
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
sptr->ndoc++;
|
||||
sptr->nentry +=n ;
|
||||
sptr->nentry += n;
|
||||
break;
|
||||
} else if ( cmp < 0 )
|
||||
}
|
||||
else if (cmp < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
StopHigh = sptr;
|
||||
StopHigh = sptr;
|
||||
}
|
||||
|
||||
if ( StopLow >= StopHigh ) { /* not found */
|
||||
if ( cur==len )
|
||||
newentry=SEI_realloc(newentry, &len);
|
||||
newentry[cur]=wptr;
|
||||
|
||||
if (StopLow >= StopHigh)
|
||||
{ /* not found */
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ( cur==0 ) { /* no new words */
|
||||
PG_FREE_IF_COPY(txt,1);
|
||||
|
||||
if (cur == 0)
|
||||
{ /* no new words */
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
newstat = formstat(stat, txt, newentry, cur);
|
||||
pfree(newentry);
|
||||
PG_FREE_IF_COPY(txt,1);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
/* pfree(stat); */
|
||||
|
||||
PG_RETURN_POINTER(newstat);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
uint32 cur;
|
||||
tsvector *stat;
|
||||
} StatStorage;
|
||||
typedef struct
|
||||
{
|
||||
uint32 cur;
|
||||
tsvector *stat;
|
||||
} StatStorage;
|
||||
|
||||
static void
|
||||
ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) {
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
StatStorage *st;
|
||||
|
||||
ts_setup_firstcall(FuncCallContext *funcctx, tsstat * stat)
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
StatStorage *st;
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
st=palloc( sizeof(StatStorage) );
|
||||
st->cur=0;
|
||||
st->stat=palloc( stat->len );
|
||||
st = palloc(sizeof(StatStorage));
|
||||
st->cur = 0;
|
||||
st->stat = palloc(stat->len);
|
||||
memcpy(st->stat, stat, stat->len);
|
||||
funcctx->user_fctx = (void*)st;
|
||||
funcctx->user_fctx = (void *) st;
|
||||
tupdesc = RelationNameGetTupleDesc("statinfo");
|
||||
funcctx->slot = TupleDescGetSlot(tupdesc);
|
||||
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
||||
@ -260,162 +310,175 @@ ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) {
|
||||
|
||||
|
||||
static Datum
|
||||
ts_process_call(FuncCallContext *funcctx) {
|
||||
StatStorage *st;
|
||||
st=(StatStorage*)funcctx->user_fctx;
|
||||
ts_process_call(FuncCallContext *funcctx)
|
||||
{
|
||||
StatStorage *st;
|
||||
|
||||
if ( st->cur < st->stat->size ) {
|
||||
Datum result;
|
||||
char* values[3];
|
||||
char ndoc[16];
|
||||
char nentry[16];
|
||||
StatEntry *entry=STATPTR(st->stat) + st->cur;
|
||||
HeapTuple tuple;
|
||||
st = (StatStorage *) funcctx->user_fctx;
|
||||
|
||||
values[1]=ndoc;
|
||||
sprintf(ndoc,"%d",entry->ndoc);
|
||||
values[2]=nentry;
|
||||
sprintf(nentry,"%d",entry->nentry);
|
||||
values[0]=palloc( entry->len+1 );
|
||||
memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len);
|
||||
(values[0])[entry->len]='\0';
|
||||
if (st->cur < st->stat->size)
|
||||
{
|
||||
Datum result;
|
||||
char *values[3];
|
||||
char ndoc[16];
|
||||
char nentry[16];
|
||||
StatEntry *entry = STATPTR(st->stat) + st->cur;
|
||||
HeapTuple tuple;
|
||||
|
||||
values[1] = ndoc;
|
||||
sprintf(ndoc, "%d", entry->ndoc);
|
||||
values[2] = nentry;
|
||||
sprintf(nentry, "%d", entry->nentry);
|
||||
values[0] = palloc(entry->len + 1);
|
||||
memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, entry->len);
|
||||
(values[0])[entry->len] = '\0';
|
||||
|
||||
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
||||
result = TupleGetDatum(funcctx->slot, tuple);
|
||||
|
||||
pfree(values[0]);
|
||||
st->cur++;
|
||||
return result;
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
pfree(st->stat);
|
||||
pfree(st);
|
||||
}
|
||||
|
||||
return (Datum)0;
|
||||
|
||||
return (Datum) 0;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(ts_accum_finish);
|
||||
Datum ts_accum_finish(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_accum_finish(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
Datum ts_accum_finish(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_accum_finish(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
|
||||
ts_setup_firstcall(funcctx, (tsstat *) PG_GETARG_POINTER(0));
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
if ( (result=ts_process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
static Oid tiOid=InvalidOid;
|
||||
static void
|
||||
get_ti_Oid(void) {
|
||||
int ret;
|
||||
bool isnull;
|
||||
static Oid tiOid = InvalidOid;
|
||||
static void
|
||||
get_ti_Oid(void)
|
||||
{
|
||||
int ret;
|
||||
bool isnull;
|
||||
|
||||
if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )
|
||||
if ((ret = SPI_exec("select oid from pg_type where typname='tsvector'", 1)) < 0)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);
|
||||
|
||||
if ( SPI_processed<0 )
|
||||
if (SPI_processed < 0)
|
||||
/* internal error */
|
||||
elog(ERROR, "There is no tsvector type");
|
||||
tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
if ( tiOid==InvalidOid )
|
||||
tiOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
if (tiOid == InvalidOid)
|
||||
/* internal error */
|
||||
elog(ERROR, "tsvector type has InvalidOid");
|
||||
}
|
||||
|
||||
static tsstat*
|
||||
ts_stat_sql(text *txt) {
|
||||
char *query=text2char(txt);
|
||||
int i;
|
||||
tsstat *newstat,*stat;
|
||||
bool isnull;
|
||||
Portal portal;
|
||||
void *plan;
|
||||
static tsstat *
|
||||
ts_stat_sql(text *txt)
|
||||
{
|
||||
char *query = text2char(txt);
|
||||
int i;
|
||||
tsstat *newstat,
|
||||
*stat;
|
||||
bool isnull;
|
||||
Portal portal;
|
||||
void *plan;
|
||||
|
||||
if ( tiOid==InvalidOid )
|
||||
if (tiOid == InvalidOid)
|
||||
get_ti_Oid();
|
||||
|
||||
if ( (plan = SPI_prepare(query,0,NULL))==NULL )
|
||||
if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_prepare('%s') returns NULL",query);
|
||||
elog(ERROR, "SPI_prepare('%s') returns NULL", query);
|
||||
|
||||
if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
|
||||
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL)
|
||||
/* internal error */
|
||||
elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
|
||||
elog(ERROR, "SPI_cursor_open('%s') returns NULL", query);
|
||||
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
|
||||
if ( SPI_tuptable->tupdesc->natts != 1 )
|
||||
if (SPI_tuptable->tupdesc->natts != 1)
|
||||
/* internal error */
|
||||
elog(ERROR, "number of fields doesn't equal to 1");
|
||||
|
||||
if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
|
||||
if (SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid)
|
||||
/* internal error */
|
||||
elog(ERROR, "column isn't of tsvector type");
|
||||
|
||||
stat=palloc(STATHDRSIZE);
|
||||
stat->len=STATHDRSIZE;
|
||||
stat->size=0;
|
||||
stat = palloc(STATHDRSIZE);
|
||||
stat->len = STATHDRSIZE;
|
||||
stat->size = 0;
|
||||
|
||||
while(SPI_processed>0) {
|
||||
for(i=0;i<SPI_processed;i++) {
|
||||
Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
||||
while (SPI_processed > 0)
|
||||
{
|
||||
for (i = 0; i < SPI_processed; i++)
|
||||
{
|
||||
Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
||||
|
||||
if ( !isnull ) {
|
||||
newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
|
||||
ts_accum,
|
||||
PointerGetDatum(stat),
|
||||
data
|
||||
));
|
||||
if ( stat!=newstat && stat )
|
||||
if (!isnull)
|
||||
{
|
||||
newstat = (tsstat *) DatumGetPointer(DirectFunctionCall2(
|
||||
ts_accum,
|
||||
PointerGetDatum(stat),
|
||||
data
|
||||
));
|
||||
if (stat != newstat && stat)
|
||||
pfree(stat);
|
||||
stat=newstat;
|
||||
stat = newstat;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
}
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_close(portal);
|
||||
SPI_freeplan(plan);
|
||||
pfree(query);
|
||||
|
||||
return stat;
|
||||
return stat;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(ts_stat);
|
||||
Datum ts_stat(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_stat(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
Datum ts_stat(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
ts_stat(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
tsstat *stat;
|
||||
text *txt = PG_GETARG_TEXT_P(0);
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
tsstat *stat;
|
||||
text *txt=PG_GETARG_TEXT_P(0);
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
SPI_connect();
|
||||
stat = ts_stat_sql(txt);
|
||||
PG_FREE_IF_COPY(txt,0);
|
||||
ts_setup_firstcall(funcctx, stat );
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
ts_setup_firstcall(funcctx, stat);
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
if ( (result=ts_process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = ts_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
|
||||
|
@ -8,14 +8,16 @@
|
||||
#include "utils/builtins.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
typedef struct {
|
||||
uint32 len;
|
||||
uint32 pos;
|
||||
uint32 ndoc;
|
||||
uint32 nentry;
|
||||
typedef struct
|
||||
{
|
||||
uint32 len;
|
||||
uint32 pos;
|
||||
uint32 ndoc;
|
||||
uint32 nentry;
|
||||
} StatEntry;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int4 len;
|
||||
int4 size;
|
||||
char data[1];
|
||||
|
@ -31,8 +31,10 @@ Datum tsvector_out(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(to_tsvector);
|
||||
Datum to_tsvector(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(to_tsvector_current);
|
||||
Datum to_tsvector_current(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(to_tsvector_name);
|
||||
Datum to_tsvector_name(PG_FUNCTION_ARGS);
|
||||
|
||||
@ -45,32 +47,38 @@ Datum tsvector_length(PG_FUNCTION_ARGS);
|
||||
/*
|
||||
* in/out text index type
|
||||
*/
|
||||
static int
|
||||
comparePos(const void *a, const void *b) {
|
||||
if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos )
|
||||
static int
|
||||
comparePos(const void *a, const void *b)
|
||||
{
|
||||
if (((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos)
|
||||
return 1;
|
||||
return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1;
|
||||
return (((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int
|
||||
uniquePos(WordEntryPos *a, int4 l) {
|
||||
WordEntryPos *ptr, *res;
|
||||
uniquePos(WordEntryPos * a, int4 l)
|
||||
{
|
||||
WordEntryPos *ptr,
|
||||
*res;
|
||||
|
||||
res=a;
|
||||
if (l==1)
|
||||
res = a;
|
||||
if (l == 1)
|
||||
return l;
|
||||
|
||||
qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
|
||||
|
||||
ptr = a + 1;
|
||||
while (ptr - a < l) {
|
||||
if ( ptr->pos != res->pos ) {
|
||||
while (ptr - a < l)
|
||||
{
|
||||
if (ptr->pos != res->pos)
|
||||
{
|
||||
res++;
|
||||
res->pos = ptr->pos;
|
||||
res->weight = ptr->weight;
|
||||
if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
|
||||
if (res - a >= MAXNUMPOS - 1 || res->pos == MAXENTRYPOS - 1)
|
||||
break;
|
||||
} else if ( ptr->weight > res->weight )
|
||||
}
|
||||
else if (ptr->weight > res->weight)
|
||||
res->weight = ptr->weight;
|
||||
ptr++;
|
||||
}
|
||||
@ -81,27 +89,29 @@ static char *BufferStr;
|
||||
static int
|
||||
compareentry(const void *a, const void *b)
|
||||
{
|
||||
if ( ((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
|
||||
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
|
||||
{
|
||||
return strncmp(
|
||||
&BufferStr[((WordEntryIN *) a)->entry.pos],
|
||||
&BufferStr[((WordEntryIN *) b)->entry.pos],
|
||||
((WordEntryIN *) a)->entry.len);
|
||||
}
|
||||
return ( ((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len ) ? 1 : -1;
|
||||
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int
|
||||
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
|
||||
{
|
||||
WordEntryIN *ptr,
|
||||
WordEntryIN *ptr,
|
||||
*res;
|
||||
|
||||
res = a;
|
||||
if (l == 1) {
|
||||
if ( a->entry.haspos ) {
|
||||
*(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
|
||||
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
|
||||
if (l == 1)
|
||||
{
|
||||
if (a->entry.haspos)
|
||||
{
|
||||
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
|
||||
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
|
||||
}
|
||||
return l;
|
||||
}
|
||||
@ -115,31 +125,39 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
|
||||
if (!(ptr->entry.len == res->entry.len &&
|
||||
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
|
||||
{
|
||||
if ( res->entry.haspos ) {
|
||||
*(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
|
||||
*outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
|
||||
if (res->entry.haspos)
|
||||
{
|
||||
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
|
||||
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
|
||||
}
|
||||
*outbuflen += SHORTALIGN(res->entry.len);
|
||||
res++;
|
||||
memcpy(res,ptr,sizeof(WordEntryIN));
|
||||
} else if ( ptr->entry.haspos ){
|
||||
if ( res->entry.haspos ) {
|
||||
int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
|
||||
res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
|
||||
memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
|
||||
&(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
|
||||
*(uint16*)(res->pos) += *(uint16*)(ptr->pos);
|
||||
pfree( ptr->pos );
|
||||
} else {
|
||||
res->entry.haspos=1;
|
||||
memcpy(res, ptr, sizeof(WordEntryIN));
|
||||
}
|
||||
else if (ptr->entry.haspos)
|
||||
{
|
||||
if (res->entry.haspos)
|
||||
{
|
||||
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
|
||||
|
||||
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
|
||||
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
|
||||
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
|
||||
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
|
||||
pfree(ptr->pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
res->entry.haspos = 1;
|
||||
res->pos = ptr->pos;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
if ( res->entry.haspos ) {
|
||||
*(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
|
||||
*outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
|
||||
if (res->entry.haspos)
|
||||
{
|
||||
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
|
||||
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
|
||||
}
|
||||
*outbuflen += SHORTALIGN(res->entry.len);
|
||||
|
||||
@ -150,7 +168,7 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
|
||||
#define WAITENDWORD 2
|
||||
#define WAITNEXTCHAR 3
|
||||
#define WAITENDCMPLX 4
|
||||
#define WAITPOSINFO 5
|
||||
#define WAITPOSINFO 5
|
||||
#define INPOSINFO 6
|
||||
#define WAITPOSDELIM 7
|
||||
|
||||
@ -172,7 +190,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
|
||||
state->curpos = state->word;
|
||||
state->state = WAITWORD;
|
||||
state->alen=0;
|
||||
state->alen = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
@ -228,14 +246,16 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
*(state->curpos) = '\0';
|
||||
return 1;
|
||||
} else if ( *(state->prsbuf) == ':' ) {
|
||||
return 1;
|
||||
}
|
||||
else if (*(state->prsbuf) == ':')
|
||||
{
|
||||
if (state->curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
*(state->curpos) = '\0';
|
||||
if ( state->oprisdelim )
|
||||
if (state->oprisdelim)
|
||||
return 1;
|
||||
else
|
||||
state->state = INPOSINFO;
|
||||
@ -257,10 +277,12 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
if ( state->oprisdelim ) {
|
||||
if (state->oprisdelim)
|
||||
{
|
||||
state->prsbuf++;
|
||||
return 1;
|
||||
} else
|
||||
}
|
||||
else
|
||||
state->state = WAITPOSINFO;
|
||||
}
|
||||
else if (*(state->prsbuf) == '\\')
|
||||
@ -278,67 +300,87 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
*(state->curpos) = *(state->prsbuf);
|
||||
state->curpos++;
|
||||
}
|
||||
} else if (state->state == WAITPOSINFO) {
|
||||
if ( *(state->prsbuf) == ':' )
|
||||
state->state=INPOSINFO;
|
||||
}
|
||||
else if (state->state == WAITPOSINFO)
|
||||
{
|
||||
if (*(state->prsbuf) == ':')
|
||||
state->state = INPOSINFO;
|
||||
else
|
||||
return 1;
|
||||
} else if (state->state == INPOSINFO) {
|
||||
if ( isdigit(*(state->prsbuf)) ) {
|
||||
if ( state->alen==0 ) {
|
||||
state->alen=4;
|
||||
state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
|
||||
*(uint16*)(state->pos)=0;
|
||||
} else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
|
||||
state->alen *= 2;
|
||||
state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
|
||||
}
|
||||
else if (state->state == INPOSINFO)
|
||||
{
|
||||
if (isdigit(*(state->prsbuf)))
|
||||
{
|
||||
if (state->alen == 0)
|
||||
{
|
||||
state->alen = 4;
|
||||
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
|
||||
*(uint16 *) (state->pos) = 0;
|
||||
}
|
||||
( *(uint16*)(state->pos) )++;
|
||||
state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
|
||||
if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
|
||||
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
|
||||
{
|
||||
state->alen *= 2;
|
||||
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
|
||||
}
|
||||
(*(uint16 *) (state->pos))++;
|
||||
state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
|
||||
if (state->pos[*(uint16 *) (state->pos)].pos == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("wrong position info")));
|
||||
state->pos[ *(uint16*)(state->pos) ].weight = 0;
|
||||
state->pos[*(uint16 *) (state->pos)].weight = 0;
|
||||
state->state = WAITPOSDELIM;
|
||||
} else
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
} else if (state->state == WAITPOSDELIM) {
|
||||
if ( *(state->prsbuf) == ',' ) {
|
||||
}
|
||||
else if (state->state == WAITPOSDELIM)
|
||||
{
|
||||
if (*(state->prsbuf) == ',')
|
||||
state->state = INPOSINFO;
|
||||
} else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
|
||||
if ( state->pos[ *(uint16*)(state->pos) ].weight )
|
||||
else if (tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
|
||||
{
|
||||
if (state->pos[*(uint16 *) (state->pos)].weight)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
state->pos[ *(uint16*)(state->pos) ].weight = 3;
|
||||
} else if ( tolower(*(state->prsbuf)) == 'b' ) {
|
||||
if ( state->pos[ *(uint16*)(state->pos) ].weight )
|
||||
state->pos[*(uint16 *) (state->pos)].weight = 3;
|
||||
}
|
||||
else if (tolower(*(state->prsbuf)) == 'b')
|
||||
{
|
||||
if (state->pos[*(uint16 *) (state->pos)].weight)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
state->pos[ *(uint16*)(state->pos) ].weight = 2;
|
||||
} else if ( tolower(*(state->prsbuf)) == 'c' ) {
|
||||
if ( state->pos[ *(uint16*)(state->pos) ].weight )
|
||||
state->pos[*(uint16 *) (state->pos)].weight = 2;
|
||||
}
|
||||
else if (tolower(*(state->prsbuf)) == 'c')
|
||||
{
|
||||
if (state->pos[*(uint16 *) (state->pos)].weight)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
state->pos[ *(uint16*)(state->pos) ].weight = 1;
|
||||
} else if ( tolower(*(state->prsbuf)) == 'd' ) {
|
||||
if ( state->pos[ *(uint16*)(state->pos) ].weight )
|
||||
state->pos[*(uint16 *) (state->pos)].weight = 1;
|
||||
}
|
||||
else if (tolower(*(state->prsbuf)) == 'd')
|
||||
{
|
||||
if (state->pos[*(uint16 *) (state->pos)].weight)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
state->pos[ *(uint16*)(state->pos) ].weight = 0;
|
||||
} else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
|
||||
state->pos[*(uint16 *) (state->pos)].weight = 0;
|
||||
}
|
||||
else if (isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0')
|
||||
return 1;
|
||||
} else if ( !isdigit(*(state->prsbuf)) )
|
||||
else if (!isdigit(*(state->prsbuf)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error")));
|
||||
} else
|
||||
}
|
||||
else
|
||||
/* internal error */
|
||||
elog(ERROR, "internal error");
|
||||
state->prsbuf++;
|
||||
@ -352,11 +394,11 @@ tsvector_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *buf = PG_GETARG_CSTRING(0);
|
||||
TI_IN_STATE state;
|
||||
WordEntryIN *arr;
|
||||
WordEntryIN *arr;
|
||||
WordEntry *inarr;
|
||||
int4 len = 0,
|
||||
totallen = 64;
|
||||
tsvector *in;
|
||||
tsvector *in;
|
||||
char *tmpbuf,
|
||||
*cur;
|
||||
int4 i,
|
||||
@ -388,28 +430,30 @@ tsvector_in(PG_FUNCTION_ARGS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long")));
|
||||
arr[len].entry.len= state.curpos - state.word;
|
||||
arr[len].entry.len = state.curpos - state.word;
|
||||
if (cur - tmpbuf > MAXSTRPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("too long value")));
|
||||
arr[len].entry.pos=cur - tmpbuf;
|
||||
arr[len].entry.pos = cur - tmpbuf;
|
||||
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
|
||||
cur += arr[len].entry.len;
|
||||
if ( state.alen ) {
|
||||
arr[len].entry.haspos=1;
|
||||
if (state.alen)
|
||||
{
|
||||
arr[len].entry.haspos = 1;
|
||||
arr[len].pos = state.pos;
|
||||
} else
|
||||
arr[len].entry.haspos=0;
|
||||
}
|
||||
else
|
||||
arr[len].entry.haspos = 0;
|
||||
len++;
|
||||
}
|
||||
pfree(state.word);
|
||||
|
||||
if ( len > 0 )
|
||||
if (len > 0)
|
||||
len = uniqueentry(arr, len, tmpbuf, &buflen);
|
||||
totallen = CALCDATASIZE(len, buflen);
|
||||
in = (tsvector *) palloc(totallen);
|
||||
memset(in,0,totallen);
|
||||
memset(in, 0, totallen);
|
||||
in->len = totallen;
|
||||
in->size = len;
|
||||
cur = STRPTR(in);
|
||||
@ -417,14 +461,15 @@ tsvector_in(PG_FUNCTION_ARGS)
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
|
||||
arr[i].entry.pos=cur - STRPTR(in);
|
||||
arr[i].entry.pos = cur - STRPTR(in);
|
||||
cur += SHORTALIGN(arr[i].entry.len);
|
||||
if ( arr[i].entry.haspos ) {
|
||||
memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
|
||||
cur += (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
|
||||
pfree( arr[i].pos );
|
||||
if (arr[i].entry.haspos)
|
||||
{
|
||||
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
|
||||
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
|
||||
pfree(arr[i].pos);
|
||||
}
|
||||
memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
|
||||
memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
|
||||
}
|
||||
pfree(tmpbuf);
|
||||
pfree(arr);
|
||||
@ -434,7 +479,7 @@ tsvector_in(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
tsvector_length(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
int4 ret = in->size;
|
||||
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
@ -444,26 +489,28 @@ tsvector_length(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
tsvector_out(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
char *outbuf;
|
||||
int4 i,
|
||||
j,
|
||||
lenbuf = 0, pp;
|
||||
lenbuf = 0,
|
||||
pp;
|
||||
WordEntry *ptr = ARRPTR(out);
|
||||
char *curin,
|
||||
*curout;
|
||||
|
||||
lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
|
||||
for (i = 0; i < out->size; i++) {
|
||||
lenbuf += ptr[i].len*2 /*for escape */;
|
||||
if ( ptr[i].haspos )
|
||||
lenbuf += 7*POSDATALEN(out, &(ptr[i]));
|
||||
}
|
||||
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
lenbuf += ptr[i].len * 2 /* for escape */ ;
|
||||
if (ptr[i].haspos)
|
||||
lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
|
||||
}
|
||||
|
||||
curout = outbuf = (char *) palloc(lenbuf);
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
curin = STRPTR(out)+ptr->pos;
|
||||
curin = STRPTR(out) + ptr->pos;
|
||||
if (i != 0)
|
||||
*curout++ = ' ';
|
||||
*curout++ = '\'';
|
||||
@ -481,27 +528,40 @@ tsvector_out(PG_FUNCTION_ARGS)
|
||||
*curout++ = *curin++;
|
||||
}
|
||||
*curout++ = '\'';
|
||||
if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
|
||||
if ((pp = POSDATALEN(out, ptr)) != 0)
|
||||
{
|
||||
WordEntryPos *wptr;
|
||||
|
||||
*curout++ = ':';
|
||||
wptr=POSDATAPTR(out,ptr);
|
||||
while(pp) {
|
||||
sprintf(curout,"%d",wptr->pos);
|
||||
curout=strchr(curout,'\0');
|
||||
switch( wptr->weight ) {
|
||||
case 3: *curout++ = 'A'; break;
|
||||
case 2: *curout++ = 'B'; break;
|
||||
case 1: *curout++ = 'C'; break;
|
||||
case 0:
|
||||
default: break;
|
||||
wptr = POSDATAPTR(out, ptr);
|
||||
while (pp)
|
||||
{
|
||||
sprintf(curout, "%d", wptr->pos);
|
||||
curout = strchr(curout, '\0');
|
||||
switch (wptr->weight)
|
||||
{
|
||||
case 3:
|
||||
*curout++ = 'A';
|
||||
break;
|
||||
case 2:
|
||||
*curout++ = 'B';
|
||||
break;
|
||||
case 1:
|
||||
*curout++ = 'C';
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if ( pp>1 ) *curout++ = ',';
|
||||
pp--; wptr++;
|
||||
if (pp > 1)
|
||||
*curout++ = ',';
|
||||
pp--;
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
*curout='\0';
|
||||
*curout = '\0';
|
||||
outbuf[lenbuf - 1] = '\0';
|
||||
PG_FREE_IF_COPY(out, 0);
|
||||
PG_RETURN_POINTER(outbuf);
|
||||
@ -510,13 +570,15 @@ tsvector_out(PG_FUNCTION_ARGS)
|
||||
static int
|
||||
compareWORD(const void *a, const void *b)
|
||||
{
|
||||
if (((WORD *) a)->len == ((WORD *) b)->len) {
|
||||
int res = strncmp(
|
||||
((WORD *) a)->word,
|
||||
((WORD *) b)->word,
|
||||
((WORD *) b)->len);
|
||||
if ( res==0 )
|
||||
return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1;
|
||||
if (((WORD *) a)->len == ((WORD *) b)->len)
|
||||
{
|
||||
int res = strncmp(
|
||||
((WORD *) a)->word,
|
||||
((WORD *) b)->word,
|
||||
((WORD *) b)->len);
|
||||
|
||||
if (res == 0)
|
||||
return (((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos) ? 1 : -1;
|
||||
return res;
|
||||
}
|
||||
return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
|
||||
@ -527,14 +589,15 @@ uniqueWORD(WORD * a, int4 l)
|
||||
{
|
||||
WORD *ptr,
|
||||
*res;
|
||||
int tmppos;
|
||||
int tmppos;
|
||||
|
||||
if (l == 1) {
|
||||
tmppos=LIMITPOS(a->pos.pos);
|
||||
a->alen=2;
|
||||
a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
|
||||
a->pos.apos[0]=1;
|
||||
a->pos.apos[1]=tmppos;
|
||||
if (l == 1)
|
||||
{
|
||||
tmppos = LIMITPOS(a->pos.pos);
|
||||
a->alen = 2;
|
||||
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
|
||||
a->pos.apos[0] = 1;
|
||||
a->pos.apos[1] = tmppos;
|
||||
return l;
|
||||
}
|
||||
|
||||
@ -542,11 +605,11 @@ uniqueWORD(WORD * a, int4 l)
|
||||
ptr = a + 1;
|
||||
|
||||
qsort((void *) a, l, sizeof(WORD), compareWORD);
|
||||
tmppos=LIMITPOS(a->pos.pos);
|
||||
a->alen=2;
|
||||
a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
|
||||
a->pos.apos[0]=1;
|
||||
a->pos.apos[1]=tmppos;
|
||||
tmppos = LIMITPOS(a->pos.pos);
|
||||
a->alen = 2;
|
||||
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
|
||||
a->pos.apos[0] = 1;
|
||||
a->pos.apos[1] = tmppos;
|
||||
|
||||
while (ptr - a < l)
|
||||
{
|
||||
@ -556,20 +619,24 @@ uniqueWORD(WORD * a, int4 l)
|
||||
res++;
|
||||
res->len = ptr->len;
|
||||
res->word = ptr->word;
|
||||
tmppos=LIMITPOS(ptr->pos.pos);
|
||||
res->alen=2;
|
||||
res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
|
||||
res->pos.apos[0]=1;
|
||||
res->pos.apos[1]=tmppos;
|
||||
} else {
|
||||
tmppos = LIMITPOS(ptr->pos.pos);
|
||||
res->alen = 2;
|
||||
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
|
||||
res->pos.apos[0] = 1;
|
||||
res->pos.apos[1] = tmppos;
|
||||
}
|
||||
else
|
||||
{
|
||||
pfree(ptr->word);
|
||||
if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
|
||||
if ( res->pos.apos[0]+1 >= res->alen ) {
|
||||
res->alen*=2;
|
||||
res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
|
||||
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
|
||||
{
|
||||
if (res->pos.apos[0] + 1 >= res->alen)
|
||||
{
|
||||
res->alen *= 2;
|
||||
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
|
||||
}
|
||||
res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
|
||||
res->pos.apos[0]++;
|
||||
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
|
||||
res->pos.apos[0]++;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
@ -584,25 +651,27 @@ uniqueWORD(WORD * a, int4 l)
|
||||
static tsvector *
|
||||
makevalue(PRSTEXT * prs)
|
||||
{
|
||||
int4 i,j,
|
||||
int4 i,
|
||||
j,
|
||||
lenstr = 0,
|
||||
totallen;
|
||||
tsvector *in;
|
||||
tsvector *in;
|
||||
WordEntry *ptr;
|
||||
char *str,
|
||||
*cur;
|
||||
|
||||
prs->curwords = uniqueWORD(prs->words, prs->curwords);
|
||||
for (i = 0; i < prs->curwords; i++) {
|
||||
for (i = 0; i < prs->curwords; i++)
|
||||
{
|
||||
lenstr += SHORTALIGN(prs->words[i].len);
|
||||
|
||||
if ( prs->words[i].alen )
|
||||
if (prs->words[i].alen)
|
||||
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
|
||||
}
|
||||
|
||||
totallen = CALCDATASIZE(prs->curwords, lenstr);
|
||||
in = (tsvector *) palloc(totallen);
|
||||
memset(in,0,totallen);
|
||||
memset(in, 0, totallen);
|
||||
in->len = totallen;
|
||||
in->size = prs->curwords;
|
||||
|
||||
@ -615,24 +684,27 @@ makevalue(PRSTEXT * prs)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("value is too big")));
|
||||
ptr->pos= cur - str;
|
||||
ptr->pos = cur - str;
|
||||
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
|
||||
pfree(prs->words[i].word);
|
||||
cur += SHORTALIGN(prs->words[i].len);
|
||||
if ( prs->words[i].alen ) {
|
||||
if (prs->words[i].alen)
|
||||
{
|
||||
WordEntryPos *wptr;
|
||||
|
||||
ptr->haspos=1;
|
||||
*(uint16*)cur = prs->words[i].pos.apos[0];
|
||||
wptr=POSDATAPTR(in,ptr);
|
||||
for(j=0;j<*(uint16*)cur;j++) {
|
||||
wptr[j].weight=0;
|
||||
wptr[j].pos=prs->words[i].pos.apos[j+1];
|
||||
|
||||
ptr->haspos = 1;
|
||||
*(uint16 *) cur = prs->words[i].pos.apos[0];
|
||||
wptr = POSDATAPTR(in, ptr);
|
||||
for (j = 0; j < *(uint16 *) cur; j++)
|
||||
{
|
||||
wptr[j].weight = 0;
|
||||
wptr[j].pos = prs->words[i].pos.apos[j + 1];
|
||||
}
|
||||
cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
|
||||
pfree(prs->words[i].pos.apos);
|
||||
} else
|
||||
ptr->haspos=0;
|
||||
}
|
||||
else
|
||||
ptr->haspos = 0;
|
||||
ptr++;
|
||||
}
|
||||
pfree(prs->words);
|
||||
@ -645,70 +717,78 @@ to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
PRSTEXT prs;
|
||||
tsvector *out = NULL;
|
||||
TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0));
|
||||
tsvector *out = NULL;
|
||||
TSCfgInfo *cfg = findcfg(PG_GETARG_INT32(0));
|
||||
|
||||
prs.lenwords = 32;
|
||||
prs.curwords = 0;
|
||||
prs.pos = 0;
|
||||
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
|
||||
|
||||
|
||||
parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
|
||||
if (prs.curwords)
|
||||
out = makevalue(&prs);
|
||||
else {
|
||||
else
|
||||
{
|
||||
pfree(prs.words);
|
||||
out = palloc(CALCDATASIZE(0,0));
|
||||
out->len = CALCDATASIZE(0,0);
|
||||
out = palloc(CALCDATASIZE(0, 0));
|
||||
out->len = CALCDATASIZE(0, 0);
|
||||
out->size = 0;
|
||||
}
|
||||
}
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
Datum
|
||||
to_tsvector_name(PG_FUNCTION_ARGS) {
|
||||
text *cfg=PG_GETARG_TEXT_P(0);
|
||||
Datum res = DirectFunctionCall3(
|
||||
to_tsvector,
|
||||
Int32GetDatum( name2id_cfg( cfg ) ),
|
||||
PG_GETARG_DATUM(1),
|
||||
(Datum)0
|
||||
to_tsvector_name(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *cfg = PG_GETARG_TEXT_P(0);
|
||||
Datum res = DirectFunctionCall3(
|
||||
to_tsvector,
|
||||
Int32GetDatum(name2id_cfg(cfg)),
|
||||
PG_GETARG_DATUM(1),
|
||||
(Datum) 0
|
||||
);
|
||||
PG_FREE_IF_COPY(cfg,0);
|
||||
PG_RETURN_DATUM(res);
|
||||
|
||||
PG_FREE_IF_COPY(cfg, 0);
|
||||
PG_RETURN_DATUM(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
to_tsvector_current(PG_FUNCTION_ARGS) {
|
||||
Datum res = DirectFunctionCall3(
|
||||
to_tsvector,
|
||||
Int32GetDatum( get_currcfg() ),
|
||||
PG_GETARG_DATUM(0),
|
||||
(Datum)0
|
||||
to_tsvector_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum res = DirectFunctionCall3(
|
||||
to_tsvector,
|
||||
Int32GetDatum(get_currcfg()),
|
||||
PG_GETARG_DATUM(0),
|
||||
(Datum) 0
|
||||
);
|
||||
PG_RETURN_DATUM(res);
|
||||
|
||||
PG_RETURN_DATUM(res);
|
||||
}
|
||||
|
||||
static Oid
|
||||
findFunc(char *fname) {
|
||||
FuncCandidateList clist,ptr;
|
||||
Oid funcid = InvalidOid;
|
||||
List *names=makeList1(makeString(fname));
|
||||
findFunc(char *fname)
|
||||
{
|
||||
FuncCandidateList clist,
|
||||
ptr;
|
||||
Oid funcid = InvalidOid;
|
||||
List *names = makeList1(makeString(fname));
|
||||
|
||||
ptr = clist = FuncnameGetCandidates(names, 1);
|
||||
freeList(names);
|
||||
|
||||
if ( !ptr )
|
||||
if (!ptr)
|
||||
return funcid;
|
||||
|
||||
while(ptr) {
|
||||
if ( ptr->args[0] == TEXTOID && funcid == InvalidOid )
|
||||
funcid=ptr->oid;
|
||||
clist=ptr->next;
|
||||
while (ptr)
|
||||
{
|
||||
if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
|
||||
funcid = ptr->oid;
|
||||
clist = ptr->next;
|
||||
pfree(ptr);
|
||||
ptr=clist;
|
||||
ptr = clist;
|
||||
}
|
||||
|
||||
return funcid;
|
||||
@ -724,12 +804,12 @@ tsearch2(PG_FUNCTION_ARGS)
|
||||
Trigger *trigger;
|
||||
Relation rel;
|
||||
HeapTuple rettuple = NULL;
|
||||
TSCfgInfo *cfg=findcfg(get_currcfg());
|
||||
TSCfgInfo *cfg = findcfg(get_currcfg());
|
||||
int numidxattr,
|
||||
i;
|
||||
PRSTEXT prs;
|
||||
Datum datum = (Datum) 0;
|
||||
Oid funcoid = InvalidOid;
|
||||
Oid funcoid = InvalidOid;
|
||||
|
||||
if (!CALLED_AS_TRIGGER(fcinfo))
|
||||
/* internal error */
|
||||
@ -782,8 +862,8 @@ tsearch2(PG_FUNCTION_ARGS)
|
||||
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
|
||||
if (numattr == SPI_ERROR_NOATTRIBUTE)
|
||||
{
|
||||
funcoid=findFunc(trigger->tgargs[i]);
|
||||
if ( funcoid==InvalidOid )
|
||||
funcoid = findFunc(trigger->tgargs[i]);
|
||||
if (funcoid == InvalidOid)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
||||
errmsg("could not find function or field \"%s\"",
|
||||
@ -805,19 +885,22 @@ tsearch2(PG_FUNCTION_ARGS)
|
||||
if (isnull)
|
||||
continue;
|
||||
|
||||
if ( funcoid!=InvalidOid ) {
|
||||
text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
|
||||
funcoid,
|
||||
PointerGetDatum(txt_toasted)
|
||||
));
|
||||
if (funcoid != InvalidOid)
|
||||
{
|
||||
text *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
|
||||
funcoid,
|
||||
PointerGetDatum(txt_toasted)
|
||||
));
|
||||
|
||||
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
|
||||
if ( txt == txttmp )
|
||||
if (txt == txttmp)
|
||||
txt_toasted = PointerGetDatum(txt);
|
||||
} else
|
||||
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
|
||||
}
|
||||
else
|
||||
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
|
||||
|
||||
parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
|
||||
if (txt != (text*)DatumGetPointer(txt_toasted) )
|
||||
if (txt != (text *) DatumGetPointer(txt_toasted))
|
||||
pfree(txt);
|
||||
}
|
||||
|
||||
@ -831,8 +914,9 @@ tsearch2(PG_FUNCTION_ARGS)
|
||||
}
|
||||
else
|
||||
{
|
||||
tsvector *out = palloc(CALCDATASIZE(0,0));
|
||||
out->len = CALCDATASIZE(0,0);
|
||||
tsvector *out = palloc(CALCDATASIZE(0, 0));
|
||||
|
||||
out->len = CALCDATASIZE(0, 0);
|
||||
out->size = 0;
|
||||
datum = PointerGetDatum(out);
|
||||
pfree(prs.words);
|
||||
|
@ -12,23 +12,27 @@
|
||||
#include "utils/builtins.h"
|
||||
#include "storage/bufpage.h"
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
uint32
|
||||
haspos:1,
|
||||
len:11, /* MAX 2Kb */
|
||||
pos:20; /* MAX 1Mb */
|
||||
haspos:1,
|
||||
len:11, /* MAX 2Kb */
|
||||
pos:20; /* MAX 1Mb */
|
||||
} WordEntry;
|
||||
|
||||
#define MAXSTRLEN ( 1<<11 )
|
||||
#define MAXSTRPOS ( 1<<20 )
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
uint16
|
||||
weight:2,
|
||||
pos:14;
|
||||
} WordEntryPos;
|
||||
#define MAXENTRYPOS (1<<14)
|
||||
weight:2,
|
||||
pos:14;
|
||||
} WordEntryPos;
|
||||
|
||||
#define MAXENTRYPOS (1<<14)
|
||||
#define MAXNUMPOS 256
|
||||
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
|
||||
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -43,13 +47,14 @@ typedef struct
|
||||
#define STRPTR(x) ( (char*)x + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)x)->size ) )
|
||||
#define STRSIZE(x) ( ((tsvector*)x)->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)x)->size ) )
|
||||
#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
|
||||
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
|
||||
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
|
||||
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
|
||||
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
|
||||
|
||||
|
||||
typedef struct {
|
||||
WordEntry entry;
|
||||
WordEntryPos *pos;
|
||||
typedef struct
|
||||
{
|
||||
WordEntry entry;
|
||||
WordEntryPos *pos;
|
||||
} WordEntryIN;
|
||||
|
||||
typedef struct
|
||||
@ -60,7 +65,7 @@ typedef struct
|
||||
int4 len;
|
||||
int4 state;
|
||||
int4 alen;
|
||||
WordEntryPos *pos;
|
||||
WordEntryPos *pos;
|
||||
bool oprisdelim;
|
||||
} TI_IN_STATE;
|
||||
|
||||
|
@ -33,30 +33,33 @@ Datum concat(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
strip(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *out;
|
||||
int i,len=0;
|
||||
WordEntry *arrin=ARRPTR(in), *arrout;
|
||||
char *cur;
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *out;
|
||||
int i,
|
||||
len = 0;
|
||||
WordEntry *arrin = ARRPTR(in),
|
||||
*arrout;
|
||||
char *cur;
|
||||
|
||||
for(i=0;i<in->size;i++)
|
||||
len += SHORTALIGN( arrin[i].len );
|
||||
for (i = 0; i < in->size; i++)
|
||||
len += SHORTALIGN(arrin[i].len);
|
||||
|
||||
len = CALCDATASIZE(in->size, len);
|
||||
out=(tsvector*)palloc(len);
|
||||
memset(out,0,len);
|
||||
out->len=len;
|
||||
out->size=in->size;
|
||||
arrout=ARRPTR(out);
|
||||
cur=STRPTR(out);
|
||||
for(i=0;i<in->size;i++) {
|
||||
memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
|
||||
out = (tsvector *) palloc(len);
|
||||
memset(out, 0, len);
|
||||
out->len = len;
|
||||
out->size = in->size;
|
||||
arrout = ARRPTR(out);
|
||||
cur = STRPTR(out);
|
||||
for (i = 0; i < in->size; i++)
|
||||
{
|
||||
memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
|
||||
arrout[i].haspos = 0;
|
||||
arrout[i].len = arrin[i].len;
|
||||
arrout[i].pos = cur - STRPTR(out);
|
||||
cur += SHORTALIGN( arrout[i].len );
|
||||
cur += SHORTALIGN(arrout[i].len);
|
||||
}
|
||||
|
||||
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
@ -64,200 +67,263 @@ strip(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
setweight(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
char cw = PG_GETARG_CHAR(1);
|
||||
tsvector *out;
|
||||
int i,j;
|
||||
WordEntry *entry;
|
||||
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
char cw = PG_GETARG_CHAR(1);
|
||||
tsvector *out;
|
||||
int i,
|
||||
j;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *p;
|
||||
int w=0;
|
||||
int w = 0;
|
||||
|
||||
switch(tolower(cw)) {
|
||||
case 'a': w=3; break;
|
||||
case 'b': w=2; break;
|
||||
case 'c': w=1; break;
|
||||
case 'd': w=0; break;
|
||||
/* internal error */
|
||||
default: elog(ERROR,"unrecognized weight");
|
||||
switch (tolower(cw))
|
||||
{
|
||||
case 'a':
|
||||
w = 3;
|
||||
break;
|
||||
case 'b':
|
||||
w = 2;
|
||||
break;
|
||||
case 'c':
|
||||
w = 1;
|
||||
break;
|
||||
case 'd':
|
||||
w = 0;
|
||||
break;
|
||||
/* internal error */
|
||||
default:
|
||||
elog(ERROR, "unrecognized weight");
|
||||
}
|
||||
|
||||
out=(tsvector*)palloc(in->len);
|
||||
memcpy(out,in,in->len);
|
||||
entry=ARRPTR(out);
|
||||
i=out->size;
|
||||
while(i--) {
|
||||
if ( (j=POSDATALEN(out,entry)) != 0 ) {
|
||||
p=POSDATAPTR(out,entry);
|
||||
while(j--) {
|
||||
p->weight=w;
|
||||
out = (tsvector *) palloc(in->len);
|
||||
memcpy(out, in, in->len);
|
||||
entry = ARRPTR(out);
|
||||
i = out->size;
|
||||
while (i--)
|
||||
{
|
||||
if ((j = POSDATALEN(out, entry)) != 0)
|
||||
{
|
||||
p = POSDATAPTR(out, entry);
|
||||
while (j--)
|
||||
{
|
||||
p->weight = w;
|
||||
p++;
|
||||
}
|
||||
}
|
||||
entry++;
|
||||
}
|
||||
|
||||
|
||||
PG_FREE_IF_COPY(in, 0);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
static int
|
||||
compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b)
|
||||
compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
|
||||
{
|
||||
if ( a->len == b->len)
|
||||
{
|
||||
return strncmp(
|
||||
ptra + a->pos,
|
||||
ptrb + b->pos,
|
||||
a->len);
|
||||
}
|
||||
return ( a->len > b->len ) ? 1 : -1;
|
||||
if (a->len == b->len)
|
||||
{
|
||||
return strncmp(
|
||||
ptra + a->pos,
|
||||
ptrb + b->pos,
|
||||
a->len);
|
||||
}
|
||||
return (a->len > b->len) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int4
|
||||
add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) {
|
||||
uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr);
|
||||
int i;
|
||||
uint16 slen = POSDATALEN(src, srcptr), startlen;
|
||||
WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr);
|
||||
add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos)
|
||||
{
|
||||
uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr);
|
||||
int i;
|
||||
uint16 slen = POSDATALEN(src, srcptr),
|
||||
startlen;
|
||||
WordEntryPos *spos = POSDATAPTR(src, srcptr),
|
||||
*dpos = POSDATAPTR(dest, destptr);
|
||||
|
||||
if ( ! destptr->haspos )
|
||||
*clen=0;
|
||||
if (!destptr->haspos)
|
||||
*clen = 0;
|
||||
|
||||
startlen = *clen;
|
||||
for(i=0; i<slen && *clen<MAXNUMPOS && ( *clen==0 || dpos[ *clen-1 ].pos != MAXENTRYPOS-1 ) ;i++) {
|
||||
dpos[ *clen ].weight = spos[i].weight;
|
||||
dpos[ *clen ].pos = LIMITPOS(spos[i].pos + maxpos);
|
||||
for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || dpos[*clen - 1].pos != MAXENTRYPOS - 1); i++)
|
||||
{
|
||||
dpos[*clen].weight = spos[i].weight;
|
||||
dpos[*clen].pos = LIMITPOS(spos[i].pos + maxpos);
|
||||
(*clen)++;
|
||||
}
|
||||
|
||||
if ( *clen != startlen )
|
||||
destptr->haspos=1;
|
||||
return *clen - startlen;
|
||||
if (*clen != startlen)
|
||||
destptr->haspos = 1;
|
||||
return *clen - startlen;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
concat(PG_FUNCTION_ARGS) {
|
||||
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
tsvector *out;
|
||||
WordEntry *ptr;
|
||||
WordEntry *ptr1,*ptr2;
|
||||
concat(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
tsvector *out;
|
||||
WordEntry *ptr;
|
||||
WordEntry *ptr1,
|
||||
*ptr2;
|
||||
WordEntryPos *p;
|
||||
int maxpos=0,i,j,i1,i2;
|
||||
char *cur;
|
||||
char *data,*data1,*data2;
|
||||
int maxpos = 0,
|
||||
i,
|
||||
j,
|
||||
i1,
|
||||
i2;
|
||||
char *cur;
|
||||
char *data,
|
||||
*data1,
|
||||
*data2;
|
||||
|
||||
ptr=ARRPTR(in1);
|
||||
i=in1->size;
|
||||
while(i--) {
|
||||
if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
|
||||
p=POSDATAPTR(in1,ptr);
|
||||
while(j--) {
|
||||
if ( p->pos > maxpos )
|
||||
ptr = ARRPTR(in1);
|
||||
i = in1->size;
|
||||
while (i--)
|
||||
{
|
||||
if ((j = POSDATALEN(in1, ptr)) != 0)
|
||||
{
|
||||
p = POSDATAPTR(in1, ptr);
|
||||
while (j--)
|
||||
{
|
||||
if (p->pos > maxpos)
|
||||
maxpos = p->pos;
|
||||
p++;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
|
||||
data1=STRPTR(in1); data2=STRPTR(in2);
|
||||
i1=in1->size; i2=in2->size;
|
||||
out=(tsvector*)palloc( in1->len + in2->len );
|
||||
memset(out,0,in1->len + in2->len);
|
||||
|
||||
ptr1 = ARRPTR(in1);
|
||||
ptr2 = ARRPTR(in2);
|
||||
data1 = STRPTR(in1);
|
||||
data2 = STRPTR(in2);
|
||||
i1 = in1->size;
|
||||
i2 = in2->size;
|
||||
out = (tsvector *) palloc(in1->len + in2->len);
|
||||
memset(out, 0, in1->len + in2->len);
|
||||
out->len = in1->len + in2->len;
|
||||
out->size = in1->size + in2->size;
|
||||
data=cur=STRPTR(out);
|
||||
ptr=ARRPTR(out);
|
||||
while( i1 && i2 ) {
|
||||
int cmp=compareEntry(data1,ptr1,data2,ptr2);
|
||||
if ( cmp < 0 ) { /* in1 first */
|
||||
data = cur = STRPTR(out);
|
||||
ptr = ARRPTR(out);
|
||||
while (i1 && i2)
|
||||
{
|
||||
int cmp = compareEntry(data1, ptr1, data2, ptr2);
|
||||
|
||||
if (cmp < 0)
|
||||
{ /* in1 first */
|
||||
ptr->haspos = ptr1->haspos;
|
||||
ptr->len = ptr1->len;
|
||||
memcpy( cur, data1 + ptr1->pos, ptr1->len );
|
||||
ptr->pos = cur - data;
|
||||
cur+=SHORTALIGN(ptr1->len);
|
||||
if ( ptr->haspos ) {
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
|
||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr1->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++; ptr1++; i1--;
|
||||
} else if ( cmp>0 ) { /* in2 first */
|
||||
ptr++;
|
||||
ptr1++;
|
||||
i1--;
|
||||
}
|
||||
else if (cmp > 0)
|
||||
{ /* in2 first */
|
||||
ptr->haspos = ptr2->haspos;
|
||||
ptr->len = ptr2->len;
|
||||
memcpy( cur, data2 + ptr2->pos, ptr2->len );
|
||||
ptr->pos = cur - data;
|
||||
cur+=SHORTALIGN(ptr2->len);
|
||||
if ( ptr->haspos ) {
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
|
||||
if ( addlen == 0 )
|
||||
ptr->haspos=0;
|
||||
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr2->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||
|
||||
if (addlen == 0)
|
||||
ptr->haspos = 0;
|
||||
else
|
||||
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
|
||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++; ptr2++; i2--;
|
||||
} else {
|
||||
ptr++;
|
||||
ptr2++;
|
||||
i2--;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr->haspos = ptr1->haspos | ptr2->haspos;
|
||||
ptr->len = ptr1->len;
|
||||
memcpy( cur, data1 + ptr1->pos, ptr1->len );
|
||||
ptr->pos = cur - data;
|
||||
cur+=SHORTALIGN(ptr1->len);
|
||||
if ( ptr->haspos ) {
|
||||
if ( ptr1->haspos ) {
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
|
||||
if ( ptr2->haspos )
|
||||
cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
|
||||
} else if ( ptr2->haspos ) {
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
|
||||
if ( addlen == 0 )
|
||||
ptr->haspos=0;
|
||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr1->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
if (ptr1->haspos)
|
||||
{
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
if (ptr2->haspos)
|
||||
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
|
||||
}
|
||||
else if (ptr2->haspos)
|
||||
{
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||
|
||||
if (addlen == 0)
|
||||
ptr->haspos = 0;
|
||||
else
|
||||
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
|
||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
}
|
||||
ptr++; ptr1++; ptr2++; i1--; i2--;
|
||||
ptr++;
|
||||
ptr1++;
|
||||
ptr2++;
|
||||
i1--;
|
||||
i2--;
|
||||
}
|
||||
}
|
||||
|
||||
while(i1) {
|
||||
while (i1)
|
||||
{
|
||||
ptr->haspos = ptr1->haspos;
|
||||
ptr->len = ptr1->len;
|
||||
memcpy( cur, data1 + ptr1->pos, ptr1->len );
|
||||
ptr->pos = cur - data;
|
||||
cur+=SHORTALIGN(ptr1->len);
|
||||
if ( ptr->haspos ) {
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
|
||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr1->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++; ptr1++; i1--;
|
||||
ptr++;
|
||||
ptr1++;
|
||||
i1--;
|
||||
}
|
||||
|
||||
while(i2) {
|
||||
while (i2)
|
||||
{
|
||||
ptr->haspos = ptr2->haspos;
|
||||
ptr->len = ptr2->len;
|
||||
memcpy( cur, data2 + ptr2->pos, ptr2->len );
|
||||
ptr->pos = cur - data;
|
||||
cur+=SHORTALIGN(ptr2->len);
|
||||
if ( ptr->haspos ) {
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
|
||||
if ( addlen == 0 )
|
||||
ptr->haspos=0;
|
||||
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
||||
ptr->pos = cur - data;
|
||||
cur += SHORTALIGN(ptr2->len);
|
||||
if (ptr->haspos)
|
||||
{
|
||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||
|
||||
if (addlen == 0)
|
||||
ptr->haspos = 0;
|
||||
else
|
||||
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
|
||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||
}
|
||||
ptr++; ptr2++; i2--;
|
||||
ptr++;
|
||||
ptr2++;
|
||||
i2--;
|
||||
}
|
||||
|
||||
out->size=ptr-ARRPTR(out);
|
||||
out->len = CALCDATASIZE( out->size, cur-data );
|
||||
if ( data != STRPTR(out) )
|
||||
memmove( STRPTR(out), data, cur-data );
|
||||
|
||||
out->size = ptr - ARRPTR(out);
|
||||
out->len = CALCDATASIZE(out->size, cur - data);
|
||||
if (data != STRPTR(out))
|
||||
memmove(STRPTR(out), data, cur - data);
|
||||
|
||||
PG_FREE_IF_COPY(in1, 0);
|
||||
PG_FREE_IF_COPY(in2, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include "deflex.h"
|
||||
|
||||
const char *lex_descr[]={
|
||||
const char *lex_descr[] = {
|
||||
"",
|
||||
"Latin word",
|
||||
"Non-latin word",
|
||||
@ -27,7 +27,7 @@ const char *lex_descr[]={
|
||||
"HTML Entity"
|
||||
};
|
||||
|
||||
const char *tok_alias[]={
|
||||
const char *tok_alias[] = {
|
||||
"",
|
||||
"lword",
|
||||
"nlword",
|
||||
@ -53,4 +53,3 @@ const char *tok_alias[]={
|
||||
"uint",
|
||||
"entity"
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* interface functions to parser
|
||||
/*
|
||||
* interface functions to parser
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <errno.h>
|
||||
@ -21,154 +21,171 @@
|
||||
|
||||
/*********top interface**********/
|
||||
|
||||
static void *plan_getparser=NULL;
|
||||
static Oid current_parser_id=InvalidOid;
|
||||
static void *plan_getparser = NULL;
|
||||
static Oid current_parser_id = InvalidOid;
|
||||
|
||||
void
|
||||
init_prs(Oid id, WParserInfo *prs) {
|
||||
Oid arg[1]={ OIDOID };
|
||||
bool isnull;
|
||||
Datum pars[1]={ ObjectIdGetDatum(id) };
|
||||
int stat;
|
||||
init_prs(Oid id, WParserInfo * prs)
|
||||
{
|
||||
Oid arg[1] = {OIDOID};
|
||||
bool isnull;
|
||||
Datum pars[1] = {ObjectIdGetDatum(id)};
|
||||
int stat;
|
||||
|
||||
memset(prs,0,sizeof(WParserInfo));
|
||||
memset(prs, 0, sizeof(WParserInfo));
|
||||
SPI_connect();
|
||||
if ( !plan_getparser ) {
|
||||
plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
|
||||
if ( !plan_getparser )
|
||||
if (!plan_getparser)
|
||||
{
|
||||
plan_getparser = SPI_saveplan(SPI_prepare("select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1", 1, arg));
|
||||
if (!plan_getparser)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
stat = SPI_execp(plan_getparser, pars, " ", 1);
|
||||
if ( stat < 0 )
|
||||
ts_error (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 ) {
|
||||
Oid oid=InvalidOid;
|
||||
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
{
|
||||
Oid oid = InvalidOid;
|
||||
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
|
||||
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) );
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
|
||||
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
|
||||
prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) );
|
||||
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) );
|
||||
prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
|
||||
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
|
||||
fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
|
||||
prs->prs_id=id;
|
||||
} else
|
||||
prs->prs_id = id;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "No parser with id %d", id);
|
||||
SPI_finish();
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
WParserInfo *last_prs;
|
||||
int len;
|
||||
int reallen;
|
||||
WParserInfo *list;
|
||||
typedef struct
|
||||
{
|
||||
WParserInfo *last_prs;
|
||||
int len;
|
||||
int reallen;
|
||||
WParserInfo *list;
|
||||
SNMap name2id_map;
|
||||
} PrsList;
|
||||
} PrsList;
|
||||
|
||||
static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
|
||||
static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
|
||||
|
||||
void
|
||||
reset_prs(void) {
|
||||
freeSNMap( &(PList.name2id_map) );
|
||||
if ( PList.list )
|
||||
void
|
||||
reset_prs(void)
|
||||
{
|
||||
freeSNMap(&(PList.name2id_map));
|
||||
if (PList.list)
|
||||
free(PList.list);
|
||||
memset(&PList,0,sizeof(PrsList));
|
||||
memset(&PList, 0, sizeof(PrsList));
|
||||
}
|
||||
|
||||
static int
|
||||
compareprs(const void *a, const void *b) {
|
||||
return ((WParserInfo*)a)->prs_id - ((WParserInfo*)b)->prs_id;
|
||||
compareprs(const void *a, const void *b)
|
||||
{
|
||||
return ((WParserInfo *) a)->prs_id - ((WParserInfo *) b)->prs_id;
|
||||
}
|
||||
|
||||
WParserInfo *
|
||||
findprs(Oid id) {
|
||||
findprs(Oid id)
|
||||
{
|
||||
/* last used prs */
|
||||
if ( PList.last_prs && PList.last_prs->prs_id==id )
|
||||
if (PList.last_prs && PList.last_prs->prs_id == id)
|
||||
return PList.last_prs;
|
||||
|
||||
/* already used prs */
|
||||
if ( PList.len != 0 ) {
|
||||
if (PList.len != 0)
|
||||
{
|
||||
WParserInfo key;
|
||||
key.prs_id=id;
|
||||
|
||||
key.prs_id = id;
|
||||
PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
|
||||
if ( PList.last_prs != NULL )
|
||||
if (PList.last_prs != NULL)
|
||||
return PList.last_prs;
|
||||
}
|
||||
|
||||
/* last chance */
|
||||
if ( PList.len==PList.reallen ) {
|
||||
if (PList.len == PList.reallen)
|
||||
{
|
||||
WParserInfo *tmp;
|
||||
int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
|
||||
tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
|
||||
if ( !tmp )
|
||||
ts_error(ERROR,"No memory");
|
||||
PList.reallen=reallen;
|
||||
PList.list=tmp;
|
||||
int reallen = (PList.reallen) ? 2 * PList.reallen : 16;
|
||||
|
||||
tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
|
||||
if (!tmp)
|
||||
ts_error(ERROR, "No memory");
|
||||
PList.reallen = reallen;
|
||||
PList.list = tmp;
|
||||
}
|
||||
PList.last_prs=&(PList.list[PList.len]);
|
||||
PList.last_prs = &(PList.list[PList.len]);
|
||||
init_prs(id, PList.last_prs);
|
||||
PList.len++;
|
||||
qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
|
||||
return findprs(id); /* qsort changed order!! */;
|
||||
return findprs(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
|
||||
static void *plan_name2id=NULL;
|
||||
static void *plan_name2id = NULL;
|
||||
|
||||
Oid
|
||||
name2id_prs(text *name) {
|
||||
Oid arg[1]={ TEXTOID };
|
||||
bool isnull;
|
||||
Datum pars[1]={ PointerGetDatum(name) };
|
||||
int stat;
|
||||
Oid id=findSNMap_t( &(PList.name2id_map), name );
|
||||
|
||||
if ( id )
|
||||
name2id_prs(text *name)
|
||||
{
|
||||
Oid arg[1] = {TEXTOID};
|
||||
bool isnull;
|
||||
Datum pars[1] = {PointerGetDatum(name)};
|
||||
int stat;
|
||||
Oid id = findSNMap_t(&(PList.name2id_map), name);
|
||||
|
||||
if (id)
|
||||
return id;
|
||||
|
||||
|
||||
|
||||
SPI_connect();
|
||||
if ( !plan_name2id ) {
|
||||
plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
|
||||
if ( !plan_name2id )
|
||||
if (!plan_name2id)
|
||||
{
|
||||
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_parser where prs_name = $1", 1, arg));
|
||||
if (!plan_name2id)
|
||||
ts_error(ERROR, "SPI_prepare() failed");
|
||||
}
|
||||
|
||||
stat = SPI_execp(plan_name2id, pars, " ", 1);
|
||||
if ( stat < 0 )
|
||||
ts_error (ERROR, "SPI_execp return %d", stat);
|
||||
if ( SPI_processed > 0 )
|
||||
id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
|
||||
else
|
||||
if (stat < 0)
|
||||
ts_error(ERROR, "SPI_execp return %d", stat);
|
||||
if (SPI_processed > 0)
|
||||
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
|
||||
else
|
||||
ts_error(ERROR, "No parser '%s'", text2char(name));
|
||||
SPI_finish();
|
||||
addSNMap_t( &(PList.name2id_map), name, id );
|
||||
addSNMap_t(&(PList.name2id_map), name, id);
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
/******sql-level interface******/
|
||||
typedef struct {
|
||||
int cur;
|
||||
LexDescr *list;
|
||||
} TypeStorage;
|
||||
typedef struct
|
||||
{
|
||||
int cur;
|
||||
LexDescr *list;
|
||||
} TypeStorage;
|
||||
|
||||
static void
|
||||
setup_firstcall(FuncCallContext *funcctx, Oid prsid) {
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
TypeStorage *st;
|
||||
WParserInfo *prs = findprs(prsid);
|
||||
setup_firstcall(FuncCallContext *funcctx, Oid prsid)
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
TypeStorage *st;
|
||||
WParserInfo *prs = findprs(prsid);
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
st=(TypeStorage*)palloc( sizeof(TypeStorage) );
|
||||
st->cur=0;
|
||||
st->list = (LexDescr*)DatumGetPointer(
|
||||
OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
|
||||
);
|
||||
funcctx->user_fctx = (void*)st;
|
||||
st = (TypeStorage *) palloc(sizeof(TypeStorage));
|
||||
st->cur = 0;
|
||||
st->list = (LexDescr *) DatumGetPointer(
|
||||
OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
|
||||
);
|
||||
funcctx->user_fctx = (void *) st;
|
||||
tupdesc = RelationNameGetTupleDesc("tokentype");
|
||||
funcctx->slot = TupleDescGetSlot(tupdesc);
|
||||
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
||||
@ -176,20 +193,22 @@ setup_firstcall(FuncCallContext *funcctx, Oid prsid) {
|
||||
}
|
||||
|
||||
static Datum
|
||||
process_call(FuncCallContext *funcctx) {
|
||||
TypeStorage *st;
|
||||
process_call(FuncCallContext *funcctx)
|
||||
{
|
||||
TypeStorage *st;
|
||||
|
||||
st=(TypeStorage*)funcctx->user_fctx;
|
||||
if ( st->list && st->list[st->cur].lexid ) {
|
||||
Datum result;
|
||||
char* values[3];
|
||||
char txtid[16];
|
||||
HeapTuple tuple;
|
||||
st = (TypeStorage *) funcctx->user_fctx;
|
||||
if (st->list && st->list[st->cur].lexid)
|
||||
{
|
||||
Datum result;
|
||||
char *values[3];
|
||||
char txtid[16];
|
||||
HeapTuple tuple;
|
||||
|
||||
values[0]=txtid;
|
||||
sprintf(txtid,"%d",st->list[st->cur].lexid);
|
||||
values[1]=st->list[st->cur].alias;
|
||||
values[2]=st->list[st->cur].descr;
|
||||
values[0] = txtid;
|
||||
sprintf(txtid, "%d", st->list[st->cur].lexid);
|
||||
values[1] = st->list[st->cur].alias;
|
||||
values[2] = st->list[st->cur].descr;
|
||||
|
||||
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
||||
result = TupleGetDatum(funcctx->slot, tuple);
|
||||
@ -198,161 +217,179 @@ process_call(FuncCallContext *funcctx) {
|
||||
pfree(values[2]);
|
||||
st->cur++;
|
||||
return result;
|
||||
} else {
|
||||
if ( st->list ) pfree(st->list);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (st->list)
|
||||
pfree(st->list);
|
||||
pfree(st);
|
||||
}
|
||||
return (Datum)0;
|
||||
return (Datum) 0;
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(token_type);
|
||||
Datum token_type(PG_FUNCTION_ARGS);
|
||||
Datum token_type(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
token_type(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
token_type(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
setup_firstcall(funcctx, PG_GETARG_OID(0) );
|
||||
setup_firstcall(funcctx, PG_GETARG_OID(0));
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ( (result=process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(token_type_byname);
|
||||
Datum token_type_byname(PG_FUNCTION_ARGS);
|
||||
Datum token_type_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
token_type_byname(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
token_type_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
setup_firstcall(funcctx, name2id_prs( name ) );
|
||||
PG_FREE_IF_COPY(name,0);
|
||||
setup_firstcall(funcctx, name2id_prs(name));
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ( (result=process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(token_type_current);
|
||||
Datum token_type_current(PG_FUNCTION_ARGS);
|
||||
Datum token_type_current(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
token_type_current(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
token_type_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
if ( current_parser_id==InvalidOid )
|
||||
current_parser_id = name2id_prs( char2text("default") );
|
||||
setup_firstcall(funcctx, current_parser_id );
|
||||
if (current_parser_id == InvalidOid)
|
||||
current_parser_id = name2id_prs(char2text("default"));
|
||||
setup_firstcall(funcctx, current_parser_id);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ( (result=process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curprs);
|
||||
Datum set_curprs(PG_FUNCTION_ARGS);
|
||||
Datum set_curprs(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curprs(PG_FUNCTION_ARGS) {
|
||||
findprs(PG_GETARG_OID(0));
|
||||
current_parser_id=PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
set_curprs(PG_FUNCTION_ARGS)
|
||||
{
|
||||
findprs(PG_GETARG_OID(0));
|
||||
current_parser_id = PG_GETARG_OID(0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(set_curprs_byname);
|
||||
Datum set_curprs_byname(PG_FUNCTION_ARGS);
|
||||
Datum set_curprs_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
set_curprs_byname(PG_FUNCTION_ARGS) {
|
||||
text *name=PG_GETARG_TEXT_P(0);
|
||||
|
||||
DirectFunctionCall1(
|
||||
set_curprs,
|
||||
ObjectIdGetDatum( name2id_prs(name) )
|
||||
);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_RETURN_VOID();
|
||||
set_curprs_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
|
||||
DirectFunctionCall1(
|
||||
set_curprs,
|
||||
ObjectIdGetDatum(name2id_prs(name))
|
||||
);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
int type;
|
||||
char *lexem;
|
||||
} LexemEntry;
|
||||
typedef struct
|
||||
{
|
||||
int type;
|
||||
char *lexem;
|
||||
} LexemEntry;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int cur;
|
||||
int len;
|
||||
LexemEntry *list;
|
||||
} PrsStorage;
|
||||
|
||||
typedef struct {
|
||||
int cur;
|
||||
int len;
|
||||
LexemEntry *list;
|
||||
} PrsStorage;
|
||||
|
||||
|
||||
static void
|
||||
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) {
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
PrsStorage *st;
|
||||
WParserInfo *prs = findprs(prsid);
|
||||
char *lex=NULL;
|
||||
int llen=0, type=0;
|
||||
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt)
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
MemoryContext oldcontext;
|
||||
PrsStorage *st;
|
||||
WParserInfo *prs = findprs(prsid);
|
||||
char *lex = NULL;
|
||||
int llen = 0,
|
||||
type = 0;
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
st=(PrsStorage*)palloc( sizeof(PrsStorage) );
|
||||
st->cur=0;
|
||||
st->len=16;
|
||||
st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
|
||||
st = (PrsStorage *) palloc(sizeof(PrsStorage));
|
||||
st->cur = 0;
|
||||
st->len = 16;
|
||||
st->list = (LexemEntry *) palloc(sizeof(LexemEntry) * st->len);
|
||||
|
||||
prs->prs = (void*)DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prs->start_info),
|
||||
PointerGetDatum(VARDATA(txt)),
|
||||
Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
|
||||
)
|
||||
);
|
||||
prs->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
&(prs->start_info),
|
||||
PointerGetDatum(VARDATA(txt)),
|
||||
Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
|
||||
)
|
||||
);
|
||||
|
||||
while( ( type=DatumGetInt32(FunctionCall3(
|
||||
&(prs->getlexeme_info),
|
||||
PointerGetDatum(prs->prs),
|
||||
PointerGetDatum(&lex),
|
||||
PointerGetDatum(&llen))) ) != 0 ) {
|
||||
while ((type = DatumGetInt32(FunctionCall3(
|
||||
&(prs->getlexeme_info),
|
||||
PointerGetDatum(prs->prs),
|
||||
PointerGetDatum(&lex),
|
||||
PointerGetDatum(&llen)))) != 0)
|
||||
{
|
||||
|
||||
if ( st->cur>=st->len ) {
|
||||
st->len=2*st->len;
|
||||
st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
|
||||
if (st->cur >= st->len)
|
||||
{
|
||||
st->len = 2 * st->len;
|
||||
st->list = (LexemEntry *) repalloc(st->list, sizeof(LexemEntry) * st->len);
|
||||
}
|
||||
st->list[st->cur].lexem = palloc(llen+1);
|
||||
memcpy( st->list[st->cur].lexem, lex, llen);
|
||||
st->list[st->cur].lexem[llen]='\0';
|
||||
st->list[st->cur].type=type;
|
||||
st->list[st->cur].lexem = palloc(llen + 1);
|
||||
memcpy(st->list[st->cur].lexem, lex, llen);
|
||||
st->list[st->cur].lexem[llen] = '\0';
|
||||
st->list[st->cur].type = type;
|
||||
st->cur++;
|
||||
}
|
||||
|
||||
FunctionCall1(
|
||||
&(prs->end_info),
|
||||
PointerGetDatum(prs->prs)
|
||||
);
|
||||
|
||||
st->len=st->cur;
|
||||
st->cur=0;
|
||||
|
||||
funcctx->user_fctx = (void*)st;
|
||||
FunctionCall1(
|
||||
&(prs->end_info),
|
||||
PointerGetDatum(prs->prs)
|
||||
);
|
||||
|
||||
st->len = st->cur;
|
||||
st->cur = 0;
|
||||
|
||||
funcctx->user_fctx = (void *) st;
|
||||
tupdesc = RelationNameGetTupleDesc("tokenout");
|
||||
funcctx->slot = TupleDescGetSlot(tupdesc);
|
||||
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
|
||||
@ -360,132 +397,148 @@ prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) {
|
||||
}
|
||||
|
||||
static Datum
|
||||
prs_process_call(FuncCallContext *funcctx) {
|
||||
PrsStorage *st;
|
||||
prs_process_call(FuncCallContext *funcctx)
|
||||
{
|
||||
PrsStorage *st;
|
||||
|
||||
st=(PrsStorage*)funcctx->user_fctx;
|
||||
if ( st->cur < st->len ) {
|
||||
Datum result;
|
||||
char* values[2];
|
||||
char tid[16];
|
||||
HeapTuple tuple;
|
||||
st = (PrsStorage *) funcctx->user_fctx;
|
||||
if (st->cur < st->len)
|
||||
{
|
||||
Datum result;
|
||||
char *values[2];
|
||||
char tid[16];
|
||||
HeapTuple tuple;
|
||||
|
||||
values[0]=tid;
|
||||
sprintf(tid,"%d",st->list[st->cur].type);
|
||||
values[1]=st->list[st->cur].lexem;
|
||||
values[0] = tid;
|
||||
sprintf(tid, "%d", st->list[st->cur].type);
|
||||
values[1] = st->list[st->cur].lexem;
|
||||
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
|
||||
result = TupleGetDatum(funcctx->slot, tuple);
|
||||
|
||||
pfree(values[1]);
|
||||
st->cur++;
|
||||
return result;
|
||||
} else {
|
||||
if ( st->list ) pfree(st->list);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (st->list)
|
||||
pfree(st->list);
|
||||
pfree(st);
|
||||
}
|
||||
return (Datum)0;
|
||||
return (Datum) 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(parse);
|
||||
Datum parse(PG_FUNCTION_ARGS);
|
||||
Datum parse(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
parse(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
parse(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *txt = PG_GETARG_TEXT_P(1);
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
text *txt = PG_GETARG_TEXT_P(1);
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
|
||||
PG_FREE_IF_COPY(txt,1);
|
||||
prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ( (result=prs_process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(parse_byname);
|
||||
Datum parse_byname(PG_FUNCTION_ARGS);
|
||||
Datum parse_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
parse_byname(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
parse_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
text *txt = PG_GETARG_TEXT_P(1);
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
text *name = PG_GETARG_TEXT_P(0);
|
||||
text *txt = PG_GETARG_TEXT_P(1);
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
|
||||
PG_FREE_IF_COPY(name,0);
|
||||
PG_FREE_IF_COPY(txt,1);
|
||||
prs_setup_firstcall(funcctx, name2id_prs(name), txt);
|
||||
PG_FREE_IF_COPY(name, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ( (result=prs_process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(parse_current);
|
||||
Datum parse_current(PG_FUNCTION_ARGS);
|
||||
Datum parse_current(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
parse_current(PG_FUNCTION_ARGS) {
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
parse_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
text *txt = PG_GETARG_TEXT_P(0);
|
||||
|
||||
if (SRF_IS_FIRSTCALL()) {
|
||||
text *txt = PG_GETARG_TEXT_P(0);
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
if ( current_parser_id==InvalidOid )
|
||||
current_parser_id = name2id_prs( char2text("default") );
|
||||
prs_setup_firstcall(funcctx, current_parser_id,txt );
|
||||
PG_FREE_IF_COPY(txt,0);
|
||||
if (current_parser_id == InvalidOid)
|
||||
current_parser_id = name2id_prs(char2text("default"));
|
||||
prs_setup_firstcall(funcctx, current_parser_id, txt);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
if ( (result=prs_process_call(funcctx)) != (Datum)0 )
|
||||
if ((result = prs_process_call(funcctx)) != (Datum) 0)
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(headline);
|
||||
Datum headline(PG_FUNCTION_ARGS);
|
||||
Datum headline(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
headline(PG_FUNCTION_ARGS) {
|
||||
TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
headline(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSCfgInfo *cfg = findcfg(PG_GETARG_OID(0));
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
|
||||
text *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
|
||||
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
|
||||
HLPRSTEXT prs;
|
||||
text *out;
|
||||
text *out;
|
||||
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||||
|
||||
memset(&prs,0,sizeof(HLPRSTEXT));
|
||||
memset(&prs, 0, sizeof(HLPRSTEXT));
|
||||
prs.lenwords = 32;
|
||||
prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
|
||||
hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
||||
|
||||
|
||||
FunctionCall3(
|
||||
&(prsobj->headline_info),
|
||||
PointerGetDatum(&prs),
|
||||
PointerGetDatum(opt),
|
||||
PointerGetDatum(query)
|
||||
);
|
||||
&(prsobj->headline_info),
|
||||
PointerGetDatum(&prs),
|
||||
PointerGetDatum(opt),
|
||||
PointerGetDatum(query)
|
||||
);
|
||||
|
||||
out = genhl(&prs);
|
||||
|
||||
PG_FREE_IF_COPY(in,1);
|
||||
PG_FREE_IF_COPY(query,2);
|
||||
if ( opt ) PG_FREE_IF_COPY(opt,3);
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
if (opt)
|
||||
PG_FREE_IF_COPY(opt, 3);
|
||||
pfree(prs.words);
|
||||
pfree(prs.startsel);
|
||||
pfree(prs.stopsel);
|
||||
@ -495,35 +548,34 @@ headline(PG_FUNCTION_ARGS) {
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(headline_byname);
|
||||
Datum headline_byname(PG_FUNCTION_ARGS);
|
||||
Datum headline_byname(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
headline_byname(PG_FUNCTION_ARGS) {
|
||||
text *cfg=PG_GETARG_TEXT_P(0);
|
||||
headline_byname(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *cfg = PG_GETARG_TEXT_P(0);
|
||||
|
||||
Datum out=DirectFunctionCall4(
|
||||
headline,
|
||||
ObjectIdGetDatum(name2id_cfg( cfg ) ),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2),
|
||||
( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
|
||||
Datum out = DirectFunctionCall4(
|
||||
headline,
|
||||
ObjectIdGetDatum(name2id_cfg(cfg)),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2),
|
||||
(PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
|
||||
);
|
||||
|
||||
PG_FREE_IF_COPY(cfg,0);
|
||||
PG_RETURN_DATUM(out);
|
||||
PG_FREE_IF_COPY(cfg, 0);
|
||||
PG_RETURN_DATUM(out);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(headline_current);
|
||||
Datum headline_current(PG_FUNCTION_ARGS);
|
||||
Datum headline_current(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
headline_current(PG_FUNCTION_ARGS) {
|
||||
headline_current(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall4(
|
||||
headline,
|
||||
ObjectIdGetDatum(get_currcfg()),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
|
||||
));
|
||||
headline,
|
||||
ObjectIdGetDatum(get_currcfg()),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
(PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -3,26 +3,28 @@
|
||||
#include "postgres.h"
|
||||
#include "fmgr.h"
|
||||
|
||||
typedef struct {
|
||||
Oid prs_id;
|
||||
FmgrInfo start_info;
|
||||
FmgrInfo getlexeme_info;
|
||||
FmgrInfo end_info;
|
||||
FmgrInfo headline_info;
|
||||
Oid lextype;
|
||||
void *prs;
|
||||
} WParserInfo;
|
||||
typedef struct
|
||||
{
|
||||
Oid prs_id;
|
||||
FmgrInfo start_info;
|
||||
FmgrInfo getlexeme_info;
|
||||
FmgrInfo end_info;
|
||||
FmgrInfo headline_info;
|
||||
Oid lextype;
|
||||
void *prs;
|
||||
} WParserInfo;
|
||||
|
||||
void init_prs(Oid id, WParserInfo *prs);
|
||||
WParserInfo* findprs(Oid id);
|
||||
Oid name2id_prs(text *name);
|
||||
void reset_prs(void);
|
||||
void init_prs(Oid id, WParserInfo * prs);
|
||||
WParserInfo *findprs(Oid id);
|
||||
Oid name2id_prs(text *name);
|
||||
void reset_prs(void);
|
||||
|
||||
|
||||
typedef struct {
|
||||
int lexid;
|
||||
char *alias;
|
||||
char *descr;
|
||||
} LexDescr;
|
||||
typedef struct
|
||||
{
|
||||
int lexid;
|
||||
char *alias;
|
||||
char *descr;
|
||||
} LexDescr;
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* default word parser
|
||||
/*
|
||||
* default word parser
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include <errno.h>
|
||||
@ -17,40 +17,44 @@
|
||||
#include "wordparser/deflex.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_lextype);
|
||||
Datum prsd_lextype(PG_FUNCTION_ARGS);
|
||||
Datum prsd_lextype(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
prsd_lextype(PG_FUNCTION_ARGS) {
|
||||
LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
|
||||
int i;
|
||||
Datum
|
||||
prsd_lextype(PG_FUNCTION_ARGS)
|
||||
{
|
||||
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
|
||||
int i;
|
||||
|
||||
for(i=1;i<=LASTNUM;i++) {
|
||||
descr[i-1].lexid = i;
|
||||
descr[i-1].alias = pstrdup(tok_alias[i]);
|
||||
descr[i-1].descr = pstrdup(lex_descr[i]);
|
||||
for (i = 1; i <= LASTNUM; i++)
|
||||
{
|
||||
descr[i - 1].lexid = i;
|
||||
descr[i - 1].alias = pstrdup(tok_alias[i]);
|
||||
descr[i - 1].descr = pstrdup(lex_descr[i]);
|
||||
}
|
||||
|
||||
descr[LASTNUM].lexid=0;
|
||||
|
||||
|
||||
descr[LASTNUM].lexid = 0;
|
||||
|
||||
PG_RETURN_POINTER(descr);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_start);
|
||||
Datum prsd_start(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_start(PG_FUNCTION_ARGS) {
|
||||
start_parse_str( (char*)PG_GETARG_POINTER(0), PG_GETARG_INT32(1) );
|
||||
Datum prsd_start(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_start(PG_FUNCTION_ARGS)
|
||||
{
|
||||
start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1));
|
||||
PG_RETURN_POINTER(NULL);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_getlexeme);
|
||||
Datum prsd_getlexeme(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_getlexeme(PG_FUNCTION_ARGS) {
|
||||
Datum prsd_getlexeme(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_getlexeme(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
|
||||
char **t=(char**)PG_GETARG_POINTER(1);
|
||||
int *tlen=(int*)PG_GETARG_POINTER(2);
|
||||
int type=tsearch2_yylex();
|
||||
char **t = (char **) PG_GETARG_POINTER(1);
|
||||
int *tlen = (int *) PG_GETARG_POINTER(2);
|
||||
int type = tsearch2_yylex();
|
||||
|
||||
*t = token;
|
||||
*tlen = tokenlen;
|
||||
@ -58,34 +62,39 @@ prsd_getlexeme(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_end);
|
||||
Datum prsd_end(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_end(PG_FUNCTION_ARGS) {
|
||||
Datum prsd_end(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_end(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
|
||||
end_parse();
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
#define LEAVETOKEN(x) ( (x)==12 )
|
||||
#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
|
||||
#define ENDPUNCTOKEN(x) ( (x)==12 )
|
||||
#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
|
||||
#define ENDPUNCTOKEN(x) ( (x)==12 )
|
||||
|
||||
|
||||
#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
|
||||
#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
|
||||
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
|
||||
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
|
||||
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
|
||||
|
||||
typedef struct {
|
||||
HLWORD *words;
|
||||
int len;
|
||||
} hlCheck;
|
||||
typedef struct
|
||||
{
|
||||
HLWORD *words;
|
||||
int len;
|
||||
} hlCheck;
|
||||
|
||||
static bool
|
||||
checkcondition_HL(void *checkval, ITEM *val) {
|
||||
int i;
|
||||
for(i=0;i<((hlCheck*)checkval)->len;i++) {
|
||||
if ( ((hlCheck*)checkval)->words[i].item==val )
|
||||
checkcondition_HL(void *checkval, ITEM * val)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ((hlCheck *) checkval)->len; i++)
|
||||
{
|
||||
if (((hlCheck *) checkval)->words[i].item == val)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -93,21 +102,28 @@ checkcondition_HL(void *checkval, ITEM *val) {
|
||||
|
||||
|
||||
static bool
|
||||
hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
|
||||
int i,j;
|
||||
ITEM *item=GETQUERY(query);
|
||||
int pos=*p;
|
||||
*q=0;
|
||||
*p=0x7fffffff;
|
||||
hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q)
|
||||
{
|
||||
int i,
|
||||
j;
|
||||
ITEM *item = GETQUERY(query);
|
||||
int pos = *p;
|
||||
|
||||
for(j=0;j<query->size;j++) {
|
||||
if ( item->type != VAL ) {
|
||||
*q = 0;
|
||||
*p = 0x7fffffff;
|
||||
|
||||
for (j = 0; j < query->size; j++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
for(i=pos;i<prs->curwords;i++) {
|
||||
if ( prs->words[i].item == item ) {
|
||||
if ( i>*q)
|
||||
for (i = pos; i < prs->curwords; i++)
|
||||
{
|
||||
if (prs->words[i].item == item)
|
||||
{
|
||||
if (i > *q)
|
||||
*q = i;
|
||||
break;
|
||||
}
|
||||
@ -115,32 +131,39 @@ hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
|
||||
item++;
|
||||
}
|
||||
|
||||
if ( *q==0 )
|
||||
if (*q == 0)
|
||||
return false;
|
||||
|
||||
item=GETQUERY(query);
|
||||
for(j=0;j<query->size;j++) {
|
||||
if ( item->type != VAL ) {
|
||||
item = GETQUERY(query);
|
||||
for (j = 0; j < query->size; j++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
for(i=*q;i>=pos;i--) {
|
||||
if ( prs->words[i].item == item ) {
|
||||
if ( i<*p )
|
||||
*p=i;
|
||||
for (i = *q; i >= pos; i--)
|
||||
{
|
||||
if (prs->words[i].item == item)
|
||||
{
|
||||
if (i < *p)
|
||||
*p = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
item++;
|
||||
}
|
||||
}
|
||||
|
||||
if ( *p<=*q ) {
|
||||
hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
|
||||
if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) {
|
||||
if (*p <= *q)
|
||||
{
|
||||
hlCheck ch = {&(prs->words[*p]), *q - *p + 1};
|
||||
|
||||
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
|
||||
return true;
|
||||
} else {
|
||||
else
|
||||
{
|
||||
(*p)++;
|
||||
return hlCover(prs,query,p,q);
|
||||
return hlCover(prs, query, p, q);
|
||||
}
|
||||
}
|
||||
|
||||
@ -148,45 +171,54 @@ hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prsd_headline);
|
||||
Datum prsd_headline(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_headline(PG_FUNCTION_ARGS) {
|
||||
HLPRSTEXT *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
|
||||
text *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
|
||||
QUERYTYPE *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
|
||||
Datum prsd_headline(PG_FUNCTION_ARGS);
|
||||
Datum
|
||||
prsd_headline(PG_FUNCTION_ARGS)
|
||||
{
|
||||
HLPRSTEXT *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0);
|
||||
text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(2); /* can't be toasted */
|
||||
|
||||
/* from opt + start and and tag */
|
||||
int min_words=15;
|
||||
int max_words=35;
|
||||
int shortword=3;
|
||||
int min_words = 15;
|
||||
int max_words = 35;
|
||||
int shortword = 3;
|
||||
|
||||
int p=0,q=0;
|
||||
int bestb=-1,beste=-1;
|
||||
int bestlen=-1;
|
||||
int pose=0, poslen, curlen;
|
||||
int p = 0,
|
||||
q = 0;
|
||||
int bestb = -1,
|
||||
beste = -1;
|
||||
int bestlen = -1;
|
||||
int pose = 0,
|
||||
poslen,
|
||||
curlen;
|
||||
|
||||
int i;
|
||||
int i;
|
||||
|
||||
/*config*/
|
||||
prs->startsel=NULL;
|
||||
prs->stopsel=NULL;
|
||||
if ( opt ) {
|
||||
Map *map,*mptr;
|
||||
|
||||
parse_cfgdict(opt,&map);
|
||||
mptr=map;
|
||||
/* config */
|
||||
prs->startsel = NULL;
|
||||
prs->stopsel = NULL;
|
||||
if (opt)
|
||||
{
|
||||
Map *map,
|
||||
*mptr;
|
||||
|
||||
parse_cfgdict(opt, &map);
|
||||
mptr = map;
|
||||
|
||||
while (mptr && mptr->key)
|
||||
{
|
||||
if (strcasecmp(mptr->key, "MaxWords") == 0)
|
||||
max_words = pg_atoi(mptr->value, 4, 1);
|
||||
else if (strcasecmp(mptr->key, "MinWords") == 0)
|
||||
min_words = pg_atoi(mptr->value, 4, 1);
|
||||
else if (strcasecmp(mptr->key, "ShortWord") == 0)
|
||||
shortword = pg_atoi(mptr->value, 4, 1);
|
||||
else if (strcasecmp(mptr->key, "StartSel") == 0)
|
||||
prs->startsel = pstrdup(mptr->value);
|
||||
else if (strcasecmp(mptr->key, "StopSel") == 0)
|
||||
prs->stopsel = pstrdup(mptr->value);
|
||||
|
||||
while(mptr && mptr->key) {
|
||||
if ( strcasecmp(mptr->key,"MaxWords")==0 )
|
||||
max_words=pg_atoi(mptr->value,4,1);
|
||||
else if ( strcasecmp(mptr->key,"MinWords")==0 )
|
||||
min_words=pg_atoi(mptr->value,4,1);
|
||||
else if ( strcasecmp(mptr->key,"ShortWord")==0 )
|
||||
shortword=pg_atoi(mptr->value,4,1);
|
||||
else if ( strcasecmp(mptr->key,"StartSel")==0 )
|
||||
prs->startsel=pstrdup(mptr->value);
|
||||
else if ( strcasecmp(mptr->key,"StopSel")==0 )
|
||||
prs->stopsel=pstrdup(mptr->value);
|
||||
|
||||
pfree(mptr->key);
|
||||
pfree(mptr->value);
|
||||
|
||||
@ -194,104 +226,118 @@ prsd_headline(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
pfree(map);
|
||||
|
||||
if ( min_words >= max_words )
|
||||
if (min_words >= max_words)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("must be MinWords < MaxWords")));
|
||||
if ( min_words<=0 )
|
||||
if (min_words <= 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("must be MinWords > 0")));
|
||||
if ( shortword<0 )
|
||||
if (shortword < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("must be ShortWord >= 0")));
|
||||
}
|
||||
|
||||
while( hlCover(prs,query,&p,&q) ) {
|
||||
while (hlCover(prs, query, &p, &q))
|
||||
{
|
||||
/* find cover len in words */
|
||||
curlen=0;
|
||||
poslen=0;
|
||||
for(i=p;i<=q && curlen < max_words ; i++) {
|
||||
if ( !NONWORDTOKEN(prs->words[i].type) )
|
||||
curlen = 0;
|
||||
poslen = 0;
|
||||
for (i = p; i <= q && curlen < max_words; i++)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if ( prs->words[i].item && !prs->words[i].repeated )
|
||||
poslen++;
|
||||
pose=i;
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
pose = i;
|
||||
}
|
||||
|
||||
if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
|
||||
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
|
||||
{
|
||||
/* best already finded, so try one more cover */
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( curlen < max_words ) { /* find good end */
|
||||
for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
|
||||
if ( i!=q ) {
|
||||
if ( !NONWORDTOKEN(prs->words[i].type) )
|
||||
if (curlen < max_words)
|
||||
{ /* find good end */
|
||||
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
|
||||
{
|
||||
if (i != q)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
if ( prs->words[i].item && !prs->words[i].repeated )
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen++;
|
||||
}
|
||||
pose=i;
|
||||
if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
if ( curlen>=min_words )
|
||||
if (curlen >= min_words)
|
||||
break;
|
||||
}
|
||||
} else { /* shorter cover :((( */
|
||||
for(;curlen>min_words;i--) {
|
||||
if ( !NONWORDTOKEN(prs->words[i].type) )
|
||||
}
|
||||
else
|
||||
{ /* shorter cover :((( */
|
||||
for (; curlen > min_words; i--)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen--;
|
||||
if ( prs->words[i].item && !prs->words[i].repeated )
|
||||
if (prs->words[i].item && !prs->words[i].repeated)
|
||||
poslen--;
|
||||
pose=i;
|
||||
if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
|
||||
pose = i;
|
||||
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
|
||||
( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
|
||||
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
|
||||
bestb=p; beste=pose;
|
||||
bestlen=poslen;
|
||||
}
|
||||
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
|
||||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
|
||||
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
|
||||
{
|
||||
bestb = p;
|
||||
beste = pose;
|
||||
bestlen = poslen;
|
||||
}
|
||||
|
||||
p++;
|
||||
}
|
||||
|
||||
if ( bestlen<0 ) {
|
||||
curlen=0;
|
||||
poslen=0;
|
||||
for(i=0;i<prs->curwords && curlen<min_words ; i++) {
|
||||
if ( !NONWORDTOKEN(prs->words[i].type) )
|
||||
if (bestlen < 0)
|
||||
{
|
||||
curlen = 0;
|
||||
poslen = 0;
|
||||
for (i = 0; i < prs->curwords && curlen < min_words; i++)
|
||||
{
|
||||
if (!NONWORDTOKEN(prs->words[i].type))
|
||||
curlen++;
|
||||
pose=i;
|
||||
pose = i;
|
||||
}
|
||||
bestb=0; beste=pose;
|
||||
bestb = 0;
|
||||
beste = pose;
|
||||
}
|
||||
|
||||
for(i=bestb;i<=beste;i++) {
|
||||
if ( prs->words[i].item )
|
||||
prs->words[i].selected=1;
|
||||
if ( prs->words[i].repeated )
|
||||
prs->words[i].skip=1;
|
||||
if ( HLIDIGNORE(prs->words[i].type) )
|
||||
prs->words[i].replace=1;
|
||||
for (i = bestb; i <= beste; i++)
|
||||
{
|
||||
if (prs->words[i].item)
|
||||
prs->words[i].selected = 1;
|
||||
if (prs->words[i].repeated)
|
||||
prs->words[i].skip = 1;
|
||||
if (HLIDIGNORE(prs->words[i].type))
|
||||
prs->words[i].replace = 1;
|
||||
|
||||
prs->words[i].in=1;
|
||||
prs->words[i].in = 1;
|
||||
}
|
||||
|
||||
if (!prs->startsel)
|
||||
prs->startsel=pstrdup("<b>");
|
||||
prs->startsel = pstrdup("<b>");
|
||||
if (!prs->stopsel)
|
||||
prs->stopsel=pstrdup("</b>");
|
||||
prs->startsellen=strlen(prs->startsel);
|
||||
prs->stopsellen=strlen(prs->stopsel);
|
||||
prs->stopsel = pstrdup("</b>");
|
||||
prs->startsellen = strlen(prs->startsel);
|
||||
prs->stopsellen = strlen(prs->stopsel);
|
||||
|
||||
PG_RETURN_POINTER(prs);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user