1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

pgindent run.

This commit is contained in:
Bruce Momjian
2003-08-04 00:43:34 +00:00
parent 63354a0228
commit 089003fb46
554 changed files with 24888 additions and 21245 deletions

View File

@ -4,80 +4,99 @@
#include "ts_cfg.h"
#include "dict.h"
text*
char2text(char* in) {
text *
char2text(char *in)
{
return charl2text(in, strlen(in));
}
text* charl2text(char* in, int len) {
text *out=(text*)palloc(len+VARHDRSZ);
text *
charl2text(char *in, int len)
{
text *out = (text *) palloc(len + VARHDRSZ);
memcpy(VARDATA(out), in, len);
VARATT_SIZEP(out) = len+VARHDRSZ;
VARATT_SIZEP(out) = len + VARHDRSZ;
return out;
}
char
*text2char(text* in) {
char *out=palloc( VARSIZE(in) );
memcpy(out, VARDATA(in), VARSIZE(in)-VARHDRSZ);
out[ VARSIZE(in)-VARHDRSZ ] ='\0';
char
*
text2char(text *in)
{
char *out = palloc(VARSIZE(in));
memcpy(out, VARDATA(in), VARSIZE(in) - VARHDRSZ);
out[VARSIZE(in) - VARHDRSZ] = '\0';
return out;
}
char
*pnstrdup(char* in, int len) {
char *out=palloc( len+1 );
char
*
pnstrdup(char *in, int len)
{
char *out = palloc(len + 1);
memcpy(out, in, len);
out[len]='\0';
out[len] = '\0';
return out;
}
text
*ptextdup(text* in) {
text *out=(text*)palloc( VARSIZE(in) );
memcpy(out,in,VARSIZE(in));
text
*
ptextdup(text *in)
{
text *out = (text *) palloc(VARSIZE(in));
memcpy(out, in, VARSIZE(in));
return out;
}
text
*mtextdup(text* in) {
text *out=(text*)malloc( VARSIZE(in) );
if ( !out )
text
*
mtextdup(text *in)
{
text *out = (text *) malloc(VARSIZE(in));
if (!out)
ts_error(ERROR, "No memory");
memcpy(out,in,VARSIZE(in));
memcpy(out, in, VARSIZE(in));
return out;
}
void
ts_error(int state, const char *format, ...) {
va_list args;
int tlen = 128, len=0;
char *buf;
void
ts_error(int state, const char *format,...)
{
va_list args;
int tlen = 128,
len = 0;
char *buf;
reset_cfg();
reset_dict();
reset_prs();
va_start(args, format);
buf = palloc(tlen);
len = vsnprintf(buf, tlen-1, format, args);
if ( len >= tlen ) {
tlen=len+1;
buf = repalloc( buf, tlen );
vsnprintf(buf, tlen-1, format, args);
len = vsnprintf(buf, tlen - 1, format, args);
if (len >= tlen)
{
tlen = len + 1;
buf = repalloc(buf, tlen);
vsnprintf(buf, tlen - 1, format, args);
}
va_end(args);
/* ?? internal error ?? */
elog(state, "%s", buf);
pfree(buf);
}
int
text_cmp(text *a, text *b) {
if ( VARSIZE(a) == VARSIZE(b) )
return strncmp( VARDATA(a), VARDATA(b), VARSIZE(a)-VARHDRSZ );
return (int)VARSIZE(a) - (int)VARSIZE(b);
int
text_cmp(text *a, text *b)
{
if (VARSIZE(a) == VARSIZE(b))
return strncmp(VARDATA(a), VARDATA(b), VARSIZE(a) - VARHDRSZ);
return (int) VARSIZE(a) - (int) VARSIZE(b);
}

View File

@ -7,18 +7,18 @@
#define PG_NARGS() (fcinfo->nargs)
#endif
text* char2text(char* in);
text* charl2text(char* in, int len);
char *text2char(text* in);
char *pnstrdup(char* in, int len);
text *ptextdup(text* in);
text *mtextdup(text* in);
text *char2text(char *in);
text *charl2text(char *in, int len);
char *text2char(text *in);
char *pnstrdup(char *in, int len);
text *ptextdup(text *in);
text *mtextdup(text *in);
int text_cmp(text *a, text *b);
int text_cmp(text *a, text *b);
#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
void ts_error(int state, const char *format, ...);
void ts_error(int state, const char *format,...);
#endif

View File

@ -1,5 +1,5 @@
/*
* interface functions to dictionary
/*
* interface functions to dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
@ -19,260 +19,285 @@
/*********top interface**********/
static void *plan_getdict=NULL;
static void *plan_getdict = NULL;
void
init_dict(Oid id, DictInfo *dict) {
Oid arg[1]={ OIDOID };
bool isnull;
Datum pars[1]={ ObjectIdGetDatum(id) };
int stat;
init_dict(Oid id, DictInfo * dict)
{
Oid arg[1] = {OIDOID};
bool isnull;
Datum pars[1] = {ObjectIdGetDatum(id)};
int stat;
memset(dict,0,sizeof(DictInfo));
memset(dict, 0, sizeof(DictInfo));
SPI_connect();
if ( !plan_getdict ) {
plan_getdict = SPI_saveplan( SPI_prepare( "select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1" , 1, arg ) );
if ( !plan_getdict )
if (!plan_getdict)
{
plan_getdict = SPI_saveplan(SPI_prepare("select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1", 1, arg));
if (!plan_getdict)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_getdict, pars, " ", 1);
if ( stat < 0 )
ts_error (ERROR, "SPI_execp return %d", stat);
if ( SPI_processed > 0 ) {
Datum opt;
Oid oid=InvalidOid;
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
if ( !(isnull || oid==InvalidOid) ) {
opt=SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
dict->dictionary=(void*)DatumGetPointer(OidFunctionCall1(oid, opt));
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
{
Datum opt;
Oid oid = InvalidOid;
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
if (!(isnull || oid == InvalidOid))
{
opt = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
dict->dictionary = (void *) DatumGetPointer(OidFunctionCall1(oid, opt));
}
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
if ( isnull || oid==InvalidOid )
oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
if (isnull || oid == InvalidOid)
ts_error(ERROR, "Null dict_lexize for dictonary %d", id);
fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
dict->dict_id=id;
} else
dict->dict_id = id;
}
else
ts_error(ERROR, "No dictionary with id %d", id);
SPI_finish();
}
typedef struct {
DictInfo *last_dict;
int len;
int reallen;
DictInfo *list;
typedef struct
{
DictInfo *last_dict;
int len;
int reallen;
DictInfo *list;
SNMap name2id_map;
} DictList;
} DictList;
static DictList DList = {NULL,0,0,NULL,{0,0,NULL}};
static DictList DList = {NULL, 0, 0, NULL, {0, 0, NULL}};
void
reset_dict(void) {
freeSNMap( &(DList.name2id_map) );
reset_dict(void)
{
freeSNMap(&(DList.name2id_map));
/* XXX need to free DList.list[*].dictionary */
if ( DList.list )
if (DList.list)
free(DList.list);
memset(&DList,0,sizeof(DictList));
memset(&DList, 0, sizeof(DictList));
}
static int
comparedict(const void *a, const void *b) {
return ((DictInfo*)a)->dict_id - ((DictInfo*)b)->dict_id;
comparedict(const void *a, const void *b)
{
return ((DictInfo *) a)->dict_id - ((DictInfo *) b)->dict_id;
}
DictInfo *
finddict(Oid id) {
finddict(Oid id)
{
/* last used dict */
if ( DList.last_dict && DList.last_dict->dict_id==id )
if (DList.last_dict && DList.last_dict->dict_id == id)
return DList.last_dict;
/* already used dict */
if ( DList.len != 0 ) {
DictInfo key;
key.dict_id=id;
if (DList.len != 0)
{
DictInfo key;
key.dict_id = id;
DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict);
if ( DList.last_dict != NULL )
if (DList.last_dict != NULL)
return DList.last_dict;
}
/* last chance */
if ( DList.len==DList.reallen ) {
DictInfo *tmp;
int reallen = ( DList.reallen ) ? 2*DList.reallen : 16;
tmp=(DictInfo*)realloc(DList.list,sizeof(DictInfo)*reallen);
if ( !tmp )
ts_error(ERROR,"No memory");
DList.reallen=reallen;
DList.list=tmp;
if (DList.len == DList.reallen)
{
DictInfo *tmp;
int reallen = (DList.reallen) ? 2 * DList.reallen : 16;
tmp = (DictInfo *) realloc(DList.list, sizeof(DictInfo) * reallen);
if (!tmp)
ts_error(ERROR, "No memory");
DList.reallen = reallen;
DList.list = tmp;
}
DList.last_dict=&(DList.list[DList.len]);
DList.last_dict = &(DList.list[DList.len]);
init_dict(id, DList.last_dict);
DList.len++;
qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
return finddict(id); /* qsort changed order!! */;
return finddict(id); /* qsort changed order!! */ ;
}
static void *plan_name2id=NULL;
static void *plan_name2id = NULL;
Oid
name2id_dict(text *name) {
Oid arg[1]={ TEXTOID };
bool isnull;
Datum pars[1]={ PointerGetDatum(name) };
int stat;
Oid id=findSNMap_t( &(DList.name2id_map), name );
name2id_dict(text *name)
{
Oid arg[1] = {TEXTOID};
bool isnull;
Datum pars[1] = {PointerGetDatum(name)};
int stat;
Oid id = findSNMap_t(&(DList.name2id_map), name);
if ( id )
if (id)
return id;
SPI_connect();
if ( !plan_name2id ) {
plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_dict where dict_name = $1" , 1, arg ) );
if ( !plan_name2id )
if (!plan_name2id)
{
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_dict where dict_name = $1", 1, arg));
if (!plan_name2id)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_name2id, pars, " ", 1);
if ( stat < 0 )
ts_error (ERROR, "SPI_execp return %d", stat);
if ( SPI_processed > 0 )
id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
else
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
else
ts_error(ERROR, "No dictionary with name '%s'", text2char(name));
SPI_finish();
addSNMap_t( &(DList.name2id_map), name, id );
addSNMap_t(&(DList.name2id_map), name, id);
return id;
}
/******sql-level interface******/
PG_FUNCTION_INFO_V1(lexize);
Datum lexize(PG_FUNCTION_ARGS);
Datum lexize(PG_FUNCTION_ARGS);
Datum
lexize(PG_FUNCTION_ARGS) {
text *in=PG_GETARG_TEXT_P(1);
DictInfo *dict = finddict( PG_GETARG_OID(0) );
char **res, **ptr;
Datum *da;
ArrayType *a;
lexize(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_P(1);
DictInfo *dict = finddict(PG_GETARG_OID(0));
char **res,
**ptr;
Datum *da;
ArrayType *a;
ptr = res = (char**)DatumGetPointer(
FunctionCall3(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
Int32GetDatum(VARSIZE(in)-VARHDRSZ)
)
);
ptr = res = (char **) DatumGetPointer(
FunctionCall3(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
Int32GetDatum(VARSIZE(in) - VARHDRSZ)
)
);
PG_FREE_IF_COPY(in, 1);
if ( !res ) {
if (PG_NARGS() > 2)
if (!res)
{
if (PG_NARGS() > 2)
PG_RETURN_POINTER(NULL);
else
PG_RETURN_NULL();
}
while(*ptr) ptr++;
da = (Datum*)palloc(sizeof(Datum)*(ptr-res+1));
ptr=res;
while(*ptr) {
da[ ptr-res ] = PointerGetDatum( char2text(*ptr) );
while (*ptr)
ptr++;
da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1));
ptr = res;
while (*ptr)
{
da[ptr - res] = PointerGetDatum(char2text(*ptr));
ptr++;
}
a = construct_array(
da,
ptr-res,
TEXTOID,
-1,
false,
'i'
);
da,
ptr - res,
TEXTOID,
-1,
false,
'i'
);
ptr=res;
while(*ptr) {
pfree( DatumGetPointer(da[ ptr-res ]) );
pfree( *ptr );
ptr = res;
while (*ptr)
{
pfree(DatumGetPointer(da[ptr - res]));
pfree(*ptr);
ptr++;
}
pfree(res);
pfree(da);
PG_RETURN_POINTER(a);
PG_RETURN_POINTER(a);
}
PG_FUNCTION_INFO_V1(lexize_byname);
Datum lexize_byname(PG_FUNCTION_ARGS);
Datum
lexize_byname(PG_FUNCTION_ARGS) {
text *dictname=PG_GETARG_TEXT_P(0);
Datum res;
Datum lexize_byname(PG_FUNCTION_ARGS);
Datum
lexize_byname(PG_FUNCTION_ARGS)
{
text *dictname = PG_GETARG_TEXT_P(0);
Datum res;
strdup("simple");
res=DirectFunctionCall3(
lexize,
ObjectIdGetDatum(name2id_dict(dictname)),
PG_GETARG_DATUM(1),
(Datum)0
);
res = DirectFunctionCall3(
lexize,
ObjectIdGetDatum(name2id_dict(dictname)),
PG_GETARG_DATUM(1),
(Datum) 0
);
PG_FREE_IF_COPY(dictname, 0);
if (res)
PG_RETURN_DATUM(res);
else
if (res)
PG_RETURN_DATUM(res);
else
PG_RETURN_NULL();
}
static Oid currect_dictionary_id=0;
static Oid currect_dictionary_id = 0;
PG_FUNCTION_INFO_V1(set_curdict);
Datum set_curdict(PG_FUNCTION_ARGS);
Datum set_curdict(PG_FUNCTION_ARGS);
Datum
set_curdict(PG_FUNCTION_ARGS) {
set_curdict(PG_FUNCTION_ARGS)
{
finddict(PG_GETARG_OID(0));
currect_dictionary_id=PG_GETARG_OID(0);
currect_dictionary_id = PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curdict_byname);
Datum set_curdict_byname(PG_FUNCTION_ARGS);
Datum set_curdict_byname(PG_FUNCTION_ARGS);
Datum
set_curdict_byname(PG_FUNCTION_ARGS) {
text *dictname=PG_GETARG_TEXT_P(0);
set_curdict_byname(PG_FUNCTION_ARGS)
{
text *dictname = PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curdict,
ObjectIdGetDatum( name2id_dict(dictname) )
);
set_curdict,
ObjectIdGetDatum(name2id_dict(dictname))
);
PG_FREE_IF_COPY(dictname, 0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(lexize_bycurrent);
Datum lexize_bycurrent(PG_FUNCTION_ARGS);
Datum
lexize_bycurrent(PG_FUNCTION_ARGS) {
Datum res;
if ( currect_dictionary_id == 0 )
Datum lexize_bycurrent(PG_FUNCTION_ARGS);
Datum
lexize_bycurrent(PG_FUNCTION_ARGS)
{
Datum res;
if (currect_dictionary_id == 0)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("no currect dictionary"),
errhint("Execute select set_curdict().")));
res = DirectFunctionCall3(
lexize,
ObjectIdGetDatum(currect_dictionary_id),
PG_GETARG_DATUM(0),
(Datum)0
);
if (res)
lexize,
ObjectIdGetDatum(currect_dictionary_id),
PG_GETARG_DATUM(0),
(Datum) 0
);
if (res)
PG_RETURN_DATUM(res);
else
else
PG_RETURN_NULL();
}

View File

@ -3,36 +3,39 @@
#include "postgres.h"
#include "fmgr.h"
typedef struct {
int len;
char **stop;
char* (*wordop)(char*);
} StopList;
typedef struct
{
int len;
char **stop;
char *(*wordop) (char *);
} StopList;
void sortstoplist(StopList *s);
void freestoplist(StopList *s);
void readstoplist(text *in, StopList *s);
bool searchstoplist(StopList *s, char *key);
char* lowerstr(char *str);
void sortstoplist(StopList * s);
void freestoplist(StopList * s);
void readstoplist(text *in, StopList * s);
bool searchstoplist(StopList * s, char *key);
char *lowerstr(char *str);
typedef struct {
Oid dict_id;
FmgrInfo lexize_info;
void *dictionary;
} DictInfo;
typedef struct
{
Oid dict_id;
FmgrInfo lexize_info;
void *dictionary;
} DictInfo;
void init_dict(Oid id, DictInfo *dict);
DictInfo* finddict(Oid id);
Oid name2id_dict(text *name);
void reset_dict(void);
void init_dict(Oid id, DictInfo * dict);
DictInfo *finddict(Oid id);
Oid name2id_dict(text *name);
void reset_dict(void);
/* simple parser of cfg string */
typedef struct {
char *key;
char *value;
} Map;
typedef struct
{
char *key;
char *value;
} Map;
void parse_cfgdict(text *in, Map **m);
void parse_cfgdict(text *in, Map ** m);
#endif

View File

@ -1,5 +1,5 @@
/*
* example of dictionary
/*
* example of dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
@ -11,30 +11,35 @@
#include "dict.h"
#include "common.h"
typedef struct {
typedef struct
{
StopList stoplist;
} DictExample;
} DictExample;
PG_FUNCTION_INFO_V1(dex_init);
Datum dex_init(PG_FUNCTION_ARGS);
Datum dex_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dex_lexize);
Datum dex_lexize(PG_FUNCTION_ARGS);
Datum dex_lexize(PG_FUNCTION_ARGS);
Datum
dex_init(PG_FUNCTION_ARGS) {
DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
Datum
dex_init(PG_FUNCTION_ARGS)
{
DictExample *d = (DictExample *) malloc(sizeof(DictExample));
if ( !d )
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(d,0,sizeof(DictExample));
memset(d, 0, sizeof(DictExample));
d->stoplist.wordop = lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
{
text *in = PG_GETARG_TEXT_P(0);
d->stoplist.wordop=lowerstr;
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
text *in = PG_GETARG_TEXT_P(0);
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
@ -44,18 +49,21 @@ dex_init(PG_FUNCTION_ARGS) {
}
Datum
dex_lexize(PG_FUNCTION_ARGS) {
DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
char *in = (char*)PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
char **res=palloc(sizeof(char*)*2);
dex_lexize(PG_FUNCTION_ARGS)
{
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
char **res = palloc(sizeof(char *) * 2);
if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
res[0]=NULL;
} else
res[0]=txt;
res[1]=NULL;
res[0] = NULL;
}
else
res[0] = txt;
res[1] = NULL;
PG_RETURN_POINTER(res);
}

View File

@ -1,4 +1,4 @@
/*
/*
* ISpell interface
* Teodor Sigaev <teodor@sigaev.ru>
*/
@ -12,96 +12,117 @@
#include "common.h"
#include "ispell/spell.h"
typedef struct {
typedef struct
{
StopList stoplist;
IspellDict obj;
} DictISpell;
} DictISpell;
PG_FUNCTION_INFO_V1(spell_init);
Datum spell_init(PG_FUNCTION_ARGS);
Datum spell_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(spell_lexize);
Datum spell_lexize(PG_FUNCTION_ARGS);
Datum spell_lexize(PG_FUNCTION_ARGS);
static void
freeDictISpell(DictISpell *d) {
freeDictISpell(DictISpell * d)
{
FreeIspell(&(d->obj));
freestoplist(&(d->stoplist));
free(d);
}
Datum
spell_init(PG_FUNCTION_ARGS) {
DictISpell *d;
Map *cfg, *pcfg;
text *in;
bool affloaded=false, dictloaded=false, stoploaded=false;
Datum
spell_init(PG_FUNCTION_ARGS)
{
DictISpell *d;
Map *cfg,
*pcfg;
text *in;
bool affloaded = false,
dictloaded = false,
stoploaded = false;
if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("ISpell confguration error")));
d = (DictISpell*)malloc( sizeof(DictISpell) );
if ( !d )
d = (DictISpell *) malloc(sizeof(DictISpell));
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(d,0,sizeof(DictISpell));
d->stoplist.wordop=lowerstr;
memset(d, 0, sizeof(DictISpell));
d->stoplist.wordop = lowerstr;
in = PG_GETARG_TEXT_P(0);
parse_cfgdict(in,&cfg);
parse_cfgdict(in, &cfg);
PG_FREE_IF_COPY(in, 0);
pcfg=cfg;
while(pcfg->key) {
if ( strcasecmp("DictFile", pcfg->key) == 0 ) {
if ( dictloaded ) {
pcfg = cfg;
while (pcfg->key)
{
if (strcasecmp("DictFile", pcfg->key) == 0)
{
if (dictloaded)
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("dictionary already loaded")));
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("dictionary already loaded")));
}
if ( ImportDictionary(&(d->obj), pcfg->value) ) {
if (ImportDictionary(&(d->obj), pcfg->value))
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not load dictionary file \"%s\"",
pcfg->value)));
}
dictloaded=true;
} else if ( strcasecmp("AffFile", pcfg->key) == 0 ) {
if ( affloaded ) {
dictloaded = true;
}
else if (strcasecmp("AffFile", pcfg->key) == 0)
{
if (affloaded)
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("affixes already loaded")));
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("affixes already loaded")));
}
if ( ImportAffixes(&(d->obj), pcfg->value) ) {
if (ImportAffixes(&(d->obj), pcfg->value))
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not load affix file \"%s\"",
pcfg->value)));
}
affloaded=true;
} else if ( strcasecmp("StopFile", pcfg->key) == 0 ) {
text *tmp=char2text(pcfg->value);
if ( stoploaded ) {
affloaded = true;
}
else if (strcasecmp("StopFile", pcfg->key) == 0)
{
text *tmp = char2text(pcfg->value);
if (stoploaded)
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("stop words already loaded")));
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("stop words already loaded")));
}
readstoplist(tmp, &(d->stoplist));
sortstoplist(&(d->stoplist));
pfree(tmp);
stoploaded=true;
} else {
stoploaded = true;
}
else
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized option: %s => %s",
pcfg->key, pcfg->value)));
pcfg->key, pcfg->value)));
}
pfree(pcfg->key);
pfree(pcfg->value);
@ -109,15 +130,20 @@ spell_init(PG_FUNCTION_ARGS) {
}
pfree(cfg);
if ( affloaded && dictloaded ) {
if (affloaded && dictloaded)
{
SortDictionary(&(d->obj));
SortAffixes(&(d->obj));
} else if ( !affloaded ) {
}
else if (!affloaded)
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("no affixes")));
} else {
}
else
{
freeDictISpell(d);
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@ -128,37 +154,43 @@ spell_init(PG_FUNCTION_ARGS) {
}
Datum
spell_lexize(PG_FUNCTION_ARGS) {
DictISpell *d = (DictISpell*)PG_GETARG_POINTER(0);
char *in = (char*)PG_GETARG_POINTER(1);
char *txt;
char **res;
char **ptr, **cptr;
spell_lexize(PG_FUNCTION_ARGS)
{
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt;
char **res;
char **ptr,
**cptr;
if ( !PG_GETARG_INT32(2) )
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
res=palloc(sizeof(char*)*2);
res = palloc(sizeof(char *) * 2);
txt = pnstrdup(in, PG_GETARG_INT32(2));
res=NormalizeWord(&(d->obj), txt);
res = NormalizeWord(&(d->obj), txt);
pfree(txt);
if ( res==NULL )
if (res == NULL)
PG_RETURN_POINTER(NULL);
ptr=cptr=res;
while(*ptr) {
if ( searchstoplist(&(d->stoplist),*ptr) ) {
ptr = cptr = res;
while (*ptr)
{
if (searchstoplist(&(d->stoplist), *ptr))
{
pfree(*ptr);
*ptr=NULL;
*ptr = NULL;
ptr++;
}
else
{
*cptr = *ptr;
cptr++;
ptr++;
} else {
*cptr=*ptr;
cptr++; ptr++;
}
}
*cptr=NULL;
*cptr = NULL;
PG_RETURN_POINTER(res);
}

View File

@ -1,6 +1,6 @@
/*
/*
* example of Snowball dictionary
* http://snowball.tartarus.org/
* http://snowball.tartarus.org/
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
@ -14,103 +14,118 @@
#include "snowball/english_stem.h"
#include "snowball/russian_stem.h"
typedef struct {
typedef struct
{
struct SN_env *z;
StopList stoplist;
int (*stem)(struct SN_env * z);
} DictSnowball;
int (*stem) (struct SN_env * z);
} DictSnowball;
PG_FUNCTION_INFO_V1(snb_en_init);
Datum snb_en_init(PG_FUNCTION_ARGS);
Datum snb_en_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_ru_init);
Datum snb_ru_init(PG_FUNCTION_ARGS);
Datum snb_ru_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_lexize);
Datum snb_lexize(PG_FUNCTION_ARGS);
Datum snb_lexize(PG_FUNCTION_ARGS);
Datum
snb_en_init(PG_FUNCTION_ARGS) {
DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
Datum
snb_en_init(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
if ( !d )
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(d,0,sizeof(DictSnowball));
d->stoplist.wordop=lowerstr;
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
text *in = PG_GETARG_TEXT_P(0);
memset(d, 0, sizeof(DictSnowball));
d->stoplist.wordop = lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
{
text *in = PG_GETARG_TEXT_P(0);
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
}
d->z = english_create_env();
if (!d->z) {
if (!d->z)
{
freestoplist(&(d->stoplist));
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
d->stem=english_stem;
d->stem = english_stem;
PG_RETURN_POINTER(d);
}
Datum
snb_ru_init(PG_FUNCTION_ARGS) {
DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );
Datum
snb_ru_init(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) malloc(sizeof(DictSnowball));
if ( !d )
if (!d)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset(d,0,sizeof(DictSnowball));
d->stoplist.wordop=lowerstr;
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
text *in = PG_GETARG_TEXT_P(0);
memset(d, 0, sizeof(DictSnowball));
d->stoplist.wordop = lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
{
text *in = PG_GETARG_TEXT_P(0);
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
}
d->z = russian_create_env();
if (!d->z) {
if (!d->z)
{
freestoplist(&(d->stoplist));
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
d->stem=russian_stem;
d->stem = russian_stem;
PG_RETURN_POINTER(d);
}
Datum
snb_lexize(PG_FUNCTION_ARGS) {
DictSnowball *d = (DictSnowball*)PG_GETARG_POINTER(0);
char *in = (char*)PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
char **res=palloc(sizeof(char*)*2);
snb_lexize(PG_FUNCTION_ARGS)
{
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
char **res = palloc(sizeof(char *) * 2);
if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
res[0]=NULL;
} else {
SN_set_current(d->z, strlen(txt), txt);
(d->stem)(d->z);
if ( d->z->p && d->z->l ) {
txt=repalloc(txt, d->z->l+1);
memcpy( txt, d->z->p, d->z->l);
txt[d->z->l]='\0';
}
res[0]=txt;
res[0] = NULL;
}
res[1]=NULL;
else
{
SN_set_current(d->z, strlen(txt), txt);
(d->stem) (d->z);
if (d->z->p && d->z->l)
{
txt = repalloc(txt, d->z->l + 1);
memcpy(txt, d->z->p, d->z->l);
txt[d->z->l] = '\0';
}
res[0] = txt;
}
res[1] = NULL;
PG_RETURN_POINTER(res);
}

View File

@ -1,4 +1,4 @@
/*
/*
* ISpell interface
* Teodor Sigaev <teodor@sigaev.ru>
*/
@ -13,93 +13,106 @@
#include "common.h"
#define SYNBUFLEN 4096
typedef struct {
char *in;
char *out;
} Syn;
typedef struct
{
char *in;
char *out;
} Syn;
typedef struct {
int len;
Syn *syn;
} DictSyn;
typedef struct
{
int len;
Syn *syn;
} DictSyn;
PG_FUNCTION_INFO_V1(syn_init);
Datum syn_init(PG_FUNCTION_ARGS);
Datum syn_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(syn_lexize);
Datum syn_lexize(PG_FUNCTION_ARGS);
Datum syn_lexize(PG_FUNCTION_ARGS);
static char *
findwrd(char *in, char **end) {
char *start;
findwrd(char *in, char **end)
{
char *start;
*end=NULL;
while(*in && isspace(*in))
*end = NULL;
while (*in && isspace(*in))
in++;
if ( !in )
if (!in)
return NULL;
start=in;
start = in;
while(*in && !isspace(*in))
while (*in && !isspace(*in))
in++;
*end=in;
*end = in;
return start;
}
static int
compareSyn(const void *a, const void *b) {
return strcmp( ((Syn*)a)->in, ((Syn*)b)->in );
compareSyn(const void *a, const void *b)
{
return strcmp(((Syn *) a)->in, ((Syn *) b)->in);
}
Datum
syn_init(PG_FUNCTION_ARGS) {
text *in;
DictSyn *d;
int cur=0;
FILE *fin;
char *filename;
char buf[SYNBUFLEN];
char *starti,*starto,*end=NULL;
int slen;
Datum
syn_init(PG_FUNCTION_ARGS)
{
text *in;
DictSyn *d;
int cur = 0;
FILE *fin;
char *filename;
char buf[SYNBUFLEN];
char *starti,
*starto,
*end = NULL;
int slen;
if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config")));
in = PG_GETARG_TEXT_P(0);
if ( VARSIZE(in) - VARHDRSZ == 0 )
if (VARSIZE(in) - VARHDRSZ == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("VOID config")));
filename=text2char(in);
filename = text2char(in);
PG_FREE_IF_COPY(in, 0);
if ( (fin=fopen(filename,"r")) == NULL )
if ((fin = fopen(filename, "r")) == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m",
filename)));
filename)));
d = (DictSyn*)malloc( sizeof(DictSyn) );
if ( !d ) {
d = (DictSyn *) malloc(sizeof(DictSyn));
if (!d)
{
fclose(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
memset(d,0,sizeof(DictSyn));
memset(d, 0, sizeof(DictSyn));
while( fgets(buf,SYNBUFLEN,fin) ) {
slen = strlen(buf)-1;
while (fgets(buf, SYNBUFLEN, fin))
{
slen = strlen(buf) - 1;
buf[slen] = '\0';
if ( *buf=='\0' ) continue;
if (cur==d->len) {
d->len = (d->len) ? 2*d->len : 16;
d->syn=(Syn*)realloc( d->syn, sizeof(Syn)*d->len );
if ( !d->syn ) {
if (*buf == '\0')
continue;
if (cur == d->len)
{
d->len = (d->len) ? 2 * d->len : 16;
d->syn = (Syn *) realloc(d->syn, sizeof(Syn) * d->len);
if (!d->syn)
{
fclose(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
@ -107,64 +120,66 @@ syn_init(PG_FUNCTION_ARGS) {
}
}
starti=findwrd(buf,&end);
if ( !starti )
starti = findwrd(buf, &end);
if (!starti)
continue;
*end='\0';
if ( end >= buf+slen )
*end = '\0';
if (end >= buf + slen)
continue;
starto= findwrd(end+1, &end);
if ( !starto )
starto = findwrd(end + 1, &end);
if (!starto)
continue;
*end='\0';
*end = '\0';
d->syn[cur].in=strdup(lowerstr(starti));
d->syn[cur].out=strdup(lowerstr(starto));
if ( !(d->syn[cur].in && d->syn[cur].out) ) {
d->syn[cur].in = strdup(lowerstr(starti));
d->syn[cur].out = strdup(lowerstr(starto));
if (!(d->syn[cur].in && d->syn[cur].out))
{
fclose(fin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
cur++;
cur++;
}
fclose(fin);
d->len=cur;
if ( cur>1 )
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
fclose(fin);
d->len = cur;
if (cur > 1)
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
pfree(filename);
PG_RETURN_POINTER(d);
PG_RETURN_POINTER(d);
}
Datum
syn_lexize(PG_FUNCTION_ARGS) {
DictSyn *d = (DictSyn*)PG_GETARG_POINTER(0);
char *in = (char*)PG_GETARG_POINTER(1);
Syn key,*found;
char **res=NULL;
syn_lexize(PG_FUNCTION_ARGS)
{
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
Syn key,
*found;
char **res = NULL;
if ( !PG_GETARG_INT32(2) )
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
key.out=NULL;
key.in=lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
key.out = NULL;
key.in = lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
found=(Syn*)bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
pfree(key.in);
if ( !found )
if (!found)
PG_RETURN_POINTER(NULL);
res=palloc(sizeof(char*)*2);
res = palloc(sizeof(char *) * 2);
res[0]=pstrdup(found->out);
res[1]=NULL;
res[0] = pstrdup(found->out);
res[1] = NULL;
PG_RETURN_POINTER(res);
PG_RETURN_POINTER(res);
}

File diff suppressed because it is too large Load Diff

View File

@ -4,48 +4,53 @@
#include <sys/types.h>
#include <regex.h>
typedef struct spell_struct {
char * word;
char flag[10];
} SPELL;
typedef struct spell_struct
{
char *word;
char flag[10];
} SPELL;
typedef struct aff_struct {
char flag;
char type;
char mask[33];
char find[16];
char repl[16];
regex_t reg;
size_t replen;
char compile;
} AFFIX;
typedef struct aff_struct
{
char flag;
char type;
char mask[33];
char find[16];
char repl[16];
regex_t reg;
size_t replen;
char compile;
} AFFIX;
typedef struct Tree_struct {
int Left[256], Right[256];
} Tree_struct;
typedef struct Tree_struct
{
int Left[256],
Right[256];
} Tree_struct;
typedef struct {
int maffixes;
int naffixes;
AFFIX * Affix;
typedef struct
{
int maffixes;
int naffixes;
AFFIX *Affix;
int nspell;
int mspell;
SPELL *Spell;
Tree_struct SpellTree;
Tree_struct PrefixTree;
Tree_struct SuffixTree;
int nspell;
int mspell;
SPELL *Spell;
Tree_struct SpellTree;
Tree_struct PrefixTree;
Tree_struct SuffixTree;
} IspellDict;
} IspellDict;
char ** NormalizeWord(IspellDict * Conf,char *word);
int ImportAffixes(IspellDict * Conf, const char *filename);
int ImportDictionary(IspellDict * Conf,const char *filename);
char **NormalizeWord(IspellDict * Conf, char *word);
int ImportAffixes(IspellDict * Conf, const char *filename);
int ImportDictionary(IspellDict * Conf, const char *filename);
int AddSpell(IspellDict * Conf,const char * word,const char *flag);
int AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
void SortDictionary(IspellDict * Conf);
void SortAffixes(IspellDict * Conf);
void FreeIspell (IspellDict *Conf);
int AddSpell(IspellDict * Conf, const char *word, const char *flag);
int AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type);
void SortDictionary(IspellDict * Conf);
void SortAffixes(IspellDict * Conf);
void FreeIspell(IspellDict * Conf);
#endif

View File

@ -1,5 +1,5 @@
/*
* Simple config parser
/*
* Simple config parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
@ -16,126 +16,164 @@
#define CS_WAITEQ 2
#define CS_WAITVALUE 3
#define CS_INVALUE 4
#define CS_IN2VALUE 5
#define CS_IN2VALUE 5
#define CS_WAITDELIM 6
#define CS_INESC 7
#define CS_IN2ESC 8
/*
 * nstrdup
 *      Copy the first len bytes of ptr into a freshly palloc'd,
 *      NUL-terminated buffer and strip backslash escapes from the copy:
 *      each backslash is dropped and the character after it is kept as is.
 *
 * A backslash that is the very last character has nothing to escape and is
 * simply dropped.
 *
 * Returns the new buffer.
 */
static char *
nstrdup(char *ptr, int len)
{
    char       *res = palloc(len + 1),
               *cptr;

    memcpy(res, ptr, len);
    res[len] = '\0';

    /* compact in place: cptr writes, ptr reads, and cptr never passes ptr */
    cptr = ptr = res;
    while (*ptr)
    {
        if (*ptr == '\\')
        {
            ptr++;
            /* stop here, or the scan would step past the terminator */
            if (*ptr == '\0')
                break;
        }
        *cptr = *ptr;
        ptr++;
        cptr++;
    }
    *cptr = '\0';

    return res;
}
/*
 * Raise the parser's standard syntax error for the byte at ptr, reporting
 * its offset from the start of the data of "in".
 */
static void
cfgdict_syntax_error(text *in, char *ptr)
{
    ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
             errmsg("syntax error"),
             errdetail("Syntax error in position %d near \"%c\"",
                       (int) (ptr - VARDATA(in)), *ptr)));
}

/*
 * parse_cfgdict
 *      Parse a comma-separated list of key = value pairs held in "in".
 *
 * Keys consist of alphabetic characters.  A value is either enclosed in
 * double quotes or is an unquoted run ended by whitespace or a comma.  In
 * both forms a backslash makes the next character part of the value.
 *
 * *m is set to a palloc'd array of Map entries whose key and value fields
 * are palloc'd strings.  The array is zero-filled and sized so that at
 * least one all-zero entry follows the last parsed pair.
 *
 * Reports ERRCODE_SYNTAX_ERROR through ereport() on malformed input.
 */
void
parse_cfgdict(text *in, Map ** m)
{
    Map        *mptr;
    char       *ptr = VARDATA(in),
               *begin = NULL;
    int         num = 0;        /* int: a char counter could overflow and
                                 * undersize the array below */
    int         state = CS_WAITKEY;

    /* count the commas to get an upper bound on the number of pairs */
    while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
    {
        if (*ptr == ',')
            num++;
        ptr++;
    }

    *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
    memset(mptr, 0, sizeof(Map) * (num + 2));

    ptr = VARDATA(in);
    while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
    {
        /* ctype functions need an unsigned char value */
        unsigned char ch = (unsigned char) *ptr;

        switch (state)
        {
            case CS_WAITKEY:
                if (isalpha(ch))
                {
                    begin = ptr;
                    state = CS_INKEY;
                }
                else if (!isspace(ch))
                    cfgdict_syntax_error(in, ptr);
                break;

            case CS_INKEY:
                if (isspace(ch))
                {
                    mptr->key = nstrdup(begin, ptr - begin);
                    state = CS_WAITEQ;
                }
                else if (*ptr == '=')
                {
                    mptr->key = nstrdup(begin, ptr - begin);
                    state = CS_WAITVALUE;
                }
                else if (!isalpha(ch))
                    cfgdict_syntax_error(in, ptr);
                break;

            case CS_WAITEQ:
                if (*ptr == '=')
                    state = CS_WAITVALUE;
                else if (!isspace(ch))
                    cfgdict_syntax_error(in, ptr);
                break;

            case CS_WAITVALUE:
                if (*ptr == '"')
                {
                    begin = ptr + 1;
                    state = CS_INVALUE;
                }
                else if (!isspace(ch))
                {
                    begin = ptr;
                    state = CS_IN2VALUE;
                }
                break;

            case CS_INVALUE:
                if (*ptr == '"')
                {
                    mptr->value = nstrdup(begin, ptr - begin);
                    mptr++;
                    state = CS_WAITDELIM;
                }
                else if (*ptr == '\\')
                    state = CS_INESC;
                break;

            case CS_IN2VALUE:
                if (isspace(ch) || *ptr == ',')
                {
                    mptr->value = nstrdup(begin, ptr - begin);
                    mptr++;
                    state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
                }
                else if (*ptr == '\\')
                {
                    /*
                     * Must return to the unquoted-value state afterwards;
                     * CS_INESC would resume as if inside a quoted value.
                     */
                    state = CS_IN2ESC;
                }
                break;

            case CS_WAITDELIM:
                if (*ptr == ',')
                    state = CS_WAITKEY;
                else if (!isspace(ch))
                    cfgdict_syntax_error(in, ptr);
                break;

            case CS_INESC:
                /* the escaped character itself is taken as is */
                state = CS_INVALUE;
                break;

            case CS_IN2ESC:
                state = CS_IN2VALUE;
                break;

            default:
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("bad parser state"),
                         errdetail("%d at position %d near \"%c\"",
                                   state, (int) (ptr - VARDATA(in)), *ptr)));
        }
        ptr++;
    }

    if (state == CS_IN2VALUE)
    {
        /* input ended in the middle of an unquoted value: that ends it */
        mptr->value = nstrdup(begin, ptr - begin);
        mptr++;
    }
    else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("unexpected end of line")));
}

View File

@ -99,28 +99,40 @@ typedef struct
TI_IN_STATE valstate;
/* tscfg */
int cfg_id;
int cfg_id;
} QPRS_STATE;
/*
 * get_weight
 *      Parse an optional ":<letters>" weight suffix starting at buf.
 *
 * *weight is always reset to 0 first.  If buf does not start with ':' it is
 * returned unchanged.  Otherwise each following letter, in either case, sets
 * one bit: a = 1<<3, b = 1<<2, c = 1<<1, d = 1.
 *
 * Returns a pointer to the first character that is not a weight letter, or
 * to the terminating NUL.
 */
static char *
get_weight(char *buf, int2 *weight)
{
    *weight = 0;

    if (*buf != ':')
        return buf;

    buf++;
    while (*buf)
    {
        /* tolower() needs an unsigned char value */
        switch (tolower((unsigned char) *buf))
        {
            case 'a':
                *weight |= 1 << 3;
                break;
            case 'b':
                *weight |= 1 << 2;
                break;
            case 'c':
                *weight |= 1 << 1;
                break;
            case 'd':
                *weight |= 1;
                break;
            default:
                return buf;
        }
        buf++;
    }

    return buf;
}
@ -146,11 +158,15 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
state->count++;
(state->buf)++;
return OPEN;
} else if ( *(state->buf) == ':' ) {
}
else if (*(state->buf) == ':')
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("error at start of operand")));
} else if (*(state->buf) != ' ') {
}
else if (*(state->buf) != ' ')
{
state->valstate.prsbuf = state->buf;
state->state = WAITOPERATOR;
if (gettoken_tsvector(&(state->valstate)))
@ -257,7 +273,7 @@ static void
pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
{
int4 count = 0;
PRSTEXT prs;
PRSTEXT prs;
prs.lenwords = 32;
prs.curwords = 0;
@ -266,16 +282,17 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
for(count=0;count<prs.curwords;count++) {
for (count = 0; count < prs.curwords; count++)
{
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pfree( prs.words[count].word );
pfree(prs.words[count].word);
if (count)
pushquery(state, OPR, (int4) '&', 0, 0, 0 );
}
pushquery(state, OPR, (int4) '&', 0, 0, 0);
}
pfree(prs.words);
/* XXX */
if ( prs.curwords==0 )
if (prs.curwords == 0)
pushval_asis(state, VALTRUE, 0, 0, 0);
}
@ -381,15 +398,18 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
* check weight info
*/
static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
{
    /*
     * The data for val sits at values + pos: the word itself, padded with
     * SHORTALIGN, then a uint16 count, then that many WordEntryPos items.
     */
    WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
    uint16      len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));

    /* true as soon as one position's weight is in the item's weight mask */
    while (len--)
    {
        if (item->weight & (1 << ptr->weight))
            return true;
        ptr++;
    }
    return false;
}
/*
@ -410,8 +430,8 @@ checkcondition_str(void *checkval, ITEM * val)
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
if (difference == 0)
return ( val->weight && StopMiddle->haspos ) ?
checkclass_str((CHKVAL *) checkval,StopMiddle, val) : true;
return (val->weight && StopMiddle->haspos) ?
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
else if (difference < 0)
StopLow = StopMiddle + 1;
else
@ -468,7 +488,7 @@ rexectsq(PG_FUNCTION_ARGS)
Datum
exectsq(PG_FUNCTION_ARGS)
{
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
CHKVAL chkval;
bool result;
@ -485,10 +505,10 @@ exectsq(PG_FUNCTION_ARGS)
chkval.values = STRPTR(val);
chkval.operand = GETOPERAND(query);
result = TS_execute(
GETQUERY(query),
&chkval,
true,
checkcondition_str
GETQUERY(query),
&chkval,
true,
checkcondition_str
);
PG_FREE_IF_COPY(val, 0);
@ -534,7 +554,7 @@ findoprnd(ITEM * ptr, int4 *pos)
* input
*/
static QUERYTYPE *
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
{
QPRS_STATE state;
int4 i;
@ -555,7 +575,7 @@ queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int c
state.count = 0;
state.num = 0;
state.str = NULL;
state.cfg_id=cfg_id;
state.cfg_id = cfg_id;
/* init value parser's state */
state.valstate.oprisdelim = true;
@ -678,12 +698,30 @@ infix(INFIX * in, bool first)
}
*(in->cur) = '\'';
in->cur++;
if ( in->curpol->weight ) {
*(in->cur) = ':'; in->cur++;
if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; }
if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; }
if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; }
if ( in->curpol->weight & 1 ) { *(in->cur) = 'D'; in->cur++; }
if (in->curpol->weight)
{
*(in->cur) = ':';
in->cur++;
if (in->curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
in->cur++;
}
if (in->curpol->weight & (1 << 2))
{
*(in->cur) = 'B';
in->cur++;
}
if (in->curpol->weight & (1 << 1))
{
*(in->cur) = 'C';
in->cur++;
}
if (in->curpol->weight & 1)
{
*(in->cur) = 'D';
in->cur++;
}
}
*(in->cur) = '\0';
in->curpol++;
@ -827,15 +865,16 @@ tsquerytree(PG_FUNCTION_ARGS)
}
Datum
to_tsquery(PG_FUNCTION_ARGS) {
text *in = PG_GETARG_TEXT_P(1);
char *str;
to_tsquery(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_P(1);
char *str;
QUERYTYPE *query;
ITEM *res;
int4 len;
str=text2char(in);
PG_FREE_IF_COPY(in,1);
str = text2char(in);
PG_FREE_IF_COPY(in, 1);
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
res = clean_fakeval_v2(GETQUERY(query), &len);
@ -851,25 +890,25 @@ to_tsquery(PG_FUNCTION_ARGS) {
}
/*
 * to_tsquery_name
 *      Variant of to_tsquery taking a configuration name as argument 0.
 *      The name is mapped to an id with name2id_cfg() and the call is
 *      forwarded to to_tsquery together with argument 1.
 */
Datum
to_tsquery_name(PG_FUNCTION_ARGS)
{
    text       *name = PG_GETARG_TEXT_P(0);
    Datum       res = DirectFunctionCall2(
                                          to_tsquery,
                                          Int32GetDatum(name2id_cfg(name)),
                                          PG_GETARG_DATUM(1)
    );

    /* name was fetched from argument 0, so that is the index to compare with */
    PG_FREE_IF_COPY(name, 0);
    PG_RETURN_DATUM(res);
}
/*
 * to_tsquery_current
 *      Variant of to_tsquery with no configuration argument: forwards
 *      argument 0 to to_tsquery with the id returned by get_currcfg().
 */
Datum
to_tsquery_current(PG_FUNCTION_ARGS)
{
    PG_RETURN_DATUM(DirectFunctionCall2(
                                        to_tsquery,
                                        Int32GetDatum(get_currcfg()),
                                        PG_GETARG_DATUM(0)
                                        ));
}

View File

@ -16,10 +16,10 @@ typedef struct ITEM
int2 left;
int4 val;
/* user-friendly value, must correlate with WordEntry */
uint32
unused:1,
length:11,
distance:20;
uint32
unused:1,
length:11,
distance:20;
} ITEM;
/*
@ -50,6 +50,6 @@ typedef struct
#define VALFALSE 7
/*
 * Evaluate the query tree rooted at curitem; chkcond is the callback that
 * receives checkval together with an ITEM.
 * NOTE(review): calcnot presumably selects whether NOT nodes are evaluated --
 * confirm against the definition.
 */
bool TS_execute(ITEM * curitem, void *checkval,
           bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
#endif

View File

@ -37,29 +37,35 @@ Datum rank_cd_def(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(get_covers);
Datum       get_covers(PG_FUNCTION_ARGS);

/* default weight table, indexed by the weight field of a WordEntryPos */
static float weights[] = {0.1, 0.2, 0.4, 1.0};

/*
 * Weight of the position wep points to.  Expands to a lookup in a variable
 * named "w", which must be in scope wherever the macro is used.
 */
#define wpos(wep)   ( w[ ((WordEntryPos*)(wep))->weight ] )

/* normalization method used when the caller does not pass one */
#define DEF_NORM_METHOD 0
/*
 * Returns the weight of a word collocation at distance w.
 *
 * The weight falls as w grows; beyond a distance of 100 the formula is not
 * evaluated and a fixed tiny value is returned.
 */
static float4
word_distance(int4 w)
{
    if (w > 100)
        return 1e-30;

    return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
}
static int
cnt_length( tsvector *t ) {
WordEntry *ptr=ARRPTR(t), *end=(WordEntry*)STRPTR(t);
int len = 0, clen;
cnt_length(tsvector * t)
{
WordEntry *ptr = ARRPTR(t),
*end = (WordEntry *) STRPTR(t);
int len = 0,
clen;
while(ptr < end) {
if ( (clen=POSDATALEN(t, ptr)) == 0 )
while (ptr < end)
{
if ((clen = POSDATALEN(t, ptr)) == 0)
len += 1;
else
len += clen;
@ -70,191 +76,225 @@ cnt_length( tsvector *t ) {
}
/*
 * WordECompareITEM
 *      Order a tsvector word (ptr, text at eval + ptr->pos) against a query
 *      operand (item, text at qval + item->distance).
 *
 * Length is compared first: a longer word sorts after a shorter one.  Only
 * words of equal length are compared byte-wise with strncmp().
 */
static int4
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
{
    if (ptr->len == item->length)
        return strncmp(
                       eval + ptr->pos,
                       qval + item->distance,
                       item->length);

    return (ptr->len > item->length) ? 1 : -1;
}
/*
 * find_wordentry
 *      Binary-search the WordEntry array of t (from ARRPTR(t) up to
 *      STRPTR(t)) for the operand "item" of query q.
 *
 * Returns the matching entry, or NULL if there is none.
 */
static WordEntry *
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
{
    WordEntry  *StopLow = ARRPTR(t);
    WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
    WordEntry  *StopMiddle;
    int         difference;

    /* Loop invariant: StopLow <= item < StopHigh */

    while (StopLow < StopHigh)
    {
        StopMiddle = StopLow + (StopHigh - StopLow) / 2;
        difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
        if (difference == 0)
            return StopMiddle;
        else if (difference < 0)
            StopLow = StopMiddle + 1;
        else
            StopHigh = StopMiddle;
    }

    return NULL;
}
/*
 * Stand-in position list for words stored without position data.
 *
 * Users overwrite the first element's storage with a uint16 count
 * (lengthof(POSNULL) - 1) and read the remaining element(s) as the
 * position list.
 */
static WordEntryPos POSNULL[] = {
    {0, 0},
    {0, MAXENTRYPOS - 1}
};
/*
 * calc_rank_and
 *      Rank t against q by pairing up the positions of every two distinct
 *      VAL operands of q that are found in t.
 *
 * Each pair of positions contributes
 *      sqrt(weight1 * weight2 * word_distance(distance)),
 * and contributions are combined as 1 - (1 - res) * (1 - cur).
 *
 * Returns -1.0 if no pair contributed.
 */
static float
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
{
    uint16    **pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
    int         i,
                k,
                l,
                p;
    WordEntry  *entry;
    WordEntryPos *post,
               *ct;
    int4        dimt,
                lenct,
                dist;
    float       res = -1.0;
    ITEM       *item = GETQUERY(q);

    /* same element size as in the allocation above */
    memset(pos, 0, sizeof(uint16 *) * q->size);
    *(uint16 *) POSNULL = lengthof(POSNULL) - 1;

    for (i = 0; i < q->size; i++)
    {
        if (item[i].type != VAL)
            continue;

        entry = find_wordentry(t, q, &(item[i]));
        if (!entry)
            continue;

        /* pos[i] points at a uint16 count followed by the positions */
        if (entry->haspos)
            pos[i] = (uint16 *) _POSDATAPTR(t, entry);
        else
            pos[i] = (uint16 *) POSNULL;

        dimt = *(uint16 *) (pos[i]);
        post = (WordEntryPos *) (pos[i] + 1);

        /* pair this operand with every earlier operand found in t */
        for (k = 0; k < i; k++)
        {
            if (!pos[k])
                continue;
            lenct = *(uint16 *) (pos[k]);
            ct = (WordEntryPos *) (pos[k] + 1);
            for (l = 0; l < dimt; l++)
            {
                for (p = 0; p < lenct; p++)
                {
                    dist = abs(post[l].pos - ct[p].pos);

                    /*
                     * Equal positions count only when one side is the
                     * POSNULL stand-in; the distance is then taken to be
                     * MAXENTRYPOS.
                     */
                    if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
                    {
                        float       curw;

                        if (!dist)
                            dist = MAXENTRYPOS;
                        curw = sqrt(wpos(&(post[l])) * wpos(&(ct[p])) * word_distance(dist));
                        res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
                    }
                }
            }
        }
    }
    pfree(pos);
    return res;
}
/*
 * calc_rank_or
 *      Rank t against q by combining the weight of every position of every
 *      VAL operand of q that is found in t, as 1 - (1 - res) * (1 - weight).
 *
 * Returns -1.0 if no operand was found.
 */
static float
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
{
    WordEntry  *entry;
    WordEntryPos *post;
    int4        dimt,
                j,
                i;
    float       res = -1.0;
    ITEM       *item = GETQUERY(q);

    *(uint16 *) POSNULL = lengthof(POSNULL) - 1;

    for (i = 0; i < q->size; i++)
    {
        if (item[i].type != VAL)
            continue;

        entry = find_wordentry(t, q, &(item[i]));
        if (!entry)
            continue;

        if (entry->haspos)
        {
            dimt = POSDATALEN(t, entry);
            post = POSDATAPTR(t, entry);
        }
        else
        {
            /* no stored positions: use the POSNULL stand-in list */
            dimt = *(uint16 *) POSNULL;
            post = POSNULL + 1;
        }

        for (j = 0; j < dimt; j++)
        {
            if (res < 0)
                res = wpos(&(post[j]));
            else
                res = 1.0 - (1.0 - res) * (1.0 - wpos(&(post[j])));
        }
    }
    return res;
}
/*
 * calc_rank
 *      Rank document t against query q using weight table w.
 *
 * If the first query item is an '&' operator the rank comes from
 * calc_rank_and(), otherwise from calc_rank_or().  A negative result is
 * replaced by 1e-20.  An empty document or query ranks 0.0.
 *
 * method selects the normalization:
 *      0   none
 *      1   divide by log(cnt_length(t))
 *      2   divide by cnt_length(t)
 * Any other value raises an error.
 */
static float
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
{
    ITEM       *item = GETQUERY(q);
    float       res = 0.0;

    if (!t->size || !q->size)
        return 0.0;

    res = (item->type != VAL && item->val == (int4) '&') ?
        calc_rank_and(w, t, q) : calc_rank_or(w, t, q);

    if (res < 0)
        res = 1e-20;

    switch (method)
    {
        case 0:
            break;
        case 1:
            {
                double      loglen = log((float) cnt_length(t));

                /*
                 * A length of 1 gives log(1) == 0; leave the rank as is
                 * rather than divide by zero.
                 */
                if (loglen > 0)
                    res /= loglen;
            }
            break;
        case 2:
            res /= (float) cnt_length(t);
            break;
        default:
            /* internal error */
            elog(ERROR, "unrecognized normalization method: %d", method);
    }

    return res;
}
Datum
rank(PG_FUNCTION_ARGS) {
rank(PG_FUNCTION_ARGS)
{
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
int method=DEF_NORM_METHOD;
float res=0.0;
float ws[ lengthof(weights) ];
int i;
int method = DEF_NORM_METHOD;
float res = 0.0;
float ws[lengthof(weights)];
int i;
if ( ARR_NDIM(win) != 1 )
if (ARR_NDIM(win) != 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array of weight must be one-dimensional")));
if ( ARRNELEMS(win) < lengthof(weights) )
if (ARRNELEMS(win) < lengthof(weights))
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("array of weight is too short")));
for(i=0;i<lengthof(weights);i++) {
ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
if ( ws[ i ] > 1.0 )
for (i = 0; i < lengthof(weights); i++)
{
ws[i] = (((float4 *) ARR_DATA_PTR(win))[i] >= 0) ? ((float4 *) ARR_DATA_PTR(win))[i] : weights[i];
if (ws[i] > 1.0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("weight out of range")));
}
}
if ( PG_NARGS() == 4 )
method=PG_GETARG_INT32(3);
if (PG_NARGS() == 4)
method = PG_GETARG_INT32(3);
res = calc_rank(ws, txt, query, method);
res=calc_rank(ws, txt, query, method);
PG_FREE_IF_COPY(win, 0);
PG_FREE_IF_COPY(txt, 1);
PG_FREE_IF_COPY(query, 2);
@ -262,108 +302,127 @@ rank(PG_FUNCTION_ARGS) {
}
Datum
rank_def(PG_FUNCTION_ARGS) {
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
rank_def(PG_FUNCTION_ARGS)
{
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
float res=0.0;
int method=DEF_NORM_METHOD;
float res = 0.0;
int method = DEF_NORM_METHOD;
if ( PG_NARGS() == 3 )
method=PG_GETARG_INT32(2);
if (PG_NARGS() == 3)
method = PG_GETARG_INT32(2);
res = calc_rank(weights, txt, query, method);
res=calc_rank(weights, txt, query, method);
PG_FREE_IF_COPY(txt, 0);
PG_FREE_IF_COPY(query, 1);
PG_RETURN_FLOAT4(res);
}
/* One occurrence of a query operand in a document. */
typedef struct
{
    ITEM       *item;           /* the query operand */
    int32       pos;            /* the position of this occurrence */
} DocRepresentation;
/*
 * qsort() comparator ordering DocRepresentation entries by ascending pos.
 *
 * Equal positions compare as 0, so the function gives the same answer
 * whichever way round qsort() passes the two elements.
 */
static int
compareDocR(const void *a, const void *b)
{
    if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
        return 0;
    return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
}
typedef struct {
typedef struct
{
DocRepresentation *doc;
int len;
int len;
} ChkDocR;
/*
 * checkcondition_DR
 *      checkval is a ChkDocR window.  Returns true if any entry in the
 *      window refers to the query operand val (compared by address).
 */
static bool
checkcondition_DR(void *checkval, ITEM * val)
{
    DocRepresentation *ptr = ((ChkDocR *) checkval)->doc;

    while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
    {
        if (val == ptr->item)
            return true;
        ptr++;
    }

    return false;
}
static bool
Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
int i;
DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff;
ITEM *item=GETQUERY(query);
int lastpos=*pos;
int oldq=*q;
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
{
int i;
DocRepresentation *ptr,
*f = (DocRepresentation *) 0xffffffff;
ITEM *item = GETQUERY(query);
int lastpos = *pos;
int oldq = *q;
*p=0x7fffffff;
*q=0;
*p = 0x7fffffff;
*q = 0;
for(i=0; i<query->size; i++) {
if ( item->type != VAL ) {
for (i = 0; i < query->size; i++)
{
if (item->type != VAL)
{
item++;
continue;
}
ptr = doc + *pos;
while(ptr-doc<len) {
if ( ptr->item == item ) {
if ( ptr->pos > *q ) {
while (ptr - doc < len)
{
if (ptr->item == item)
{
if (ptr->pos > *q)
{
*q = ptr->pos;
lastpos= ptr - doc;
}
lastpos = ptr - doc;
}
break;
}
}
ptr++;
}
item++;
}
if (*q==0 )
if (*q == 0)
return false;
if (*q==oldq) { /* already check this pos */
if (*q == oldq)
{ /* already check this pos */
(*pos)++;
return Cover(doc, len, query, pos,p,q);
}
return Cover(doc, len, query, pos, p, q);
}
item=GETQUERY(query);
for(i=0; i<query->size; i++) {
if ( item->type != VAL ) {
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
{
if (item->type != VAL)
{
item++;
continue;
}
ptr = doc + lastpos;
while(ptr>=doc+*pos) {
if ( ptr->item == item ) {
if ( ptr->pos < *p ) {
while (ptr >= doc + *pos)
{
if (ptr->item == item)
{
if (ptr->pos < *p)
{
*p = ptr->pos;
f=ptr;
f = ptr;
}
break;
}
@ -371,106 +430,135 @@ Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *
}
item++;
}
if ( *p<=*q ) {
ChkDocR ch = { f, (doc + lastpos)-f+1 };
*pos = f-doc+1;
if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
/*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
if (*p <= *q)
{
ChkDocR ch = {f, (doc + lastpos) - f + 1};
*pos = f - doc + 1;
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
{
/*
* elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p,
* *q);
*/
return true;
} else
return Cover(doc, len, query, pos,p,q);
}
else
return Cover(doc, len, query, pos, p, q);
}
return false;
}
/*
 * get_docrep
 *      Build the list of all occurrences in txt of the VAL operands of
 *      query, sorted by position.
 *
 * Operands stored without positions contribute the POSNULL stand-in list.
 *
 * *doclen receives the number of entries.  Returns a palloc'd array, or
 * NULL (with *doclen == 0) if no operand occurs in txt.
 */
static DocRepresentation *
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
{
    ITEM       *item = GETQUERY(query);
    WordEntry  *entry;
    WordEntryPos *post;
    int4        dimt,
                j,
                i;
    int         len = query->size * 4,
                cur = 0;
    DocRepresentation *doc;

    *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
    doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);

    for (i = 0; i < query->size; i++)
    {
        if (item[i].type != VAL)
            continue;

        entry = find_wordentry(txt, query, &(item[i]));
        if (!entry)
            continue;

        if (entry->haspos)
        {
            dimt = POSDATALEN(txt, entry);
            post = POSDATAPTR(txt, entry);
        }
        else
        {
            dimt = *(uint16 *) POSNULL;
            post = POSNULL + 1;
        }

        /* double the array until this operand's positions fit */
        while (cur + dimt >= len)
        {
            len *= 2;
            doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
        }

        for (j = 0; j < dimt; j++)
        {
            doc[cur].item = &(item[i]);
            doc[cur].pos = post[j].pos;
            cur++;
        }
    }

    *doclen = cur;

    if (cur > 0)
    {
        if (cur > 1)
            qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
        return doc;
    }

    pfree(doc);
    return NULL;
}
Datum
rank_cd(PG_FUNCTION_ARGS) {
int K = PG_GETARG_INT32(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
rank_cd(PG_FUNCTION_ARGS)
{
int K = PG_GETARG_INT32(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
int method=DEF_NORM_METHOD;
DocRepresentation *doc;
float res=0.0;
int p=0,q=0,len,cur;
int method = DEF_NORM_METHOD;
DocRepresentation *doc;
float res = 0.0;
int p = 0,
q = 0,
len,
cur;
doc = get_docrep(txt, query, &len);
if ( !doc ) {
if (!doc)
{
PG_FREE_IF_COPY(txt, 1);
PG_FREE_IF_COPY(query, 2);
PG_RETURN_FLOAT4(0.0);
}
cur=0;
if (K<=0)
K=4;
while( Cover(doc, len, query, &cur, &p, &q) )
res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
cur = 0;
if (K <= 0)
K = 4;
while (Cover(doc, len, query, &cur, &p, &q))
res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
if ( PG_NARGS() == 4 )
method=PG_GETARG_INT32(3);
if (PG_NARGS() == 4)
method = PG_GETARG_INT32(3);
switch(method) {
case 0: break;
case 1: res /= log((float)cnt_length(txt)); break;
case 2: res /= (float)cnt_length(txt); break;
switch (method)
{
case 0:
break;
case 1:
res /= log((float) cnt_length(txt));
break;
case 2:
res /= (float) cnt_length(txt);
break;
default:
/* internal error */
elog(ERROR,"unrecognized normalization method: %d", method);
}
/* internal error */
elog(ERROR, "unrecognized normalization method: %d", method);
}
pfree(doc);
PG_FREE_IF_COPY(txt, 1);
@ -481,120 +569,141 @@ rank_cd(PG_FUNCTION_ARGS) {
/*
 * rank_cd_def
 *      Call rank_cd with -1 as its first argument, passing through this
 *      function's arguments 0 and 1.  The fourth argument is this function's
 *      argument 2 when three arguments were given, DEF_NORM_METHOD otherwise.
 */
Datum
rank_cd_def(PG_FUNCTION_ARGS)
{
    PG_RETURN_DATUM(DirectFunctionCall4(
                                        rank_cd,
                                        Int32GetDatum(-1),
                                        PG_GETARG_DATUM(0),
                                        PG_GETARG_DATUM(1),
                                        (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
                                        ));
}
/**************debug*************/

/* One occurrence of a document word, used to print covers. */
typedef struct
{
    char       *w;              /* the word's text */
    int2        len;            /* length of w */
    int2        pos;            /* position of this occurrence */
    int2        start;          /* number of the cover starting here, or 0 */
    int2        finish;         /* number of the cover ending here, or 0 */
} DocWord;
/*
 * qsort() comparator ordering DocWord entries by ascending pos.
 *
 * Equal positions compare as 0, so the function gives the same answer
 * whichever way round qsort() passes the two elements.
 */
static int
compareDocWord(const void *a, const void *b)
{
    if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
        return 0;
    return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
}
Datum
get_covers(PG_FUNCTION_ARGS) {
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
Datum
get_covers(PG_FUNCTION_ARGS)
{
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
WordEntry *pptr=ARRPTR(txt);
int i,dlen=0,j,cur=0,len=0,rlen;
DocWord *dw,*dwptr;
text *out;
char *cptr;
WordEntry *pptr = ARRPTR(txt);
int i,
dlen = 0,
j,
cur = 0,
len = 0,
rlen;
DocWord *dw,
*dwptr;
text *out;
char *cptr;
DocRepresentation *doc;
int pos=0,p,q,olddwpos=0;
int ncover=1;
int pos = 0,
p,
q,
olddwpos = 0;
int ncover = 1;
doc = get_docrep(txt, query, &rlen);
if ( !doc ) {
out=palloc(VARHDRSZ);
if (!doc)
{
out = palloc(VARHDRSZ);
VARATT_SIZEP(out) = VARHDRSZ;
PG_FREE_IF_COPY(txt,0);
PG_FREE_IF_COPY(query,1);
PG_FREE_IF_COPY(txt, 0);
PG_FREE_IF_COPY(query, 1);
PG_RETURN_POINTER(out);
}
for(i=0;i<txt->size;i++) {
for (i = 0; i < txt->size; i++)
{
if (!pptr[i].haspos)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no pos info")));
dlen += POSDATALEN(txt,&(pptr[i]));
dlen += POSDATALEN(txt, &(pptr[i]));
}
dwptr=dw=palloc(sizeof(DocWord)*dlen);
memset(dw,0,sizeof(DocWord)*dlen);
dwptr = dw = palloc(sizeof(DocWord) * dlen);
memset(dw, 0, sizeof(DocWord) * dlen);
for(i=0;i<txt->size;i++) {
WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i]));
for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
dw[cur].w=STRPTR(txt)+pptr[i].pos;
dw[cur].len=pptr[i].len;
dw[cur].pos=posdata[j].pos;
for (i = 0; i < txt->size; i++)
{
WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
{
dw[cur].w = STRPTR(txt) + pptr[i].pos;
dw[cur].len = pptr[i].len;
dw[cur].pos = posdata[j].pos;
cur++;
}
len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
}
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
while( Cover(doc, rlen, query, &pos, &p, &q) ) {
dwptr=dw+olddwpos;
while(dwptr->pos < p && dwptr-dw<dlen)
while (Cover(doc, rlen, query, &pos, &p, &q))
{
dwptr = dw + olddwpos;
while (dwptr->pos < p && dwptr - dw < dlen)
dwptr++;
olddwpos=dwptr-dw;
dwptr->start=ncover;
while(dwptr->pos < q+1 && dwptr-dw<dlen)
olddwpos = dwptr - dw;
dwptr->start = ncover;
while (dwptr->pos < q + 1 && dwptr - dw < dlen)
dwptr++;
(dwptr-1)->finish=ncover;
len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
ncover++;
}
out=palloc(VARHDRSZ+len);
cptr=((char*)out)+VARHDRSZ;
dwptr=dw;
(dwptr - 1)->finish = ncover;
len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
ncover++;
}
while( dwptr-dw < dlen) {
if ( dwptr->start ) {
sprintf(cptr,"{%d ",dwptr->start);
cptr=strchr(cptr,'\0');
out = palloc(VARHDRSZ + len);
cptr = ((char *) out) + VARHDRSZ;
dwptr = dw;
while (dwptr - dw < dlen)
{
if (dwptr->start)
{
sprintf(cptr, "{%d ", dwptr->start);
cptr = strchr(cptr, '\0');
}
memcpy(cptr,dwptr->w,dwptr->len);
cptr+=dwptr->len;
*cptr=' ';
memcpy(cptr, dwptr->w, dwptr->len);
cptr += dwptr->len;
*cptr = ' ';
cptr++;
if ( dwptr->finish ) {
sprintf(cptr,"}%d ",dwptr->finish);
cptr=strchr(cptr,'\0');
if (dwptr->finish)
{
sprintf(cptr, "}%d ", dwptr->finish);
cptr = strchr(cptr, '\0');
}
dwptr++;
}
}
VARATT_SIZEP(out) = cptr - ((char *) out);
VARATT_SIZEP(out) = cptr - ((char*)out);
pfree(dw);
pfree(doc);
PG_FREE_IF_COPY(txt,0);
PG_FREE_IF_COPY(query,1);
PG_FREE_IF_COPY(txt, 0);
PG_FREE_IF_COPY(query, 1);
PG_RETURN_POINTER(out);
}

View File

@ -1,4 +1,4 @@
/*
/*
* simple but fast map from str to Oid
* Teodor Sigaev <teodor@sigaev.ru>
*/
@ -11,69 +11,85 @@
#include "common.h"
static int
compareSNMapEntry(const void *a, const void *b) {
return strcmp( ((SNMapEntry*)a)->key, ((SNMapEntry*)b)->key );
compareSNMapEntry(const void *a, const void *b)
{
return strcmp(((SNMapEntry *) a)->key, ((SNMapEntry *) b)->key);
}
void
addSNMap( SNMap *map, char *key, Oid value ) {
if (map->len>=map->reallen) {
void
addSNMap(SNMap * map, char *key, Oid value)
{
if (map->len >= map->reallen)
{
SNMapEntry *tmp;
int len = (map->reallen) ? 2*map->reallen : 16;
tmp=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * len);
if ( !tmp )
int len = (map->reallen) ? 2 * map->reallen : 16;
tmp = (SNMapEntry *) realloc(map->list, sizeof(SNMapEntry) * len);
if (!tmp)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
map->reallen=len;
map->list=tmp;
map->reallen = len;
map->list = tmp;
}
map->list[ map->len ].key = strdup(key);
if ( ! map->list[ map->len ].key )
map->list[map->len].key = strdup(key);
if (!map->list[map->len].key)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
map->list[ map->len ].value=value;
map->list[map->len].value = value;
map->len++;
if ( map->len>1 ) qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
if (map->len > 1)
qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
}
void
addSNMap_t( SNMap *map, text *key, Oid value ) {
char *k=text2char( key );
void
addSNMap_t(SNMap * map, text *key, Oid value)
{
char *k = text2char(key);
addSNMap(map, k, value);
pfree(k);
}
Oid
findSNMap( SNMap *map, char *key ) {
Oid
findSNMap(SNMap * map, char *key)
{
SNMapEntry *ptr;
SNMapEntry ks = {key, 0};
if ( map->len==0 || !map->list )
return 0;
ptr = (SNMapEntry*) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
SNMapEntry ks = {key, 0};
if (map->len == 0 || !map->list)
return 0;
ptr = (SNMapEntry *) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
return (ptr) ? ptr->value : 0;
}
Oid
findSNMap_t( SNMap *map, text *key ) {
char *k=text2char(key);
int res;
res= findSNMap(map, k);
Oid
findSNMap_t(SNMap * map, text *key)
{
char *k = text2char(key);
int res;
res = findSNMap(map, k);
pfree(k);
return res;
}
void freeSNMap( SNMap *map ) {
SNMapEntry *entry=map->list;
if ( map->list ) {
while( map->len ) {
if ( entry->key ) free(entry->key);
entry++; map->len--;
void
freeSNMap(SNMap * map)
{
SNMapEntry *entry = map->list;
if (map->list)
{
while (map->len)
{
if (entry->key)
free(entry->key);
entry++;
map->len--;
}
free( map->list );
free(map->list);
}
memset(map,0,sizeof(SNMap));
memset(map, 0, sizeof(SNMap));
}

View File

@ -3,21 +3,23 @@
#include "postgres.h"
typedef struct {
char *key;
Oid value;
} SNMapEntry;
typedef struct
{
char *key;
Oid value;
} SNMapEntry;
typedef struct {
int len;
int reallen;
SNMapEntry *list;
} SNMap;
typedef struct
{
int len;
int reallen;
SNMapEntry *list;
} SNMap;
void addSNMap( SNMap *map, char *key, Oid value );
void addSNMap_t( SNMap *map, text *key, Oid value );
Oid findSNMap( SNMap *map, char *key );
Oid findSNMap_t( SNMap *map, text *key );
void freeSNMap( SNMap *map );
void addSNMap(SNMap * map, char *key, Oid value);
void addSNMap_t(SNMap * map, text *key, Oid value);
Oid findSNMap(SNMap * map, char *key);
Oid findSNMap_t(SNMap * map, text *key);
void freeSNMap(SNMap * map);
#endif

View File

@ -2,48 +2,64 @@
#include "header.h"
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
z->p = create_s();
if (S_size)
{ z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
{ int i;
for (i = 0; i < S_size; i++) z->S[i] = create_s();
}
z->S_size = S_size;
}
if (I_size)
{ z->I = (int *) calloc(I_size, sizeof(int));
z->I_size = I_size;
}
if (B_size)
{ z->B = (symbol *) calloc(B_size, sizeof(symbol));
z->B_size = B_size;
}
return z;
}
extern void SN_close_env(struct SN_env * z)
extern struct SN_env *
SN_create_env(int S_size, int I_size, int B_size)
{
if (z->S_size)
{
{ int i;
for (i = 0; i < z->S_size; i++) lose_s(z->S[i]);
}
free(z->S);
}
if (z->I_size) free(z->I);
if (z->B_size) free(z->B);
if (z->p) lose_s(z->p);
free(z);
struct SN_env *z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
z->p = create_s();
if (S_size)
{
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
{
int i;
for (i = 0; i < S_size; i++)
z->S[i] = create_s();
}
z->S_size = S_size;
}
if (I_size)
{
z->I = (int *) calloc(I_size, sizeof(int));
z->I_size = I_size;
}
if (B_size)
{
z->B = (symbol *) calloc(B_size, sizeof(symbol));
z->B_size = B_size;
}
return z;
}
extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
extern void
SN_close_env(struct SN_env * z)
{
replace_s(z, 0, z->l, size, s);
z->c = 0;
if (z->S_size)
{
{
int i;
for (i = 0; i < z->S_size; i++)
lose_s(z->S[i]);
}
free(z->S);
}
if (z->I_size)
free(z->I);
if (z->B_size)
free(z->B);
if (z->p)
lose_s(z->p);
free(z);
}
/*
 * SN_set_current: load a new word into the environment.
 *
 * The span [0, z->l) of z->p is replaced by the `size` symbols at `s`
 * (replace_s does the resizing and limit bookkeeping), after which the
 * cursor z->c is placed at position 0.
 */
extern void
SN_set_current(struct SN_env * z, int size, const symbol * s)
{
	int			old_limit = z->l;

	/* the size adjustment returned by replace_s is not needed here */
	(void) replace_s(z, 0, old_limit, size, s);
	z->c = 0;
}

View File

@ -11,17 +11,24 @@ typedef unsigned char symbol;
*/
struct SN_env {
symbol * p;
int c; int a; int l; int lb; int bra; int ket;
int S_size; int I_size; int B_size;
symbol * * S;
int * I;
symbol * B;
struct SN_env
{
symbol *p;
int c;
int a;
int l;
int lb;
int bra;
int ket;
int S_size;
int I_size;
int B_size;
symbol **S;
int *I;
symbol *B;
};
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
extern struct SN_env *SN_create_env(int S_size, int I_size, int B_size);
extern void SN_close_env(struct SN_env * z);
extern void SN_set_current(struct SN_env * z, int size, const symbol * s);

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,7 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
extern struct SN_env * english_create_env(void);
extern struct SN_env *english_create_env(void);
extern void english_close_env(struct SN_env * z);
extern int english_stem(struct SN_env * z);
extern int english_stem(struct SN_env * z);

View File

@ -2,41 +2,42 @@
#define HEAD 2*sizeof(int)
#define SIZE(p) ((int *)(p))[-1]
#define SIZE(p) ((int *)(p))[-1]
#define SET_SIZE(p, n) ((int *)(p))[-1] = n
#define CAPACITY(p) ((int *)(p))[-2]
struct among
{ int s_size; /* number of chars in string */
symbol * s; /* search string */
int substring_i;/* index to longest matching substring */
int result; /* result of the lookup */
int (* function)(struct SN_env *);
{
int s_size; /* number of chars in string */
symbol *s; /* search string */
int substring_i; /* index to longest matching substring */
int result; /* result of the lookup */
int (*function) (struct SN_env *);
};
extern symbol * create_s(void);
extern symbol *create_s(void);
extern void lose_s(symbol * p);
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
extern int in_grouping(struct SN_env * z, unsigned char *s, int min, int max);
extern int in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max);
extern int out_grouping(struct SN_env * z, unsigned char *s, int min, int max);
extern int out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max);
extern int in_range(struct SN_env * z, int min, int max);
extern int in_range_b(struct SN_env * z, int min, int max);
extern int out_range(struct SN_env * z, int min, int max);
extern int out_range_b(struct SN_env * z, int min, int max);
extern int in_range(struct SN_env * z, int min, int max);
extern int in_range_b(struct SN_env * z, int min, int max);
extern int out_range(struct SN_env * z, int min, int max);
extern int out_range_b(struct SN_env * z, int min, int max);
extern int eq_s(struct SN_env * z, int s_size, symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
extern int eq_v(struct SN_env * z, symbol * p);
extern int eq_v_b(struct SN_env * z, symbol * p);
extern int eq_s(struct SN_env * z, int s_size, symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
extern int eq_v(struct SN_env * z, symbol * p);
extern int eq_v_b(struct SN_env * z, symbol * p);
extern int find_among(struct SN_env * z, struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
extern int find_among(struct SN_env * z, struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
extern symbol * increase_size(symbol * p, int n);
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
extern symbol *increase_size(symbol * p, int n);
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
extern void slice_from_v(struct SN_env * z, symbol * p);
extern void slice_del(struct SN_env * z);
@ -44,8 +45,7 @@ extern void slice_del(struct SN_env * z);
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
extern symbol * slice_to(struct SN_env * z, symbol * p);
extern symbol * assign_to(struct SN_env * z, symbol * p);
extern symbol *slice_to(struct SN_env * z, symbol * p);
extern symbol *assign_to(struct SN_env * z, symbol * p);
extern void debug(struct SN_env * z, int number, int line_count);

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,7 @@
/* This file was generated automatically by the Snowball to ANSI C compiler */
extern struct SN_env * russian_create_env(void);
extern struct SN_env *russian_create_env(void);
extern void russian_close_env(struct SN_env * z);
extern int russian_stem(struct SN_env * z);
extern int russian_stem(struct SN_env * z);

View File

@ -9,320 +9,507 @@
#define CREATE_SIZE 1
extern symbol * create_s(void)
{ symbol * p = (symbol *) (HEAD + (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)));
CAPACITY(p) = CREATE_SIZE;
SET_SIZE(p, CREATE_SIZE);
return p;
}
extern void lose_s(symbol * p) { free((char *) p - HEAD); }
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
{ if (z->c >= z->l) return 0;
{ int ch = z->p[z->c];
if
(ch > max || (ch -= min) < 0 ||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
}
z->c++; return 1;
}
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
{ if (z->c <= z->lb) return 0;
{ int ch = z->p[z->c - 1];
if
(ch > max || (ch -= min) < 0 ||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
}
z->c--; return 1;
}
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
{ if (z->c >= z->l) return 0;
{ int ch = z->p[z->c];
unless
(ch > max || (ch -= min) < 0 ||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
}
z->c++; return 1;
}
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
{ if (z->c <= z->lb) return 0;
{ int ch = z->p[z->c - 1];
unless
(ch > max || (ch -= min) < 0 ||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
}
z->c--; return 1;
}
extern int in_range(struct SN_env * z, int min, int max)
{ if (z->c >= z->l) return 0;
{ int ch = z->p[z->c];
if
(ch > max || ch < min) return 0;
}
z->c++; return 1;
}
extern int in_range_b(struct SN_env * z, int min, int max)
{ if (z->c <= z->lb) return 0;
{ int ch = z->p[z->c - 1];
if
(ch > max || ch < min) return 0;
}
z->c--; return 1;
}
extern int out_range(struct SN_env * z, int min, int max)
{ if (z->c >= z->l) return 0;
{ int ch = z->p[z->c];
unless
(ch > max || ch < min) return 0;
}
z->c++; return 1;
}
extern int out_range_b(struct SN_env * z, int min, int max)
{ if (z->c <= z->lb) return 0;
{ int ch = z->p[z->c - 1];
unless
(ch > max || ch < min) return 0;
}
z->c--; return 1;
}
extern int eq_s(struct SN_env * z, int s_size, symbol * s)
{ if (z->l - z->c < s_size ||
memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
z->c += s_size; return 1;
}
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
{ if (z->c - z->lb < s_size ||
memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
z->c -= s_size; return 1;
}
extern int eq_v(struct SN_env * z, symbol * p)
{ return eq_s(z, SIZE(p), p);
}
extern int eq_v_b(struct SN_env * z, symbol * p)
{ return eq_s_b(z, SIZE(p), p);
}
extern int find_among(struct SN_env * z, struct among * v, int v_size)
extern symbol *
create_s(void)
{
int i = 0;
int j = v_size;
symbol *p = (symbol *) (HEAD + (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)));
int c = z->c; int l = z->l;
symbol * q = z->p + c;
CAPACITY(p) = CREATE_SIZE;
SET_SIZE(p, CREATE_SIZE);
return p;
}
struct among * w;
extern void lose_s(symbol * p)
{
free((char *) p - HEAD);
}
int common_i = 0;
int common_j = 0;
extern int
in_grouping(struct SN_env * z, unsigned char *s, int min, int max)
{
if (z->c >= z->l)
return 0;
{
int ch = z->p[z->c];
int first_key_inspected = 0;
if
(ch > max || (ch -= min) < 0 ||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return 0;
}
z->c++;
return 1;
}
while(1)
{ int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
{ int i; for (i = common; i < w->s_size; i++)
{ if (c + common == l) { diff = -1; break; }
diff = q[common] - w->s[i];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1)
{ if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */
extern int
in_grouping_b(struct SN_env * z, unsigned char *s, int min, int max)
{
if (z->c <= z->lb)
return 0;
{
int ch = z->p[z->c - 1];
/* - but now we need to go round once more to get
v->s inspected. This looks messy, but is actually
the optimal approach. */
if
(ch > max || (ch -= min) < 0 ||
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
return 0;
}
z->c--;
return 1;
}
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1)
{ w = v + i;
if (common_i >= w->s_size)
{ z->c = c + w->s_size;
if (w->function == 0) return w->result;
{ int res = w->function(z);
z->c = c + w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
/*
 * out_grouping: forward test of the symbol under the cursor against a
 * character grouping.
 *
 * `s` is a bitmap indexed by (symbol - min): bit (ch & 7) of byte (ch >> 3).
 * Returns 0 without moving when the cursor is already at the limit z->l, or
 * when the `unless` test rejects the symbol; otherwise advances z->c by one
 * and returns 1.
 *
 * NOTE(review): `unless` is a macro defined outside this view; presumably
 * unless(C) is if (!(C)), i.e. the function succeeds for symbols that are
 * NOT in the grouping -- confirm against its definition.
 */
extern int
out_grouping(struct SN_env * z, unsigned char *s, int min, int max)
{
	int			ch;

	if (z->c >= z->l)
		return 0;

	ch = z->p[z->c];
	/* operand order matters: ch is rebased to (ch - min) before the lookup */
	unless(ch > max || (ch -= min) < 0 ||
		   (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
		return 0;

	z->c++;
	return 1;
}
/*
 * out_grouping_b: backward counterpart of out_grouping.
 *
 * Looks at the symbol just before the cursor (z->p[z->c - 1]) and tests it
 * against the bitmap `s`, indexed by (symbol - min).  Returns 0 without
 * moving when the cursor is at or before the backward limit z->lb, or when
 * the `unless` test rejects the symbol; otherwise steps z->c back by one and
 * returns 1.
 *
 * NOTE(review): `unless` is defined outside this view; presumably
 * unless(C) is if (!(C)) -- confirm.
 */
extern int
out_grouping_b(struct SN_env * z, unsigned char *s, int min, int max)
{
	int			ch;

	if (z->c <= z->lb)
		return 0;

	ch = z->p[z->c - 1];
	/* operand order matters: ch is rebased to (ch - min) before the lookup */
	unless(ch > max || (ch -= min) < 0 ||
		   (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
		return 0;

	z->c--;
	return 1;
}
/*
 * in_range: forward test that the symbol under the cursor lies in
 * [min, max].
 *
 * On success the cursor z->c is advanced by one and 1 is returned.  Returns
 * 0, leaving the cursor alone, when the cursor is at the limit z->l or the
 * symbol is outside the range.
 */
extern int
in_range(struct SN_env * z, int min, int max)
{
	if (z->c < z->l)
	{
		int			ch = z->p[z->c];

		if (min <= ch && ch <= max)
		{
			z->c++;
			return 1;
		}
	}
	return 0;
}
/*
 * in_range_b: backward test that the symbol just before the cursor lies in
 * [min, max].
 *
 * On success the cursor z->c is moved back by one and 1 is returned.
 * Returns 0, leaving the cursor alone, when the cursor is at or before the
 * backward limit z->lb or the symbol is outside the range.
 */
extern int
in_range_b(struct SN_env * z, int min, int max)
{
	if (z->c > z->lb)
	{
		int			ch = z->p[z->c - 1];

		if (min <= ch && ch <= max)
		{
			z->c--;
			return 1;
		}
	}
	return 0;
}
/*
 * out_range: forward range test on the symbol under the cursor, guarded by
 * the `unless` macro.
 *
 * Returns 0 without moving when the cursor is at the limit z->l or when the
 * `unless` test rejects the symbol; otherwise advances z->c by one and
 * returns 1.
 *
 * NOTE(review): `unless` is defined outside this view; presumably
 * unless(C) is if (!(C)), so this succeeds for symbols outside [min, max]
 * -- confirm.
 */
extern int
out_range(struct SN_env * z, int min, int max)
{
	int			ch;

	if (z->c >= z->l)
		return 0;

	ch = z->p[z->c];
	unless(ch < min || ch > max)
		return 0;

	z->c++;
	return 1;
}
/*
 * out_range_b: backward counterpart of out_range, looking at the symbol
 * just before the cursor.
 *
 * Returns 0 without moving when the cursor is at or before the backward
 * limit z->lb or when the `unless` test rejects the symbol; otherwise steps
 * z->c back by one and returns 1.
 *
 * NOTE(review): `unless` is defined outside this view; presumably
 * unless(C) is if (!(C)) -- confirm.
 */
extern int
out_range_b(struct SN_env * z, int min, int max)
{
	int			ch;

	if (z->c <= z->lb)
		return 0;

	ch = z->p[z->c - 1];
	unless(ch < min || ch > max)
		return 0;

	z->c--;
	return 1;
}
/*
 * eq_s: forward literal match.
 *
 * Compares the s_size symbols at `s` with the input starting at the cursor.
 * Returns 1 and advances z->c past the match when they are equal; returns 0
 * with the cursor unchanged when fewer than s_size symbols remain before
 * the limit z->l or the symbols differ.
 */
extern int
eq_s(struct SN_env * z, int s_size, symbol * s)
{
	int			remaining = z->l - z->c;

	/* length check first, so memcmp never reads past the limit */
	if (remaining < s_size)
		return 0;
	if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0)
		return 0;

	z->c += s_size;
	return 1;
}
/*
 * eq_s_b: backward literal match.
 *
 * Compares the s_size symbols at `s` with the input that ends at the
 * cursor.  Returns 1 and moves z->c back over the match when they are
 * equal; returns 0 with the cursor unchanged when fewer than s_size symbols
 * lie between the backward limit z->lb and the cursor, or the symbols
 * differ.
 */
extern int
eq_s_b(struct SN_env * z, int s_size, symbol * s)
{
	int			available = z->c - z->lb;

	/* length check first, so memcmp never reads before the backward limit */
	if (available < s_size)
		return 0;
	if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0)
		return 0;

	z->c -= s_size;
	return 1;
}
/*
 * eq_v: forward match against a sized symbol string `p`; the length is read
 * from p's size header via SIZE() and the work is delegated to eq_s.
 */
extern int
eq_v(struct SN_env * z, symbol * p)
{
	int			len = SIZE(p);

	return eq_s(z, len, p);
}
/*
 * eq_v_b: backward match against a sized symbol string `p`; the length is
 * read from p's size header via SIZE() and the work is delegated to eq_s_b.
 */
extern int
eq_v_b(struct SN_env * z, symbol * p)
{
	int			len = SIZE(p);

	return eq_s_b(z, len, p);
}
/*
 * find_among: look up the input that starts at the cursor in the table v
 * (v_size entries), going forward and never reading past the limit z->l.
 *
 * Phase 1 bisects the table to find a candidate entry; phase 2 walks from
 * that candidate through the substring_i links until an entry is accepted.
 * On acceptance the cursor z->c is set just past the entry's string and the
 * entry's result is returned.  Returns 0 when the substring_i chain ends
 * (negative index) without any entry being accepted.
 *
 * NOTE(review): the bisection presumably relies on v being ordered by its
 * s strings -- confirm against whatever generates the tables.
 */
extern int
find_among(struct SN_env * z, struct among * v, int v_size)
{
/* [i, j) is the part of the table still under consideration */
int i = 0;
int j = v_size;
int c = z->c;
int l = z->l;
symbol *q = z->p + c;
struct among *w;
/* number of leading symbols matched when i (resp. j) was last assigned */
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
/* phase 1: bisection */
while (1)
{
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
{
/*
 * This inner i shadows the table index above.  It starts at common and
 * both are incremented together, so q[common] and w->s[i] are always
 * read at the same offset.
 */
int i;
for (i = common; i < w->s_size; i++)
{
/* input exhausted before the entry's string: take the lower half */
if (c + common == l)
{
diff = -1;
break;
}
diff = q[common] - w->s[i];
if (diff != 0)
break;
common++;
}
}
if (diff < 0)
{
j = k;
common_j = common;
}
else
{
i = k;
common_i = common;
}
if (j - i <= 1)
{
if (i > 0)
break; /* v->s has been inspected */
if (j == i)
break; /* only one item in v */
/*
* - but now we need to go round once more to get v->s
* inspected. This looks messy, but is actually the optimal
* approach.
*/
if (first_key_inspected)
break;
first_key_inspected = 1;
}
}
/* phase 2: try v[i], then each entry reached through substring_i */
while (1)
{
w = v + i;
/* the entry's whole string is covered by the matched prefix */
if (common_i >= w->s_size)
{
z->c = c + w->s_size;
/* no condition routine attached: accept immediately */
if (w->function == 0)
return w->result;
{
int res = w->function(z);
/* cursor is put back after the call -- presumably the routine may move it */
z->c = c + w->s_size;
if (res)
return w->result;
}
}
i = w->substring_i;
if (i < 0)
return 0;
}
}
/* find_among_b is for backwards processing. Same comments apply */
extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
extern int
find_among_b(struct SN_env * z, struct among * v, int v_size)
{
int i = 0;
int j = v_size;
int i = 0;
int j = v_size;
int c = z->c; int lb = z->lb;
symbol * q = z->p + c - 1;
int c = z->c;
int lb = z->lb;
symbol *q = z->p + c - 1;
struct among * w;
struct among *w;
int common_i = 0;
int common_j = 0;
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
int first_key_inspected = 0;
while(1)
{ int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
{ int i; for (i = w->s_size - 1 - common; i >= 0; i--)
{ if (c - common == lb) { diff = -1; break; }
diff = q[- common] - w->s[i];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1)
{ if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1)
{ w = v + i;
if (common_i >= w->s_size)
{ z->c = c - w->s_size;
if (w->function == 0) return w->result;
{ int res = w->function(z);
z->c = c - w->s_size;
if (res) return w->result;
}
}
i = w->substring_i;
if (i < 0) return 0;
}
while (1)
{
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
{
int i;
for (i = w->s_size - 1 - common; i >= 0; i--)
{
if (c - common == lb)
{
diff = -1;
break;
}
diff = q[-common] - w->s[i];
if (diff != 0)
break;
common++;
}
}
if (diff < 0)
{
j = k;
common_j = common;
}
else
{
i = k;
common_i = common;
}
if (j - i <= 1)
{
if (i > 0)
break;
if (j == i)
break;
if (first_key_inspected)
break;
first_key_inspected = 1;
}
}
while (1)
{
w = v + i;
if (common_i >= w->s_size)
{
z->c = c - w->s_size;
if (w->function == 0)
return w->result;
{
int res = w->function(z);
z->c = c - w->s_size;
if (res)
return w->result;
}
}
i = w->substring_i;
if (i < 0)
return 0;
}
}
extern symbol * increase_size(symbol * p, int n)
{ int new_size = n + 20;
symbol * q = (symbol *) (HEAD + (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol)));
CAPACITY(q) = new_size;
memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q;
/*
 * increase_size: move the symbol buffer `p` into a larger allocation.
 *
 * The new buffer has capacity n + 20 symbols.  CAPACITY(p) symbols are
 * copied across, the old buffer is released with lose_s(), and the new
 * buffer is returned.  Only the capacity header of the new buffer is
 * written here; the size header is not set by this function.
 *
 * NOTE(review): the malloc() result is used unchecked, exactly as before.
 */
extern symbol *
increase_size(symbol * p, int n)
{
	int			capacity = n + 20;
	char	   *raw = (char *) malloc(HEAD + (capacity + 1) * sizeof(symbol));
	symbol	   *grown = (symbol *) (HEAD + raw);	/* data follows the header */

	CAPACITY(grown) = capacity;
	memmove(grown, p, CAPACITY(p) * sizeof(symbol));
	lose_s(p);
	return grown;
}
/* to replace symbols between c_bra and c_ket in z->p by the
s_size symbols at s
*/
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
{ int adjustment = s_size - (c_ket - c_bra);
int len = SIZE(z->p);
if (adjustment != 0)
{ if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len);
memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
SET_SIZE(z->p, adjustment + len);
z->l += adjustment;
if (z->c >= c_ket) z->c += adjustment; else
if (z->c > c_bra) z->c = c_bra;
}
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
return adjustment;
}
static void slice_check(struct SN_env * z)
extern int
replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
{
if (!(0 <= z->bra &&
z->bra <= z->ket &&
z->ket <= z->l &&
z->l <= SIZE(z->p))) /* this line could be removed */
{
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
exit(1);
}
int adjustment = s_size - (c_ket - c_bra);
int len = SIZE(z->p);
if (adjustment != 0)
{
if (adjustment + len > CAPACITY(z->p))
z->p = increase_size(z->p, adjustment + len);
memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol));
SET_SIZE(z->p, adjustment + len);
z->l += adjustment;
if (z->c >= c_ket)
z->c += adjustment;
else if (z->c > c_bra)
z->c = c_bra;
}
unless(s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
return adjustment;
}
extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
{ slice_check(z);
replace_s(z, z->bra, z->ket, s_size, s);
static void
slice_check(struct SN_env * z)
{
if (!(0 <= z->bra &&
z->bra <= z->ket &&
z->ket <= z->l &&
z->l <= SIZE(z->p))) /* this line could be removed */
{
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
exit(1);
}
}
extern void slice_from_v(struct SN_env * z, symbol * p)
{ slice_from_s(z, SIZE(p), p);
extern void
slice_from_s(struct SN_env * z, int s_size, symbol * s)
{
slice_check(z);
replace_s(z, z->bra, z->ket, s_size, s);
}
extern void slice_del(struct SN_env * z)
{ slice_from_s(z, 0, 0);
extern void
slice_from_v(struct SN_env * z, symbol * p)
{
slice_from_s(z, SIZE(p), p);
}
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
{ int adjustment = replace_s(z, bra, ket, s_size, s);
if (bra <= z->bra) z->bra += adjustment;
if (bra <= z->ket) z->ket += adjustment;
extern void
slice_del(struct SN_env * z)
{
slice_from_s(z, 0, 0);
}
extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
{ int adjustment = replace_s(z, bra, ket, SIZE(p), p);
if (bra <= z->bra) z->bra += adjustment;
if (bra <= z->ket) z->ket += adjustment;
extern void
insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
{
int adjustment = replace_s(z, bra, ket, s_size, s);
if (bra <= z->bra)
z->bra += adjustment;
if (bra <= z->ket)
z->ket += adjustment;
}
extern symbol * slice_to(struct SN_env * z, symbol * p)
{ slice_check(z);
{ int len = z->ket - z->bra;
if (CAPACITY(p) < len) p = increase_size(p, len);
memmove(p, z->p + z->bra, len * sizeof(symbol));
SET_SIZE(p, len);
}
return p;
extern void
insert_v(struct SN_env * z, int bra, int ket, symbol * p)
{
int adjustment = replace_s(z, bra, ket, SIZE(p), p);
if (bra <= z->bra)
z->bra += adjustment;
if (bra <= z->ket)
z->ket += adjustment;
}
extern symbol * assign_to(struct SN_env * z, symbol * p)
{ int len = z->l;
if (CAPACITY(p) < len) p = increase_size(p, len);
memmove(p, z->p, len * sizeof(symbol));
SET_SIZE(p, len);
return p;
extern symbol *
slice_to(struct SN_env * z, symbol * p)
{
slice_check(z);
{
int len = z->ket - z->bra;
if (CAPACITY(p) < len)
p = increase_size(p, len);
memmove(p, z->p + z->bra, len * sizeof(symbol));
SET_SIZE(p, len);
}
return p;
}
extern void debug(struct SN_env * z, int number, int line_count)
{ int i;
int limit = SIZE(z->p);
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++)
{ if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
extern symbol *
assign_to(struct SN_env * z, symbol * p)
{
int len = z->l;
if (CAPACITY(p) < len)
p = increase_size(p, len);
memmove(p, z->p, len * sizeof(symbol));
SET_SIZE(p, len);
return p;
}
/*
 * debug: print the working buffer of z to stdout with position markers.
 *
 * When number >= 0 a prefix with number, line_count and the buffer size is
 * printed first.  Then, for every position 0..SIZE(z->p), a marker is
 * emitted before the symbol at that position when the position equals:
 *   '{'  z->lb      '['  z->bra      '|'  z->c
 *   ']'  z->ket     '}'  z->l
 * Symbols with value 0 are shown as '#'.  The line is closed with "'\n".
 */
extern void
debug(struct SN_env * z, int number, int line_count)
{
	int			size = SIZE(z->p);
	int			pos;

	if (number >= 0)
		printf("%3d (line %4d): [%d]'", number, line_count, size);

	/* runs one step past the last symbol so markers at the end still show */
	for (pos = 0; pos <= size; pos++)
	{
		if (pos == z->lb)
			printf("{");
		if (pos == z->bra)
			printf("[");
		if (pos == z->c)
			printf("|");
		if (pos == z->ket)
			printf("]");
		if (pos == z->l)
			printf("}");

		if (pos == size)
			break;				/* no symbol at the one-past-the-end slot */

		printf("%c", z->p[pos] == 0 ? '#' : z->p[pos]);
	}
	printf("'\n");
}

View File

@ -1,4 +1,4 @@
/*
/*
* stopword library
* Teodor Sigaev <teodor@sigaev.ru>
*/
@ -13,97 +13,114 @@
#define STOPBUFLEN 4096
char*
lowerstr(char *str) {
char *ptr=str;
while(*ptr) {
*ptr = tolower(*(unsigned char*)ptr);
char *
lowerstr(char *str)
{
char *ptr = str;
while (*ptr)
{
*ptr = tolower(*(unsigned char *) ptr);
ptr++;
}
return str;
}
/*
 * freestoplist: release everything owned by a stop-word list and reset it.
 *
 * Frees each word in s->stop (stopping at s->len entries or at the first
 * NULL entry, whichever comes first), then frees the array itself, and
 * finally zeroes the whole StopList so it can be reused or freed again
 * safely.
 */
void
freestoplist(StopList * s)
{
	char	  **ptr = s->stop;

	if (ptr)
	{
		/* check the count before dereferencing, so ptr never runs past the array */
		while (s->len > 0 && *ptr)
		{
			free(*ptr);
			ptr++;
			s->len--;
		}

		/*
		 * The array is released exactly once, after the loop.  Freeing it
		 * inside the loop would leave ptr pointing into freed memory on the
		 * next iteration and free the array again.
		 */
		free(s->stop);
	}
	memset(s, 0, sizeof(StopList));
}
void
readstoplist(text *in, StopList *s) {
char **stop=NULL;
s->len=0;
if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
char *filename=text2char(in);
FILE *hin=NULL;
char buf[STOPBUFLEN];
int reallen=0;
readstoplist(text *in, StopList * s)
{
char **stop = NULL;
if ( (hin=fopen(filename,"r")) == NULL )
s->len = 0;
if (in && VARSIZE(in) - VARHDRSZ > 0)
{
char *filename = text2char(in);
FILE *hin = NULL;
char buf[STOPBUFLEN];
int reallen = 0;
if ((hin = fopen(filename, "r")) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not open file \"%s\": %m",
filename)));
filename)));
while( fgets(buf,STOPBUFLEN,hin) ) {
buf[strlen(buf)-1] = '\0';
if ( *buf=='\0' ) continue;
while (fgets(buf, STOPBUFLEN, hin))
{
buf[strlen(buf) - 1] = '\0';
if (*buf == '\0')
continue;
if ( s->len>= reallen ) {
char **tmp;
reallen=(reallen) ? reallen*2 : 16;
tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
if (!tmp) {
if (s->len >= reallen)
{
char **tmp;
reallen = (reallen) ? reallen * 2 : 16;
tmp = (char **) realloc((void *) stop, sizeof(char *) * reallen);
if (!tmp)
{
freestoplist(s);
fclose(hin);
fclose(hin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
stop=tmp;
stop = tmp;
}
stop[s->len]=strdup(buf);
if ( !stop[s->len] ) {
stop[s->len] = strdup(buf);
if (!stop[s->len])
{
freestoplist(s);
fclose(hin);
fclose(hin);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
if ( s->wordop )
stop[s->len]=(s->wordop)(stop[s->len]);
if (s->wordop)
stop[s->len] = (s->wordop) (stop[s->len]);
(s->len)++;
(s->len)++;
}
fclose(hin);
pfree(filename);
pfree(filename);
}
s->stop=stop;
}
s->stop = stop;
}
static int
comparestr(const void *a, const void *b) {
return strcmp( *(char**)a, *(char**)b );
comparestr(const void *a, const void *b)
{
return strcmp(*(char **) a, *(char **) b);
}
void
sortstoplist(StopList *s) {
if (s->stop && s->len>0)
qsort(s->stop, s->len, sizeof(char*), comparestr);
sortstoplist(StopList * s)
{
if (s->stop && s->len > 0)
qsort(s->stop, s->len, sizeof(char *), comparestr);
}
bool
searchstoplist(StopList *s, char *key) {
if ( s->wordop )
key=(*(s->wordop))(key);
return ( s->stop && s->len>0 && bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) ) ? true : false;
searchstoplist(StopList * s, char *key)
{
if (s->wordop)
key = (*(s->wordop)) (key);
return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false;
}

View File

@ -1,5 +1,5 @@
/*
* interface functions to tscfg
/*
* interface functions to tscfg
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
@ -23,263 +23,299 @@
/*********top interface**********/
static void *plan_getcfg_bylocale=NULL;
static void *plan_getcfg=NULL;
static void *plan_getmap=NULL;
static void *plan_name2id=NULL;
static Oid current_cfg_id=0;
static void *plan_getcfg_bylocale = NULL;
static void *plan_getcfg = NULL;
static void *plan_getmap = NULL;
static void *plan_name2id = NULL;
static Oid current_cfg_id = 0;
void
init_cfg(Oid id, TSCfgInfo *cfg) {
Oid arg[2]={ OIDOID, OIDOID };
bool isnull;
Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
int stat,i,j;
text *ptr;
text *prsname=NULL;
MemoryContext oldcontext;
init_cfg(Oid id, TSCfgInfo * cfg)
{
Oid arg[2] = {OIDOID, OIDOID};
bool isnull;
Datum pars[2] = {ObjectIdGetDatum(id), ObjectIdGetDatum(id)};
int stat,
i,
j;
text *ptr;
text *prsname = NULL;
MemoryContext oldcontext;
memset(cfg,0,sizeof(TSCfgInfo));
memset(cfg, 0, sizeof(TSCfgInfo));
SPI_connect();
if ( !plan_getcfg ) {
plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
if ( !plan_getcfg )
if (!plan_getcfg)
{
plan_getcfg = SPI_saveplan(SPI_prepare("select prs_name from pg_ts_cfg where oid = $1", 1, arg));
if (!plan_getcfg)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_getcfg, pars, " ", 1);
if ( stat < 0 )
ts_error (ERROR, "SPI_execp return %d", stat);
if ( SPI_processed > 0 ) {
prsname = (text*) DatumGetPointer(
SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
);
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
{
prsname = (text *) DatumGetPointer(
SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
);
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
prsname = ptextdup( prsname );
prsname = ptextdup(prsname);
MemoryContextSwitchTo(oldcontext);
cfg->id=id;
} else
cfg->id = id;
}
else
ts_error(ERROR, "No tsearch cfg with id %d", id);
arg[0]=TEXTOID;
if ( !plan_getmap ) {
plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
if ( !plan_getmap )
arg[0] = TEXTOID;
if (!plan_getmap)
{
plan_getmap = SPI_saveplan(SPI_prepare("select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;", 2, arg));
if (!plan_getmap)
ts_error(ERROR, "SPI_prepare() failed");
}
pars[0]=PointerGetDatum( prsname );
pars[0] = PointerGetDatum(prsname);
stat = SPI_execp(plan_getmap, pars, " ", 0);
if ( stat < 0 )
ts_error (ERROR, "SPI_execp return %d", stat);
if ( SPI_processed <= 0 )
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed <= 0)
ts_error(ERROR, "No parser with id %d", id);
for(i=0;i<SPI_processed;i++) {
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
ArrayType *a;
for (i = 0; i < SPI_processed; i++)
{
int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
ArrayType *toasted_a = (ArrayType *) PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
ArrayType *a;
if ( !cfg->map ) {
cfg->len=lexid+1;
cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
if ( !cfg->map )
if (!cfg->map)
{
cfg->len = lexid + 1;
cfg->map = (ListDictionary *) malloc(sizeof(ListDictionary) * cfg->len);
if (!cfg->map)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
memset(cfg->map, 0, sizeof(ListDictionary) * cfg->len);
}
if (isnull)
continue;
a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) );
if ( ARR_NDIM(a) != 1 )
ts_error(ERROR,"Wrong dimension");
if ( ARRNELEMS(a) < 1 )
a = (ArrayType *) PointerGetDatum(PG_DETOAST_DATUM(DatumGetPointer(toasted_a)));
if (ARR_NDIM(a) != 1)
ts_error(ERROR, "Wrong dimension");
if (ARRNELEMS(a) < 1)
continue;
cfg->map[lexid].len=ARRNELEMS(a);
cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );
ptr=(text*)ARR_DATA_PTR(a);
cfg->map[lexid].len = ARRNELEMS(a);
cfg->map[lexid].dict_id = (Datum *) malloc(sizeof(Datum) * cfg->map[lexid].len);
memset(cfg->map[lexid].dict_id, 0, sizeof(Datum) * cfg->map[lexid].len);
ptr = (text *) ARR_DATA_PTR(a);
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
for(j=0;j<cfg->map[lexid].len;j++) {
for (j = 0; j < cfg->map[lexid].len; j++)
{
cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
ptr=NEXTVAL(ptr);
}
ptr = NEXTVAL(ptr);
}
MemoryContextSwitchTo(oldcontext);
if ( a != toasted_a )
if (a != toasted_a)
pfree(a);
}
SPI_finish();
cfg->prs_id = name2id_prs( prsname );
cfg->prs_id = name2id_prs(prsname);
pfree(prsname);
for(i=0;i<cfg->len;i++) {
for(j=0;j<cfg->map[i].len;j++) {
ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
for (i = 0; i < cfg->len; i++)
{
for (j = 0; j < cfg->map[i].len; j++)
{
ptr = (text *) DatumGetPointer(cfg->map[i].dict_id[j]);
cfg->map[i].dict_id[j] = ObjectIdGetDatum(name2id_dict(ptr));
pfree(ptr);
}
}
}
typedef struct {
TSCfgInfo *last_cfg;
int len;
int reallen;
TSCfgInfo *list;
typedef struct
{
TSCfgInfo *last_cfg;
int len;
int reallen;
TSCfgInfo *list;
SNMap name2id_map;
} CFGList;
} CFGList;
static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
static CFGList CList = {NULL, 0, 0, NULL, {0, 0, NULL}};
void
reset_cfg(void) {
freeSNMap( &(CList.name2id_map) );
if ( CList.list ) {
int i,j;
for(i=0;i<CList.len;i++)
if ( CList.list[i].map ) {
for(j=0;j<CList.list[i].len;j++)
if ( CList.list[i].map[j].dict_id )
reset_cfg(void)
{
freeSNMap(&(CList.name2id_map));
if (CList.list)
{
int i,
j;
for (i = 0; i < CList.len; i++)
if (CList.list[i].map)
{
for (j = 0; j < CList.list[i].len; j++)
if (CList.list[i].map[j].dict_id)
free(CList.list[i].map[j].dict_id);
free( CList.list[i].map );
free(CList.list[i].map);
}
free(CList.list);
free(CList.list);
}
memset(&CList,0,sizeof(CFGList));
memset(&CList, 0, sizeof(CFGList));
}
static int
comparecfg(const void *a, const void *b) {
return ((TSCfgInfo*)a)->id - ((TSCfgInfo*)b)->id;
comparecfg(const void *a, const void *b)
{
return ((TSCfgInfo *) a)->id - ((TSCfgInfo *) b)->id;
}
/*
 * findcfg
 *      Return the cached configuration with the given id, loading it through
 *      init_cfg() on first use.
 *
 * The result points into CList.list.  That array is realloc'd and re-sorted
 * whenever a new configuration is added, so a pointer obtained earlier may
 * no longer be valid after a later call that inserts an entry.
 */
TSCfgInfo *
findcfg(Oid id)
{
    /* same configuration as last time */
    if (CList.last_cfg != NULL && CList.last_cfg->id == id)
        return CList.last_cfg;

    /* some configuration seen earlier */
    if (CList.len != 0)
    {
        TSCfgInfo   probe;

        probe.id = id;
        CList.last_cfg = bsearch(&probe, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
        if (CList.last_cfg != NULL)
            return CList.last_cfg;
    }

    /* not cached yet: grow the array if it is full, then load the entry */
    if (CList.len == CList.reallen)
    {
        int         newcap = (CList.reallen) ? 2 * CList.reallen : 16;
        TSCfgInfo  *grown = (TSCfgInfo *) realloc(CList.list, sizeof(TSCfgInfo) * newcap);

        if (grown == NULL)
            ts_error(ERROR, "No memory");
        CList.reallen = newcap;
        CList.list = grown;
    }

    CList.last_cfg = &(CList.list[CList.len]);
    init_cfg(id, CList.last_cfg);
    CList.len++;
    qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);

    /* qsort may have moved the new entry, so look it up again */
    return findcfg(id);
}
/*
 * name2id_cfg
 *      Translate a configuration name into the oid of its pg_ts_cfg row.
 *
 * The name map in CList is consulted first; on a miss the oid is fetched
 * through SPI (the plan is prepared once and saved) and added to the map.
 * Raises an error when no row matches or the oid comes back null.
 */
Oid
name2id_cfg(text *name)
{
    Oid         argtypes[1];
    Datum       values[1];
    bool        isnull;
    int         rc;
    Oid         cfgoid;

    cfgoid = findSNMap_t(&(CList.name2id_map), name);
    if (cfgoid)
        return cfgoid;

    argtypes[0] = TEXTOID;
    values[0] = PointerGetDatum(name);

    SPI_connect();
    if (!plan_name2id)
    {
        plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_cfg where ts_name = $1", 1, argtypes));
        if (!plan_name2id)
            /* internal error */
            elog(ERROR, "SPI_prepare() failed");
    }

    rc = SPI_execp(plan_name2id, values, " ", 1);
    if (rc < 0)
        /* internal error */
        elog(ERROR, "SPI_execp return %d", rc);

    if (!(SPI_processed > 0))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("no tsearch config")));

    cfgoid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
    if (isnull)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("null id for tsearch config")));

    SPI_finish();
    addSNMap_t(&(CList.name2id_map), name, cfgoid);
    return cfgoid;
}
void
parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
int type, lenlemm, i;
char *lemm=NULL;
void
parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
{
int type,
lenlemm,
i;
char *lemm = NULL;
WParserInfo *prsobj = findprs(cfg->prs_id);
prsobj->prs=(void*)DatumGetPointer(
FunctionCall2(
&(prsobj->start_info),
PointerGetDatum(buf),
Int32GetDatum(buflen)
)
);
prsobj->prs = (void *) DatumGetPointer(
FunctionCall2(
&(prsobj->start_info),
PointerGetDatum(buf),
Int32GetDatum(buflen)
)
);
while( ( type=DatumGetInt32(FunctionCall3(
&(prsobj->getlexeme_info),
PointerGetDatum(prsobj->prs),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm))) ) != 0 ) {
while ((type = DatumGetInt32(FunctionCall3(
&(prsobj->getlexeme_info),
PointerGetDatum(prsobj->prs),
PointerGetDatum(&lemm),
PointerGetDatum(&lenlemm)))) != 0)
{
if ( lenlemm >= MAXSTRLEN )
if (lenlemm >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long")));
if ( type >= cfg->len ) /* skip this type of lexem */
continue;
if (type >= cfg->len) /* skip this type of lexem */
continue;
for(i=0;i<cfg->map[type].len;i++) {
DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
char **norms, **ptr;
norms = ptr = (char**)DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(lemm),
PointerGetDatum(lenlemm)
)
);
if ( !norms ) /* dictionary doesn't know this lexem */
for (i = 0; i < cfg->map[type].len; i++)
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
char **norms,
**ptr;
norms = ptr = (char **) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(lemm),
PointerGetDatum(lenlemm)
)
);
if (!norms) /* dictionary doesn't know this lexem */
continue;
prs->pos++; /*set pos*/
prs->pos++; /* set pos */
while( *ptr ) {
if (prs->curwords == prs->lenwords) {
while (*ptr)
{
if (prs->curwords == prs->lenwords)
{
prs->lenwords *= 2;
prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
}
@ -292,191 +328,220 @@ parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
prs->curwords++;
}
pfree(norms);
break; /* lexem already normalized or is stop word*/
break; /* lexem already normalized or is stop
* word */
}
}
FunctionCall1(
&(prsobj->end_info),
PointerGetDatum(prsobj->prs)
);
&(prsobj->end_info),
PointerGetDatum(prsobj->prs)
);
}
/*
 * hladdword
 *      Append one token to the headline word array, doubling the array
 *      when it is full.
 *
 * The token text is copied into a fresh palloc'd buffer of exactly buflen
 * bytes (no terminating NUL); all flags and the item pointer start zeroed.
 */
static void
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type)
{
    HLWORD     *slot;

    while (prs->curwords >= prs->lenwords)
    {
        prs->lenwords *= 2;
        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
    }

    /* safe to take the address now: the array will not move again here */
    slot = &(prs->words[prs->curwords]);
    memset(slot, 0, sizeof(HLWORD));
    slot->type = (uint8) type;
    slot->len = buflen;
    slot->word = palloc(buflen);
    memcpy(slot->word, buf, buflen);

    prs->curwords++;
}
/*
 * hlfinditem
 *      Link the most recently added headline word to every VAL item of the
 *      query whose operand text equals buf[0 .. buflen).
 *
 * The first matching item is stored in the word itself.  Each further match
 * appends a copy of the word, flagged "repeated", that carries the other
 * item; the copy shares the original's text buffer.
 */
static void
hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
{
    int         i;
    ITEM       *item = GETQUERY(query);
    HLWORD     *word;

    /* make room for the worst case: one extra copy per query item */
    while (prs->curwords + query->size >= prs->lenwords)
    {
        prs->lenwords *= 2;
        prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
    }

    /*
     * Take the address only after the array has been grown: repalloc may
     * move prs->words, and a pointer computed before the loop would then be
     * dangling.
     */
    word = &(prs->words[prs->curwords - 1]);

    for (i = 0; i < query->size; i++)
    {
        if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
        {
            if (word->item)
            {
                /* word already bound to an item: record this one on a copy */
                memcpy(&(prs->words[prs->curwords]), word, sizeof(HLWORD));
                prs->words[prs->curwords].item = item;
                prs->words[prs->curwords].repeated = 1;
                prs->curwords++;
            }
            else
                word->item = item;
        }
        item++;
    }
}
/*
 * hlparsetext
 *      Tokenize buf with the configuration's parser and fill prs with one
 *      headline word per token, binding words to matching query items.
 *
 * Every token is recorded with hladdword().  Tokens whose type has a
 * dictionary list are then offered to those dictionaries in order; the
 * first dictionary that returns a result ends the search for that token,
 * and each returned lexeme is matched against the query by hlfinditem().
 * Raises an error when a token is MAXSTRLEN bytes or longer.
 */
void
hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen)
{
    int         type,
                lenlemm,
                i;
    char       *lemm = NULL;
    WParserInfo *prsobj = findprs(cfg->prs_id);

    prsobj->prs = (void *) DatumGetPointer(
                                           FunctionCall2(
                                                         &(prsobj->start_info),
                                                         PointerGetDatum(buf),
                                                         Int32GetDatum(buflen)
                                                         )
        );

    /* the parser reports token type 0 when the input is exhausted */
    while ((type = DatumGetInt32(FunctionCall3(
                                               &(prsobj->getlexeme_info),
                                               PointerGetDatum(prsobj->prs),
                                               PointerGetDatum(&lemm),
                                               PointerGetDatum(&lenlemm)))) != 0)
    {
        if (lenlemm >= MAXSTRLEN)
            ereport(ERROR,
                    (errcode(ERRCODE_SYNTAX_ERROR),
                     errmsg("word is too long")));

        hladdword(prs, lemm, lenlemm, type);

        /* token types beyond the map have no dictionaries */
        if (type >= cfg->len)
            continue;

        for (i = 0; i < cfg->map[type].len; i++)
        {
            DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
            char      **norms,
                      **ptr;

            /* lenlemm is an int, so it travels as an int32 Datum */
            norms = ptr = (char **) DatumGetPointer(
                                                    FunctionCall3(
                                                                  &(dict->lexize_info),
                                                                  PointerGetDatum(dict->dictionary),
                                                                  PointerGetDatum(lemm),
                                                                  Int32GetDatum(lenlemm)
                                                                  )
                );
            if (!norms)         /* dictionary doesn't know this lexem */
                continue;

            while (*ptr)
            {
                hlfinditem(prs, query, *ptr, strlen(*ptr));
                pfree(*ptr);
                ptr++;
            }
            pfree(norms);
            break;              /* lexem already normalized or is stop word */
        }
    }

    FunctionCall1(
                  &(prsobj->end_info),
                  PointerGetDatum(prsobj->prs)
        );
}
/*
 * genhl
 *      Assemble the headline text from the words collected in prs.
 *
 * Only words flagged "in" and neither "skip" nor "repeated" are emitted:
 * a "replace" word becomes a single space, any other word is copied as is
 * and wrapped in startsel/stopsel when "selected".  The text buffer of
 * every non-repeated word is freed along the way.  Returns a palloc'd text.
 */
text *
genhl(HLPRSTEXT * prs)
{
    int         bufsize = 128;
    text       *out = (text *) palloc(bufsize);
    char       *cur = ((char *) out) + VARHDRSZ;
    HLWORD     *wrd;

    for (wrd = prs->words; wrd - prs->words < prs->curwords; wrd++)
    {
        /* grow until the word plus both markers is sure to fit */
        while (wrd->len + prs->stopsellen + prs->startsellen + (cur - ((char *) out)) >= bufsize)
        {
            int         used = cur - ((char *) out);

            bufsize *= 2;
            out = (text *) repalloc(out, bufsize);
            cur = ((char *) out) + used;
        }

        if (wrd->in && !wrd->skip && !wrd->repeated)
        {
            if (wrd->replace)
                *cur++ = ' ';
            else
            {
                if (wrd->selected)
                {
                    memcpy(cur, prs->startsel, prs->startsellen);
                    cur += prs->startsellen;
                }

                memcpy(cur, wrd->word, wrd->len);
                cur += wrd->len;

                if (wrd->selected)
                {
                    memcpy(cur, prs->stopsel, prs->stopsellen);
                    cur += prs->stopsellen;
                }
            }
        }

        /* repeated entries share their buffer with the first occurrence */
        if (!wrd->repeated)
            pfree(wrd->word);
    }

    VARATT_SIZEP(out) = cur - ((char *) out);
    return out;
}
int
get_currcfg(void) {
Oid arg[1]={ TEXTOID };
int
get_currcfg(void)
{
Oid arg[1] = {TEXTOID};
const char *curlocale;
Datum pars[1];
bool isnull;
int stat;
Datum pars[1];
bool isnull;
int stat;
if ( current_cfg_id > 0 )
if (current_cfg_id > 0)
return current_cfg_id;
SPI_connect();
if ( !plan_getcfg_bylocale ) {
plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) );
if ( !plan_getcfg_bylocale )
if (!plan_getcfg_bylocale)
{
plan_getcfg_bylocale = SPI_saveplan(SPI_prepare("select oid from pg_ts_cfg where locale = $1 ", 1, arg));
if (!plan_getcfg_bylocale)
/* internal error */
elog(ERROR, "SPI_prepare() failed");
}
curlocale = setlocale(LC_CTYPE, NULL);
pars[0] = PointerGetDatum( char2text((char*)curlocale) );
pars[0] = PointerGetDatum(char2text((char *) curlocale));
stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1);
if ( stat < 0 )
if (stat < 0)
/* internal error */
elog (ERROR, "SPI_execp return %d", stat);
if ( SPI_processed > 0 )
current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
else
elog(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
current_cfg_id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
else
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not find tsearch config by locale")));
@ -487,39 +552,43 @@ get_currcfg(void) {
}
PG_FUNCTION_INFO_V1(set_curcfg);
Datum set_curcfg(PG_FUNCTION_ARGS);
Datum set_curcfg(PG_FUNCTION_ARGS);
Datum
set_curcfg(PG_FUNCTION_ARGS) {
findcfg(PG_GETARG_OID(0));
current_cfg_id=PG_GETARG_OID(0);
PG_RETURN_VOID();
set_curcfg(PG_FUNCTION_ARGS)
{
findcfg(PG_GETARG_OID(0));
current_cfg_id = PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curcfg_byname);
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
Datum
set_curcfg_byname(PG_FUNCTION_ARGS) {
text *name=PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curcfg,
ObjectIdGetDatum( name2id_cfg(name) )
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
set_curcfg_byname(PG_FUNCTION_ARGS)
{
text *name = PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curcfg,
ObjectIdGetDatum(name2id_cfg(name))
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(show_curcfg);
Datum       show_curcfg(PG_FUNCTION_ARGS);

/*
 * show_curcfg()
 *      Return the id of the current configuration as reported by
 *      get_currcfg().
 */
Datum
show_curcfg(PG_FUNCTION_ARGS)
{
    int         cfgid = get_currcfg();

    PG_RETURN_OID(cfgid);
}
PG_FUNCTION_INFO_V1(reset_tsearch);
Datum       reset_tsearch(PG_FUNCTION_ARGS);

/*
 * reset_tsearch()
 *      Flush the tsearch caches.  The work is done by ts_error(), which
 *      resets the configuration, dictionary and parser caches before it
 *      reports the message; at NOTICE level it then returns normally.
 */
Datum
reset_tsearch(PG_FUNCTION_ARGS)
{
    ts_error(NOTICE, "TSearch cache cleaned");

    PG_RETURN_VOID();
}

View File

@ -3,66 +3,73 @@
#include "postgres.h"
#include "query.h"
/*
 * Dictionaries to try, in order, for one token type.
 */
typedef struct
{
    int         len;            /* number of entries in dict_id[] */
    Datum      *dict_id;        /* malloc'd array; holds dictionary oids as
                                 * Datums once init_cfg() has finished */
} ListDictionary;
/*
 * One loaded tsearch configuration.
 */
typedef struct
{
    Oid         id;             /* id the configuration was looked up by */
    Oid         prs_id;         /* id of the parser to tokenize with */
    int         len;            /* number of entries in map[] */
    ListDictionary *map;        /* malloc'd array indexed by token type */
} TSCfgInfo;
/* configuration cache interface */
Oid         name2id_cfg(text *name);            /* name -> configuration oid */
TSCfgInfo  *findcfg(Oid id);                    /* cached entry, loaded on demand */
void        init_cfg(Oid id, TSCfgInfo * cfg);  /* fill *cfg for the given id */
void        reset_cfg(void);                    /* drop the whole cache */
typedef struct {
uint16 len;
union {
typedef struct
{
uint16 len;
union
{
uint16 pos;
uint16 *apos;
} pos;
char *word;
uint32 alen;
} WORD;
typedef struct {
WORD *words;
int4 lenwords;
int4 curwords;
uint16 *apos;
} pos;
char *word;
uint32 alen;
} WORD;
typedef struct
{
WORD *words;
int4 lenwords;
int4 curwords;
int4 pos;
} PRSTEXT;
} PRSTEXT;
/*
 * One token of a document being turned into a headline.
 */
typedef struct
{
    uint16      len;            /* byte length of word (no NUL terminator) */
    uint8       selected:1,     /* genhl wraps the word in startsel/stopsel */
                in:1,           /* genhl emits the word only when set */
                skip:1,         /* genhl omits the word when set */
                replace:1,      /* genhl emits a single space instead */
                repeated:1;     /* extra copy made for another query item;
                                 * shares word with the first occurrence */
    uint8       type;           /* token type reported by the parser */
    char       *word;           /* palloc'd copy of the token text */
    ITEM       *item;           /* query item this word matches, or NULL */
} HLWORD;

/*
 * Growable array of HLWORDs plus the selection markers for genhl().
 */
typedef struct
{
    HLWORD     *words;          /* palloc'd array, doubled when full */
    int4        lenwords;       /* entries allocated in words[] */
    int4        curwords;       /* entries in use in words[] */
    char       *startsel;       /* text emitted before a selected word */
    char       *stopsel;        /* text emitted after a selected word */
    int2        startsellen;    /* byte length of startsel */
    int2        stopsellen;     /* byte length of stopsel */
} HLPRSTEXT;

void        hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 buflen);
text       *genhl(HLPRSTEXT * prs);

void        parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen);
int         get_currcfg(void);
#endif

View File

@ -10,108 +10,128 @@
#include "common.h"
PG_FUNCTION_INFO_V1(tsstat_in);
Datum       tsstat_in(PG_FUNCTION_ARGS);

/*
 * tsstat_in
 *      Input function for tsstat.  The argument is never examined; the
 *      result is always an empty statistics value (header only, no entries).
 */
Datum
tsstat_in(PG_FUNCTION_ARGS)
{
    tsstat     *empty = palloc(STATHDRSIZE);

    empty->size = 0;
    empty->len = STATHDRSIZE;
    PG_RETURN_POINTER(empty);
}
PG_FUNCTION_INFO_V1(tsstat_out);
Datum tsstat_out(PG_FUNCTION_ARGS);
Datum
tsstat_out(PG_FUNCTION_ARGS) {
Datum tsstat_out(PG_FUNCTION_ARGS);
Datum
tsstat_out(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("tsstat_out not implemented")));
PG_RETURN_NULL();
}
/*
 * SEI_realloc
 *      Grow an array of WordEntry pointers.
 *
 * With no array yet (in is NULL or *len is 0) a new one of 8 slots is
 * allocated; otherwise the existing array is doubled.  *len is updated to
 * the new capacity and the (possibly moved) array is returned.
 */
static WordEntry **
SEI_realloc(WordEntry ** in, uint32 *len)
{
    if (*len != 0 && in != NULL)
    {
        *len *= 2;
        return repalloc(in, sizeof(WordEntry *) * (*len));
    }

    *len = 8;
    return palloc(sizeof(WordEntry *) * (*len));
}
/*
 * compareStatWord
 *      Order a statistics entry against a tsvector word: shorter strings
 *      sort first, and strings of equal length are compared with strncmp.
 *
 * Returns a negative, zero or positive value like strncmp.
 */
static int
compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, tsvector * txt)
{
    if (a->len != b->len)
        return (a->len > b->len) ? 1 : -1;

    return strncmp(STATSTRPTR(stat) + a->pos,
                   STRPTR(txt) + b->pos,
                   a->len);
}
static tsstat*
formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
tsstat *newstat;
uint32 totallen, nentry;
uint32 slen=0;
WordEntry **ptr=entry;
char *curptr;
StatEntry *sptr,*nptr;
static tsstat *
formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
{
tsstat *newstat;
uint32 totallen,
nentry;
uint32 slen = 0;
WordEntry **ptr = entry;
char *curptr;
StatEntry *sptr,
*nptr;
while(ptr-entry<len) {
while (ptr - entry < len)
{
slen += (*ptr)->len;
ptr++;
}
nentry=stat->size + len;
slen+=STATSTRSIZE(stat);
totallen=CALCSTATSIZE(nentry,slen);
newstat=palloc(totallen);
newstat->len=totallen;
newstat->size=nentry;
nentry = stat->size + len;
slen += STATSTRSIZE(stat);
totallen = CALCSTATSIZE(nentry, slen);
newstat = palloc(totallen);
newstat->len = totallen;
newstat->size = nentry;
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
ptr=entry;
sptr=STATPTR(stat);
nptr=STATPTR(newstat);
ptr = entry;
sptr = STATPTR(stat);
nptr = STATPTR(newstat);
if ( len == 1 ) {
StatEntry *StopLow = STATPTR(stat);
StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
if (len == 1)
{
StatEntry *StopLow = STATPTR(stat);
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
while (StopLow < StopHigh) {
sptr=StopLow + (StopHigh - StopLow) / 2;
if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
while (StopLow < StopHigh)
{
sptr = StopLow + (StopHigh - StopLow) / 2;
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
StopLow = sptr + 1;
else
StopHigh = sptr;
StopHigh = sptr;
}
nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
nptr->nentry=POSDATALEN(txt,*ptr);
if ( nptr->nentry==0 )
nptr->nentry=1;
nptr->ndoc=1;
nptr->len=(*ptr)->len;
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
nptr->nentry = POSDATALEN(txt, *ptr);
if (nptr->nentry == 0)
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
} else {
while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
}
else
{
while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
{
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
{
memcpy(nptr, sptr, sizeof(StatEntry));
sptr++;
} else {
nptr->nentry=POSDATALEN(txt,*ptr);
if ( nptr->nentry==0 )
nptr->nentry=1;
nptr->ndoc=1;
nptr->len=(*ptr)->len;
}
else
{
nptr->nentry = POSDATALEN(txt, *ptr);
if (nptr->nentry == 0)
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
curptr += nptr->len;
@ -120,138 +140,168 @@ formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
nptr++;
}
memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
while(ptr-entry<len) {
nptr->nentry=POSDATALEN(txt,*ptr);
if ( nptr->nentry==0 )
nptr->nentry=1;
nptr->ndoc=1;
nptr->len=(*ptr)->len;
memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
while (ptr - entry < len)
{
nptr->nentry = POSDATALEN(txt, *ptr);
if (nptr->nentry == 0)
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
curptr += nptr->len;
ptr++; nptr++;
ptr++;
nptr++;
}
}
return newstat;
}
}
PG_FUNCTION_INFO_V1(ts_accum);
Datum ts_accum(PG_FUNCTION_ARGS);
Datum
ts_accum(PG_FUNCTION_ARGS) {
tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
WordEntry **newentry=NULL;
uint32 len=0, cur=0;
StatEntry *sptr;
WordEntry *wptr;
Datum ts_accum(PG_FUNCTION_ARGS);
Datum
ts_accum(PG_FUNCTION_ARGS)
{
tsstat *newstat,
*stat = (tsstat *) PG_GETARG_POINTER(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
WordEntry **newentry = NULL;
uint32 len = 0,
cur = 0;
StatEntry *sptr;
WordEntry *wptr;
if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */
stat=palloc(STATHDRSIZE);
stat->len=STATHDRSIZE;
stat->size=0;
if (stat == NULL || PG_ARGISNULL(0))
{ /* Init in first */
stat = palloc(STATHDRSIZE);
stat->len = STATHDRSIZE;
stat->size = 0;
}
/* simple check of correctness */
if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
PG_FREE_IF_COPY(txt,1);
if (txt == NULL || PG_ARGISNULL(1) || txt->size == 0)
{
PG_FREE_IF_COPY(txt, 1);
PG_RETURN_POINTER(stat);
}
sptr=STATPTR(stat);
wptr=ARRPTR(txt);
sptr = STATPTR(stat);
wptr = ARRPTR(txt);
if ( stat->size < 100*txt->size ) { /* merge */
while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
int cmp = compareStatWord(sptr,wptr,stat,txt);
if ( cmp<0 ) {
if (stat->size < 100 * txt->size)
{ /* merge */
while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
{
int cmp = compareStatWord(sptr, wptr, stat, txt);
if (cmp < 0)
sptr++;
} else if ( cmp==0 ) {
int n=POSDATALEN(txt,wptr);
if (n==0) n=1;
else if (cmp == 0)
{
int n = POSDATALEN(txt, wptr);
if (n == 0)
n = 1;
sptr->ndoc++;
sptr->nentry +=n ;
sptr++; wptr++;
} else {
if ( cur==len )
newentry=SEI_realloc(newentry, &len);
newentry[cur]=wptr;
wptr++; cur++;
sptr->nentry += n;
sptr++;
wptr++;
}
else
{
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
wptr++;
cur++;
}
}
while( wptr-ARRPTR(txt) < txt->size ) {
if ( cur==len )
newentry=SEI_realloc(newentry, &len);
newentry[cur]=wptr;
wptr++; cur++;
while (wptr - ARRPTR(txt) < txt->size)
{
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
wptr++;
cur++;
}
} else { /* search */
while( wptr-ARRPTR(txt) < txt->size ) {
StatEntry *StopLow = STATPTR(stat);
StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
int cmp;
}
else
{ /* search */
while (wptr - ARRPTR(txt) < txt->size)
{
StatEntry *StopLow = STATPTR(stat);
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
int cmp;
while (StopLow < StopHigh) {
sptr=StopLow + (StopHigh - StopLow) / 2;
cmp = compareStatWord(sptr,wptr,stat,txt);
if (cmp==0) {
int n=POSDATALEN(txt,wptr);
if (n==0) n=1;
while (StopLow < StopHigh)
{
sptr = StopLow + (StopHigh - StopLow) / 2;
cmp = compareStatWord(sptr, wptr, stat, txt);
if (cmp == 0)
{
int n = POSDATALEN(txt, wptr);
if (n == 0)
n = 1;
sptr->ndoc++;
sptr->nentry +=n ;
sptr->nentry += n;
break;
} else if ( cmp < 0 )
}
else if (cmp < 0)
StopLow = sptr + 1;
else
StopHigh = sptr;
StopHigh = sptr;
}
if ( StopLow >= StopHigh ) { /* not found */
if ( cur==len )
newentry=SEI_realloc(newentry, &len);
newentry[cur]=wptr;
if (StopLow >= StopHigh)
{ /* not found */
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
cur++;
}
wptr++;
}
}
}
if ( cur==0 ) { /* no new words */
PG_FREE_IF_COPY(txt,1);
if (cur == 0)
{ /* no new words */
PG_FREE_IF_COPY(txt, 1);
PG_RETURN_POINTER(stat);
}
newstat = formstat(stat, txt, newentry, cur);
pfree(newentry);
PG_FREE_IF_COPY(txt,1);
PG_FREE_IF_COPY(txt, 1);
/* pfree(stat); */
PG_RETURN_POINTER(newstat);
}
/*
 * Iteration state of the set-returning function that lists statistics.
 *
 * NOTE(review): stat is declared tsvector * but ts_setup_firstcall() copies
 * a tsstat into it and ts_process_call() reads it with the tsstat macros;
 * confirm whether the field should be a tsstat *.
 */
typedef struct
{
    uint32      cur;            /* index of the next entry to return */
    tsvector   *stat;           /* private copy of the statistics value */
} StatStorage;
static void
ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) {
TupleDesc tupdesc;
MemoryContext oldcontext;
StatStorage *st;
ts_setup_firstcall(FuncCallContext *funcctx, tsstat * stat)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
StatStorage *st;
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st=palloc( sizeof(StatStorage) );
st->cur=0;
st->stat=palloc( stat->len );
st = palloc(sizeof(StatStorage));
st->cur = 0;
st->stat = palloc(stat->len);
memcpy(st->stat, stat, stat->len);
funcctx->user_fctx = (void*)st;
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("statinfo");
funcctx->slot = TupleDescGetSlot(tupdesc);
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
@ -260,162 +310,175 @@ ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) {
/*
 * ts_process_call
 *      Produce the next result row (word, ndoc, nentry) from the saved
 *      statistics, or free the saved state and return (Datum) 0 once every
 *      entry has been returned.
 */
static Datum
ts_process_call(FuncCallContext *funcctx)
{
    StatStorage *st = (StatStorage *) funcctx->user_fctx;
    StatEntry  *entry;
    HeapTuple   tuple;
    Datum       result;
    char       *values[3];
    char        ndoc[16];
    char        nentry[16];

    /* past the last entry: clean up and signal the end */
    if (!(st->cur < st->stat->size))
    {
        pfree(st->stat);
        pfree(st);
        return (Datum) 0;
    }

    entry = STATPTR(st->stat) + st->cur;

    /* column 1: the word, copied out as a NUL-terminated string */
    values[0] = palloc(entry->len + 1);
    memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, entry->len);
    (values[0])[entry->len] = '\0';

    /* columns 2 and 3: the counters in text form */
    sprintf(ndoc, "%d", entry->ndoc);
    values[1] = ndoc;
    sprintf(nentry, "%d", entry->nentry);
    values[2] = nentry;

    tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    result = TupleGetDatum(funcctx->slot, tuple);

    pfree(values[0]);
    st->cur++;
    return result;
}
PG_FUNCTION_INFO_V1(ts_accum_finish);
Datum ts_accum_finish(PG_FUNCTION_ARGS);
Datum
ts_accum_finish(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
Datum ts_accum_finish(PG_FUNCTION_ARGS);
Datum
ts_accum_finish(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL()) {
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) );
ts_setup_firstcall(funcctx, (tsstat *) PG_GETARG_POINTER(0));
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=ts_process_call(funcctx)) != (Datum)0 )
if ((result = ts_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/* oid of the tsvector type; filled in by get_ti_Oid() */
static Oid  tiOid = InvalidOid;

/*
 * get_ti_Oid
 *      Look up the oid of the tsvector type through SPI and store it in
 *      tiOid.  Raises an error when the query fails, returns no row, or
 *      yields InvalidOid.
 */
static void
get_ti_Oid(void)
{
    int         ret;
    bool        isnull;

    if ((ret = SPI_exec("select oid from pg_type where typname='tsvector'", 1)) < 0)
        /* internal error */
        elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);

    /*
     * Require at least one row.  The former "< 0" test could never detect an
     * empty result, so vals[0] below was read even when nothing came back.
     */
    if (SPI_processed < 1)
        /* internal error */
        elog(ERROR, "There is no tsvector type");

    tiOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
    if (tiOid == InvalidOid)
        /* internal error */
        elog(ERROR, "tsvector type has InvalidOid");
}
/*
 * ts_stat_sql
 *		Run the query contained in txt through an SPI cursor and fold every
 *		non-null value of its single tsvector column into a tsstat by calling
 *		ts_accum.  Rows are fetched 100 at a time.
 *
 * Raises ERROR if the query cannot be prepared or opened, if it does not
 * return exactly one column, or if that column is not of the tsvector type.
 * Returns the accumulated tsstat.
 */
static tsstat *
ts_stat_sql(text *txt)
{
	char	   *query = text2char(txt);
	void	   *plan;
	Portal		portal;
	tsstat	   *stat;
	int			row;

	/* resolve the tsvector type OID once and reuse the cached value */
	if (tiOid == InvalidOid)
		get_ti_Oid();

	plan = SPI_prepare(query, 0, NULL);
	if (plan == NULL)
		/* internal error */
		elog(ERROR, "SPI_prepare('%s') returns NULL", query);

	portal = SPI_cursor_open(NULL, plan, NULL, NULL);
	if (portal == NULL)
		/* internal error */
		elog(ERROR, "SPI_cursor_open('%s') returns NULL", query);

	/* the first batch also gives us the tuple descriptor to validate */
	SPI_cursor_fetch(portal, true, 100);

	if (SPI_tuptable->tupdesc->natts != 1)
		/* internal error */
		elog(ERROR, "number of fields doesn't equal to 1");

	if (SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid)
		/* internal error */
		elog(ERROR, "column isn't of tsvector type");

	/* start from an empty statistics object */
	stat = palloc(STATHDRSIZE);
	stat->len = STATHDRSIZE;
	stat->size = 0;

	while (SPI_processed > 0)
	{
		for (row = 0; row < SPI_processed; row++)
		{
			bool		isnull;
			tsstat	   *merged;
			Datum		data = SPI_getbinval(SPI_tuptable->vals[row],
											 SPI_tuptable->tupdesc,
											 1, &isnull);

			if (isnull)
				continue;

			merged = (tsstat *) DatumGetPointer(DirectFunctionCall2(ts_accum,
													PointerGetDatum(stat),
																	data));

			/* drop the previous object only if ts_accum returned a new one */
			if (stat && stat != merged)
				pfree(stat);
			stat = merged;
		}

		/* release this batch before asking for the next one */
		SPI_freetuptable(SPI_tuptable);
		SPI_cursor_fetch(portal, true, 100);
	}

	SPI_freetuptable(SPI_tuptable);
	SPI_cursor_close(portal);
	SPI_freeplan(plan);
	pfree(query);

	return stat;
}
PG_FUNCTION_INFO_V1(ts_stat);
Datum ts_stat(PG_FUNCTION_ARGS);
Datum
ts_stat(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
Datum ts_stat(PG_FUNCTION_ARGS);
Datum
ts_stat(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
tsstat *stat;
text *txt = PG_GETARG_TEXT_P(0);
if (SRF_IS_FIRSTCALL()) {
tsstat *stat;
text *txt=PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
SPI_connect();
stat = ts_stat_sql(txt);
PG_FREE_IF_COPY(txt,0);
ts_setup_firstcall(funcctx, stat );
PG_FREE_IF_COPY(txt, 0);
ts_setup_firstcall(funcctx, stat);
SPI_finish();
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=ts_process_call(funcctx)) != (Datum)0 )
if ((result = ts_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}

View File

@ -8,14 +8,16 @@
#include "utils/builtins.h"
#include "storage/bufpage.h"
/*
 * One entry of a statistics object.
 *
 * NOTE(review): the field meanings below are inferred from the names and
 * from uses such as "STATSTRPTR(stat) + entry->pos, entry->len" -- confirm
 * against the code that fills these entries.
 */
typedef struct
{
	uint32		len;			/* presumably length of the word, in bytes */
	uint32		pos;			/* presumably offset of the word in the string
								 * area */
	uint32		ndoc;			/* presumably number of documents with the
								 * word -- TODO confirm */
	uint32		nentry;			/* presumably total number of occurrences --
								 * TODO confirm */
} StatEntry;
typedef struct {
typedef struct
{
int4 len;
int4 size;
char data[1];

View File

@ -31,8 +31,10 @@ Datum tsvector_out(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector);
Datum to_tsvector(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector_current);
Datum to_tsvector_current(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector_name);
Datum to_tsvector_name(PG_FUNCTION_ARGS);
@ -45,32 +47,38 @@ Datum tsvector_length(PG_FUNCTION_ARGS);
/*
* in/out text index type
*/
static int
comparePos(const void *a, const void *b) {
if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos )
static int
comparePos(const void *a, const void *b)
{
if (((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos)
return 1;
return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1;
return (((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos) ? 1 : -1;
}
static int
uniquePos(WordEntryPos *a, int4 l) {
WordEntryPos *ptr, *res;
uniquePos(WordEntryPos * a, int4 l)
{
WordEntryPos *ptr,
*res;
res=a;
if (l==1)
res = a;
if (l == 1)
return l;
qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
ptr = a + 1;
while (ptr - a < l) {
if ( ptr->pos != res->pos ) {
while (ptr - a < l)
{
if (ptr->pos != res->pos)
{
res++;
res->pos = ptr->pos;
res->weight = ptr->weight;
if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 )
if (res - a >= MAXNUMPOS - 1 || res->pos == MAXENTRYPOS - 1)
break;
} else if ( ptr->weight > res->weight )
}
else if (ptr->weight > res->weight)
res->weight = ptr->weight;
ptr++;
}
@ -81,27 +89,29 @@ static char *BufferStr;
static int
compareentry(const void *a, const void *b)
{
if ( ((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
{
return strncmp(
&BufferStr[((WordEntryIN *) a)->entry.pos],
&BufferStr[((WordEntryIN *) b)->entry.pos],
((WordEntryIN *) a)->entry.len);
}
return ( ((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len ) ? 1 : -1;
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
}
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
WordEntryIN *ptr,
WordEntryIN *ptr,
*res;
res = a;
if (l == 1) {
if ( a->entry.haspos ) {
*(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
if (l == 1)
{
if (a->entry.haspos)
{
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
}
return l;
}
@ -115,31 +125,39 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
if (!(ptr->entry.len == res->entry.len &&
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
{
if ( res->entry.haspos ) {
*(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
*outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
if (res->entry.haspos)
{
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
res++;
memcpy(res,ptr,sizeof(WordEntryIN));
} else if ( ptr->entry.haspos ){
if ( res->entry.haspos ) {
int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
&(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
*(uint16*)(res->pos) += *(uint16*)(ptr->pos);
pfree( ptr->pos );
} else {
res->entry.haspos=1;
memcpy(res, ptr, sizeof(WordEntryIN));
}
else if (ptr->entry.haspos)
{
if (res->entry.haspos)
{
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
pfree(ptr->pos);
}
else
{
res->entry.haspos = 1;
res->pos = ptr->pos;
}
}
ptr++;
}
if ( res->entry.haspos ) {
*(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
*outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
if (res->entry.haspos)
{
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
@ -150,7 +168,7 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
#define WAITENDWORD 2
#define WAITNEXTCHAR 3
#define WAITENDCMPLX 4
#define WAITPOSINFO 5
#define WAITPOSINFO 5
#define INPOSINFO 6
#define WAITPOSDELIM 7
@ -172,7 +190,7 @@ gettoken_tsvector(TI_IN_STATE * state)
state->curpos = state->word;
state->state = WAITWORD;
state->alen=0;
state->alen = 0;
while (1)
{
@ -228,14 +246,16 @@ gettoken_tsvector(TI_IN_STATE * state)
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
*(state->curpos) = '\0';
return 1;
} else if ( *(state->prsbuf) == ':' ) {
return 1;
}
else if (*(state->prsbuf) == ':')
{
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
*(state->curpos) = '\0';
if ( state->oprisdelim )
if (state->oprisdelim)
return 1;
else
state->state = INPOSINFO;
@ -257,10 +277,12 @@ gettoken_tsvector(TI_IN_STATE * state)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
if ( state->oprisdelim ) {
if (state->oprisdelim)
{
state->prsbuf++;
return 1;
} else
}
else
state->state = WAITPOSINFO;
}
else if (*(state->prsbuf) == '\\')
@ -278,67 +300,87 @@ gettoken_tsvector(TI_IN_STATE * state)
*(state->curpos) = *(state->prsbuf);
state->curpos++;
}
} else if (state->state == WAITPOSINFO) {
if ( *(state->prsbuf) == ':' )
state->state=INPOSINFO;
}
else if (state->state == WAITPOSINFO)
{
if (*(state->prsbuf) == ':')
state->state = INPOSINFO;
else
return 1;
} else if (state->state == INPOSINFO) {
if ( isdigit(*(state->prsbuf)) ) {
if ( state->alen==0 ) {
state->alen=4;
state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen );
*(uint16*)(state->pos)=0;
} else if ( *(uint16*)(state->pos) +1 >= state->alen ) {
state->alen *= 2;
state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen );
}
else if (state->state == INPOSINFO)
{
if (isdigit(*(state->prsbuf)))
{
if (state->alen == 0)
{
state->alen = 4;
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
*(uint16 *) (state->pos) = 0;
}
( *(uint16*)(state->pos) )++;
state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf));
if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 )
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
{
state->alen *= 2;
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
}
(*(uint16 *) (state->pos))++;
state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
if (state->pos[*(uint16 *) (state->pos)].pos == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info")));
state->pos[ *(uint16*)(state->pos) ].weight = 0;
state->pos[*(uint16 *) (state->pos)].weight = 0;
state->state = WAITPOSDELIM;
} else
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
} else if (state->state == WAITPOSDELIM) {
if ( *(state->prsbuf) == ',' ) {
}
else if (state->state == WAITPOSDELIM)
{
if (*(state->prsbuf) == ',')
state->state = INPOSINFO;
} else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) {
if ( state->pos[ *(uint16*)(state->pos) ].weight )
else if (tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
{
if (state->pos[*(uint16 *) (state->pos)].weight)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
state->pos[ *(uint16*)(state->pos) ].weight = 3;
} else if ( tolower(*(state->prsbuf)) == 'b' ) {
if ( state->pos[ *(uint16*)(state->pos) ].weight )
state->pos[*(uint16 *) (state->pos)].weight = 3;
}
else if (tolower(*(state->prsbuf)) == 'b')
{
if (state->pos[*(uint16 *) (state->pos)].weight)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
state->pos[ *(uint16*)(state->pos) ].weight = 2;
} else if ( tolower(*(state->prsbuf)) == 'c' ) {
if ( state->pos[ *(uint16*)(state->pos) ].weight )
state->pos[*(uint16 *) (state->pos)].weight = 2;
}
else if (tolower(*(state->prsbuf)) == 'c')
{
if (state->pos[*(uint16 *) (state->pos)].weight)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
state->pos[ *(uint16*)(state->pos) ].weight = 1;
} else if ( tolower(*(state->prsbuf)) == 'd' ) {
if ( state->pos[ *(uint16*)(state->pos) ].weight )
state->pos[*(uint16 *) (state->pos)].weight = 1;
}
else if (tolower(*(state->prsbuf)) == 'd')
{
if (state->pos[*(uint16 *) (state->pos)].weight)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
state->pos[ *(uint16*)(state->pos) ].weight = 0;
} else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) {
state->pos[*(uint16 *) (state->pos)].weight = 0;
}
else if (isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0')
return 1;
} else if ( !isdigit(*(state->prsbuf)) )
else if (!isdigit(*(state->prsbuf)))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error")));
} else
}
else
/* internal error */
elog(ERROR, "internal error");
state->prsbuf++;
@ -352,11 +394,11 @@ tsvector_in(PG_FUNCTION_ARGS)
{
char *buf = PG_GETARG_CSTRING(0);
TI_IN_STATE state;
WordEntryIN *arr;
WordEntryIN *arr;
WordEntry *inarr;
int4 len = 0,
totallen = 64;
tsvector *in;
tsvector *in;
char *tmpbuf,
*cur;
int4 i,
@ -388,28 +430,30 @@ tsvector_in(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long")));
arr[len].entry.len= state.curpos - state.word;
arr[len].entry.len = state.curpos - state.word;
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("too long value")));
arr[len].entry.pos=cur - tmpbuf;
arr[len].entry.pos = cur - tmpbuf;
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
cur += arr[len].entry.len;
if ( state.alen ) {
arr[len].entry.haspos=1;
if (state.alen)
{
arr[len].entry.haspos = 1;
arr[len].pos = state.pos;
} else
arr[len].entry.haspos=0;
}
else
arr[len].entry.haspos = 0;
len++;
}
pfree(state.word);
if ( len > 0 )
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
totallen = CALCDATASIZE(len, buflen);
in = (tsvector *) palloc(totallen);
memset(in,0,totallen);
memset(in, 0, totallen);
in->len = totallen;
in->size = len;
cur = STRPTR(in);
@ -417,14 +461,15 @@ tsvector_in(PG_FUNCTION_ARGS)
for (i = 0; i < len; i++)
{
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
arr[i].entry.pos=cur - STRPTR(in);
arr[i].entry.pos = cur - STRPTR(in);
cur += SHORTALIGN(arr[i].entry.len);
if ( arr[i].entry.haspos ) {
memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos));
cur += (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos);
pfree( arr[i].pos );
if (arr[i].entry.haspos)
{
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
pfree(arr[i].pos);
}
memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) );
memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
}
pfree(tmpbuf);
pfree(arr);
@ -434,7 +479,7 @@ tsvector_in(PG_FUNCTION_ARGS)
Datum
tsvector_length(PG_FUNCTION_ARGS)
{
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
int4 ret = in->size;
PG_FREE_IF_COPY(in, 0);
@ -444,26 +489,28 @@ tsvector_length(PG_FUNCTION_ARGS)
Datum
tsvector_out(PG_FUNCTION_ARGS)
{
tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
char *outbuf;
int4 i,
j,
lenbuf = 0, pp;
lenbuf = 0,
pp;
WordEntry *ptr = ARRPTR(out);
char *curin,
*curout;
lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/;
for (i = 0; i < out->size; i++) {
lenbuf += ptr[i].len*2 /*for escape */;
if ( ptr[i].haspos )
lenbuf += 7*POSDATALEN(out, &(ptr[i]));
}
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
for (i = 0; i < out->size; i++)
{
lenbuf += ptr[i].len * 2 /* for escape */ ;
if (ptr[i].haspos)
lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
}
curout = outbuf = (char *) palloc(lenbuf);
for (i = 0; i < out->size; i++)
{
curin = STRPTR(out)+ptr->pos;
curin = STRPTR(out) + ptr->pos;
if (i != 0)
*curout++ = ' ';
*curout++ = '\'';
@ -481,27 +528,40 @@ tsvector_out(PG_FUNCTION_ARGS)
*curout++ = *curin++;
}
*curout++ = '\'';
if ( (pp=POSDATALEN(out,ptr)) != 0 ) {
if ((pp = POSDATALEN(out, ptr)) != 0)
{
WordEntryPos *wptr;
*curout++ = ':';
wptr=POSDATAPTR(out,ptr);
while(pp) {
sprintf(curout,"%d",wptr->pos);
curout=strchr(curout,'\0');
switch( wptr->weight ) {
case 3: *curout++ = 'A'; break;
case 2: *curout++ = 'B'; break;
case 1: *curout++ = 'C'; break;
case 0:
default: break;
wptr = POSDATAPTR(out, ptr);
while (pp)
{
sprintf(curout, "%d", wptr->pos);
curout = strchr(curout, '\0');
switch (wptr->weight)
{
case 3:
*curout++ = 'A';
break;
case 2:
*curout++ = 'B';
break;
case 1:
*curout++ = 'C';
break;
case 0:
default:
break;
}
if ( pp>1 ) *curout++ = ',';
pp--; wptr++;
if (pp > 1)
*curout++ = ',';
pp--;
wptr++;
}
}
ptr++;
}
*curout='\0';
*curout = '\0';
outbuf[lenbuf - 1] = '\0';
PG_FREE_IF_COPY(out, 0);
PG_RETURN_POINTER(outbuf);
@ -510,13 +570,15 @@ tsvector_out(PG_FUNCTION_ARGS)
static int
compareWORD(const void *a, const void *b)
{
if (((WORD *) a)->len == ((WORD *) b)->len) {
int res = strncmp(
((WORD *) a)->word,
((WORD *) b)->word,
((WORD *) b)->len);
if ( res==0 )
return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1;
if (((WORD *) a)->len == ((WORD *) b)->len)
{
int res = strncmp(
((WORD *) a)->word,
((WORD *) b)->word,
((WORD *) b)->len);
if (res == 0)
return (((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos) ? 1 : -1;
return res;
}
return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
@ -527,14 +589,15 @@ uniqueWORD(WORD * a, int4 l)
{
WORD *ptr,
*res;
int tmppos;
int tmppos;
if (l == 1) {
tmppos=LIMITPOS(a->pos.pos);
a->alen=2;
a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
a->pos.apos[0]=1;
a->pos.apos[1]=tmppos;
if (l == 1)
{
tmppos = LIMITPOS(a->pos.pos);
a->alen = 2;
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
a->pos.apos[0] = 1;
a->pos.apos[1] = tmppos;
return l;
}
@ -542,11 +605,11 @@ uniqueWORD(WORD * a, int4 l)
ptr = a + 1;
qsort((void *) a, l, sizeof(WORD), compareWORD);
tmppos=LIMITPOS(a->pos.pos);
a->alen=2;
a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen );
a->pos.apos[0]=1;
a->pos.apos[1]=tmppos;
tmppos = LIMITPOS(a->pos.pos);
a->alen = 2;
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
a->pos.apos[0] = 1;
a->pos.apos[1] = tmppos;
while (ptr - a < l)
{
@ -556,20 +619,24 @@ uniqueWORD(WORD * a, int4 l)
res++;
res->len = ptr->len;
res->word = ptr->word;
tmppos=LIMITPOS(ptr->pos.pos);
res->alen=2;
res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen );
res->pos.apos[0]=1;
res->pos.apos[1]=tmppos;
} else {
tmppos = LIMITPOS(ptr->pos.pos);
res->alen = 2;
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
res->pos.apos[0] = 1;
res->pos.apos[1] = tmppos;
}
else
{
pfree(ptr->word);
if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) {
if ( res->pos.apos[0]+1 >= res->alen ) {
res->alen*=2;
res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen );
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
{
if (res->pos.apos[0] + 1 >= res->alen)
{
res->alen *= 2;
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
}
res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos);
res->pos.apos[0]++;
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
res->pos.apos[0]++;
}
}
ptr++;
@ -584,25 +651,27 @@ uniqueWORD(WORD * a, int4 l)
static tsvector *
makevalue(PRSTEXT * prs)
{
int4 i,j,
int4 i,
j,
lenstr = 0,
totallen;
tsvector *in;
tsvector *in;
WordEntry *ptr;
char *str,
*cur;
prs->curwords = uniqueWORD(prs->words, prs->curwords);
for (i = 0; i < prs->curwords; i++) {
for (i = 0; i < prs->curwords; i++)
{
lenstr += SHORTALIGN(prs->words[i].len);
if ( prs->words[i].alen )
if (prs->words[i].alen)
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
}
totallen = CALCDATASIZE(prs->curwords, lenstr);
in = (tsvector *) palloc(totallen);
memset(in,0,totallen);
memset(in, 0, totallen);
in->len = totallen;
in->size = prs->curwords;
@ -615,24 +684,27 @@ makevalue(PRSTEXT * prs)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("value is too big")));
ptr->pos= cur - str;
ptr->pos = cur - str;
memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
pfree(prs->words[i].word);
cur += SHORTALIGN(prs->words[i].len);
if ( prs->words[i].alen ) {
if (prs->words[i].alen)
{
WordEntryPos *wptr;
ptr->haspos=1;
*(uint16*)cur = prs->words[i].pos.apos[0];
wptr=POSDATAPTR(in,ptr);
for(j=0;j<*(uint16*)cur;j++) {
wptr[j].weight=0;
wptr[j].pos=prs->words[i].pos.apos[j+1];
ptr->haspos = 1;
*(uint16 *) cur = prs->words[i].pos.apos[0];
wptr = POSDATAPTR(in, ptr);
for (j = 0; j < *(uint16 *) cur; j++)
{
wptr[j].weight = 0;
wptr[j].pos = prs->words[i].pos.apos[j + 1];
}
cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
pfree(prs->words[i].pos.apos);
} else
ptr->haspos=0;
}
else
ptr->haspos = 0;
ptr++;
}
pfree(prs->words);
@ -645,70 +717,78 @@ to_tsvector(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_P(1);
PRSTEXT prs;
tsvector *out = NULL;
TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0));
tsvector *out = NULL;
TSCfgInfo *cfg = findcfg(PG_GETARG_INT32(0));
prs.lenwords = 32;
prs.curwords = 0;
prs.pos = 0;
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
PG_FREE_IF_COPY(in, 1);
if (prs.curwords)
out = makevalue(&prs);
else {
else
{
pfree(prs.words);
out = palloc(CALCDATASIZE(0,0));
out->len = CALCDATASIZE(0,0);
out = palloc(CALCDATASIZE(0, 0));
out->len = CALCDATASIZE(0, 0);
out->size = 0;
}
}
PG_RETURN_POINTER(out);
}
Datum
to_tsvector_name(PG_FUNCTION_ARGS) {
text *cfg=PG_GETARG_TEXT_P(0);
Datum res = DirectFunctionCall3(
to_tsvector,
Int32GetDatum( name2id_cfg( cfg ) ),
PG_GETARG_DATUM(1),
(Datum)0
to_tsvector_name(PG_FUNCTION_ARGS)
{
text *cfg = PG_GETARG_TEXT_P(0);
Datum res = DirectFunctionCall3(
to_tsvector,
Int32GetDatum(name2id_cfg(cfg)),
PG_GETARG_DATUM(1),
(Datum) 0
);
PG_FREE_IF_COPY(cfg,0);
PG_RETURN_DATUM(res);
PG_FREE_IF_COPY(cfg, 0);
PG_RETURN_DATUM(res);
}
/*
 * to_tsvector_current
 *		Variant of to_tsvector using the configuration id returned by
 *		get_currcfg(); argument 0 is forwarded to to_tsvector unchanged.
 */
Datum
to_tsvector_current(PG_FUNCTION_ARGS)
{
	Datum		cfgid = Int32GetDatum(get_currcfg());
	Datum		vector;

	vector = DirectFunctionCall3(to_tsvector,
								 cfgid,
								 PG_GETARG_DATUM(0),
								 (Datum) 0);

	PG_RETURN_DATUM(vector);
}
static Oid
findFunc(char *fname) {
FuncCandidateList clist,ptr;
Oid funcid = InvalidOid;
List *names=makeList1(makeString(fname));
findFunc(char *fname)
{
FuncCandidateList clist,
ptr;
Oid funcid = InvalidOid;
List *names = makeList1(makeString(fname));
ptr = clist = FuncnameGetCandidates(names, 1);
freeList(names);
if ( !ptr )
if (!ptr)
return funcid;
while(ptr) {
if ( ptr->args[0] == TEXTOID && funcid == InvalidOid )
funcid=ptr->oid;
clist=ptr->next;
while (ptr)
{
if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
funcid = ptr->oid;
clist = ptr->next;
pfree(ptr);
ptr=clist;
ptr = clist;
}
return funcid;
@ -724,12 +804,12 @@ tsearch2(PG_FUNCTION_ARGS)
Trigger *trigger;
Relation rel;
HeapTuple rettuple = NULL;
TSCfgInfo *cfg=findcfg(get_currcfg());
TSCfgInfo *cfg = findcfg(get_currcfg());
int numidxattr,
i;
PRSTEXT prs;
Datum datum = (Datum) 0;
Oid funcoid = InvalidOid;
Oid funcoid = InvalidOid;
if (!CALLED_AS_TRIGGER(fcinfo))
/* internal error */
@ -782,8 +862,8 @@ tsearch2(PG_FUNCTION_ARGS)
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
if (numattr == SPI_ERROR_NOATTRIBUTE)
{
funcoid=findFunc(trigger->tgargs[i]);
if ( funcoid==InvalidOid )
funcoid = findFunc(trigger->tgargs[i]);
if (funcoid == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("could not find function or field \"%s\"",
@ -805,19 +885,22 @@ tsearch2(PG_FUNCTION_ARGS)
if (isnull)
continue;
if ( funcoid!=InvalidOid ) {
text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
funcoid,
PointerGetDatum(txt_toasted)
));
if (funcoid != InvalidOid)
{
text *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
funcoid,
PointerGetDatum(txt_toasted)
));
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
if ( txt == txttmp )
if (txt == txttmp)
txt_toasted = PointerGetDatum(txt);
} else
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
}
else
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
if (txt != (text*)DatumGetPointer(txt_toasted) )
if (txt != (text *) DatumGetPointer(txt_toasted))
pfree(txt);
}
@ -831,8 +914,9 @@ tsearch2(PG_FUNCTION_ARGS)
}
else
{
tsvector *out = palloc(CALCDATASIZE(0,0));
out->len = CALCDATASIZE(0,0);
tsvector *out = palloc(CALCDATASIZE(0, 0));
out->len = CALCDATASIZE(0, 0);
out->size = 0;
datum = PointerGetDatum(out);
pfree(prs.words);

View File

@ -12,23 +12,27 @@
#include "utils/builtins.h"
#include "storage/bufpage.h"
typedef struct {
typedef struct
{
uint32
haspos:1,
len:11, /* MAX 2Kb */
pos:20; /* MAX 1Mb */
haspos:1,
len:11, /* MAX 2Kb */
pos:20; /* MAX 1Mb */
} WordEntry;
#define MAXSTRLEN ( 1<<11 )
#define MAXSTRPOS ( 1<<20 )
typedef struct {
typedef struct
{
uint16
weight:2,
pos:14;
} WordEntryPos;
#define MAXENTRYPOS (1<<14)
weight:2,
pos:14;
} WordEntryPos;
#define MAXENTRYPOS (1<<14)
#define MAXNUMPOS 256
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
typedef struct
{
@ -43,13 +47,14 @@ typedef struct
#define STRPTR(x) ( (char*)x + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)x)->size ) )
#define STRSIZE(x) ( ((tsvector*)x)->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)x)->size ) )
#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
/*
 * Word descriptor together with a separately allocated position array.
 * Users of this struct read the first slot of pos as a uint16 count
 * ("*(uint16 *) pos") and the positions themselves from pos[1] onwards.
 */
typedef struct
{
	WordEntry	entry;
	WordEntryPos *pos;
} WordEntryIN;
typedef struct
@ -60,7 +65,7 @@ typedef struct
int4 len;
int4 state;
int4 alen;
WordEntryPos *pos;
WordEntryPos *pos;
bool oprisdelim;
} TI_IN_STATE;

View File

@ -33,30 +33,33 @@ Datum concat(PG_FUNCTION_ARGS);
/*
 * strip
 *		Return a copy of the tsvector in argument 0 that keeps every word
 *		but none of the position data: each output entry has haspos = 0 and
 *		the words are re-packed at SHORTALIGN'ed offsets.
 */
Datum
strip(PG_FUNCTION_ARGS)
{
	tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	WordEntry  *src = ARRPTR(in);
	WordEntry  *dst;
	tsvector   *out;
	char	   *wr;
	int			strbytes = 0;
	int			total;
	int			i;

	/* space needed for the words alone, each padded to short alignment */
	for (i = 0; i < in->size; i++)
		strbytes += SHORTALIGN(src[i].len);
	total = CALCDATASIZE(in->size, strbytes);

	out = (tsvector *) palloc(total);
	memset(out, 0, total);
	out->len = total;
	out->size = in->size;

	dst = ARRPTR(out);
	wr = STRPTR(out);
	for (i = 0; i < in->size; i++)
	{
		memcpy(wr, STRPTR(in) + src[i].pos, src[i].len);

		dst[i].haspos = 0;
		dst[i].len = src[i].len;
		dst[i].pos = wr - STRPTR(out);

		wr += SHORTALIGN(dst[i].len);
	}

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_POINTER(out);
}
@ -64,200 +67,263 @@ strip(PG_FUNCTION_ARGS)
Datum
setweight(PG_FUNCTION_ARGS)
{
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
char cw = PG_GETARG_CHAR(1);
tsvector *out;
int i,j;
WordEntry *entry;
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
char cw = PG_GETARG_CHAR(1);
tsvector *out;
int i,
j;
WordEntry *entry;
WordEntryPos *p;
int w=0;
int w = 0;
switch(tolower(cw)) {
case 'a': w=3; break;
case 'b': w=2; break;
case 'c': w=1; break;
case 'd': w=0; break;
/* internal error */
default: elog(ERROR,"unrecognized weight");
switch (tolower(cw))
{
case 'a':
w = 3;
break;
case 'b':
w = 2;
break;
case 'c':
w = 1;
break;
case 'd':
w = 0;
break;
/* internal error */
default:
elog(ERROR, "unrecognized weight");
}
out=(tsvector*)palloc(in->len);
memcpy(out,in,in->len);
entry=ARRPTR(out);
i=out->size;
while(i--) {
if ( (j=POSDATALEN(out,entry)) != 0 ) {
p=POSDATAPTR(out,entry);
while(j--) {
p->weight=w;
out = (tsvector *) palloc(in->len);
memcpy(out, in, in->len);
entry = ARRPTR(out);
i = out->size;
while (i--)
{
if ((j = POSDATALEN(out, entry)) != 0)
{
p = POSDATAPTR(out, entry);
while (j--)
{
p->weight = w;
p++;
}
}
entry++;
}
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(out);
}
/*
 * compareEntry
 *		Order two words that may live in different string areas: a is read
 *		at ptra + a->pos, b at ptrb + b->pos.  Shorter words sort first;
 *		words of equal length are ordered by strncmp() over that length.
 */
static int
compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
{
	if (a->len != b->len)
		return (a->len > b->len) ? 1 : -1;

	return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
}
/*
 * add_pos
 *		Append the positions of srcptr (in src) to the position list of
 *		destptr (in dest), adding maxpos to each position and clamping the
 *		sum with LIMITPOS().  The weights are copied unchanged.
 *
 * Copying stops when the source is exhausted, when the destination holds
 * MAXNUMPOS positions, or when the last stored destination position is
 * already MAXENTRYPOS - 1.  If destptr had no position data, its count is
 * started at zero; haspos is set once at least one position was added.
 *
 * Returns the number of positions appended.
 */
static int4
add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos)
{
	uint16	   *count = (uint16 *) _POSDATAPTR(dest, destptr);
	WordEntryPos *from = POSDATAPTR(src, srcptr);
	WordEntryPos *to = POSDATAPTR(dest, destptr);
	uint16		navail = POSDATALEN(src, srcptr);
	uint16		before;
	int			k = 0;

	if (!destptr->haspos)
		*count = 0;
	before = *count;

	while (k < navail)
	{
		/* destination list is full */
		if (*count >= MAXNUMPOS)
			break;
		/* the last stored position is already the largest storable one */
		if (*count != 0 && to[*count - 1].pos == MAXENTRYPOS - 1)
			break;

		to[*count].weight = from[k].weight;
		to[*count].pos = LIMITPOS(from[k].pos + maxpos);
		(*count)++;
		k++;
	}

	if (*count != before)
		destptr->haspos = 1;

	return *count - before;
}
/*
 * concat - build a new tsvector out of two input tsvectors.
 *
 * Arguments 0 and 1 are detoasted into in1 and in2.
 *
 * Steps, as visible below:
 *	 1. Walk every position stored in in1 and remember the largest one
 *		in maxpos.
 *	 2. Allocate a zeroed result of in1->len + in2->len bytes.
 *	 3. Merge the two entry arrays in the order given by compareEntry().
 *		An entry taken from in1 has its word and its position block copied
 *		verbatim.  An entry taken from in2 has its word copied and its
 *		positions passed through add_pos() together with maxpos.  When
 *		both inputs hold the same word a single output entry is written.
 *	 4. Recompute out->size and out->len from what was really written and,
 *		if the string area no longer starts where it was first assumed,
 *		move it with memmove().
 *
 * Returns the new tsvector; detoasted copies of the inputs are released.
 */
Datum
concat(PG_FUNCTION_ARGS) {
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
tsvector *out;
WordEntry *ptr;
WordEntry *ptr1,*ptr2;
concat(PG_FUNCTION_ARGS)
{
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
tsvector *out;
WordEntry *ptr;
WordEntry *ptr1,
*ptr2;
WordEntryPos *p;
int maxpos=0,i,j,i1,i2;
char *cur;
char *data,*data1,*data2;
int maxpos = 0,
i,
j,
i1,
i2;
char *cur;
char *data,
*data1,
*data2;
/* step 1: find the largest position used anywhere in in1 */
ptr=ARRPTR(in1);
i=in1->size;
while(i--) {
if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
p=POSDATAPTR(in1,ptr);
while(j--) {
if ( p->pos > maxpos )
ptr = ARRPTR(in1);
i = in1->size;
while (i--)
{
if ((j = POSDATALEN(in1, ptr)) != 0)
{
p = POSDATAPTR(in1, ptr);
while (j--)
{
if (p->pos > maxpos)
maxpos = p->pos;
p++;
}
}
ptr++;
}
/* step 2: set up cursors over both inputs and allocate the result */
ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
data1=STRPTR(in1); data2=STRPTR(in2);
i1=in1->size; i2=in2->size;
out=(tsvector*)palloc( in1->len + in2->len );
memset(out,0,in1->len + in2->len);
ptr1 = ARRPTR(in1);
ptr2 = ARRPTR(in2);
data1 = STRPTR(in1);
data2 = STRPTR(in2);
i1 = in1->size;
i2 = in2->size;
out = (tsvector *) palloc(in1->len + in2->len);
memset(out, 0, in1->len + in2->len);
out->len = in1->len + in2->len;
out->size = in1->size + in2->size;
data=cur=STRPTR(out);
ptr=ARRPTR(out);
while( i1 && i2 ) {
int cmp=compareEntry(data1,ptr1,data2,ptr2);
if ( cmp < 0 ) { /* in1 first */
data = cur = STRPTR(out);
ptr = ARRPTR(out);
/* step 3: merge while both inputs still have entries */
while (i1 && i2)
{
int cmp = compareEntry(data1, ptr1, data2, ptr2);
if (cmp < 0)
{ /* in1 first */
ptr->haspos = ptr1->haspos;
ptr->len = ptr1->len;
memcpy( cur, data1 + ptr1->pos, ptr1->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr1->len);
if ( ptr->haspos ) {
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
memcpy(cur, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data;
cur += SHORTALIGN(ptr1->len);
if (ptr->haspos)
{
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr1++; i1--;
} else if ( cmp>0 ) { /* in2 first */
ptr++;
ptr1++;
i1--;
}
else if (cmp > 0)
{ /* in2 first */
ptr->haspos = ptr2->haspos;
ptr->len = ptr2->len;
memcpy( cur, data2 + ptr2->pos, ptr2->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr2->len);
if ( ptr->haspos ) {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
if ( addlen == 0 )
ptr->haspos=0;
memcpy(cur, data2 + ptr2->pos, ptr2->len);
ptr->pos = cur - data;
cur += SHORTALIGN(ptr2->len);
if (ptr->haspos)
{
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
if (addlen == 0)
ptr->haspos = 0;
else
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr2++; i2--;
} else {
ptr++;
ptr2++;
i2--;
}
else
{
/* same word in both inputs: one output entry, positions combined */
ptr->haspos = ptr1->haspos | ptr2->haspos;
ptr->len = ptr1->len;
memcpy( cur, data1 + ptr1->pos, ptr1->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr1->len);
if ( ptr->haspos ) {
if ( ptr1->haspos ) {
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
if ( ptr2->haspos )
cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
} else if ( ptr2->haspos ) {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
if ( addlen == 0 )
ptr->haspos=0;
memcpy(cur, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data;
cur += SHORTALIGN(ptr1->len);
if (ptr->haspos)
{
if (ptr1->haspos)
{
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
if (ptr2->haspos)
cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
}
else if (ptr2->haspos)
{
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
if (addlen == 0)
ptr->haspos = 0;
else
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
}
ptr++; ptr1++; ptr2++; i1--; i2--;
ptr++;
ptr1++;
ptr2++;
i1--;
i2--;
}
}
/* leftover entries of in1 are copied unchanged */
while(i1) {
while (i1)
{
ptr->haspos = ptr1->haspos;
ptr->len = ptr1->len;
memcpy( cur, data1 + ptr1->pos, ptr1->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr1->len);
if ( ptr->haspos ) {
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
memcpy(cur, data1 + ptr1->pos, ptr1->len);
ptr->pos = cur - data;
cur += SHORTALIGN(ptr1->len);
if (ptr->haspos)
{
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr1++; i1--;
ptr++;
ptr1++;
i1--;
}
/* leftover entries of in2 have their positions run through add_pos() */
while(i2) {
while (i2)
{
ptr->haspos = ptr2->haspos;
ptr->len = ptr2->len;
memcpy( cur, data2 + ptr2->pos, ptr2->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr2->len);
if ( ptr->haspos ) {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
if ( addlen == 0 )
ptr->haspos=0;
memcpy(cur, data2 + ptr2->pos, ptr2->len);
ptr->pos = cur - data;
cur += SHORTALIGN(ptr2->len);
if (ptr->haspos)
{
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
if (addlen == 0)
ptr->haspos = 0;
else
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr2++; i2--;
ptr++;
ptr2++;
i2--;
}
/*
 * step 4: fewer entries than in1->size + in2->size may have been written,
 * so fix the header and move the string area if STRPTR(out) changed.
 */
out->size=ptr-ARRPTR(out);
out->len = CALCDATASIZE( out->size, cur-data );
if ( data != STRPTR(out) )
memmove( STRPTR(out), data, cur-data );
out->size = ptr - ARRPTR(out);
out->len = CALCDATASIZE(out->size, cur - data);
if (data != STRPTR(out))
memmove(STRPTR(out), data, cur - data);
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
PG_RETURN_POINTER(out);
}

View File

@ -1,6 +1,6 @@
#include "deflex.h"
const char *lex_descr[]={
const char *lex_descr[] = {
"",
"Latin word",
"Non-latin word",
@ -27,7 +27,7 @@ const char *lex_descr[]={
"HTML Entity"
};
const char *tok_alias[]={
const char *tok_alias[] = {
"",
"lword",
"nlword",
@ -53,4 +53,3 @@ const char *tok_alias[]={
"uint",
"entity"
};

View File

@ -1,5 +1,5 @@
/*
* interface functions to parser
/*
* interface functions to parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
@ -21,154 +21,171 @@
/*********top interface**********/
/*
 * plan_getparser: saved SPI plan for the pg_ts_parser lookup, filled in
 * by init_prs() the first time it runs.
 * current_parser_id: OID of the parser used by the *_current functions;
 * InvalidOid until one has been chosen.
 */
static void *plan_getparser=NULL;
static Oid current_parser_id=InvalidOid;
static void *plan_getparser = NULL;
static Oid current_parser_id = InvalidOid;
/*
 * init_prs - load the description of one parser from pg_ts_parser.
 *
 * id	OID of the pg_ts_parser row to read.
 * prs	caller-supplied structure; it is zeroed and then filled in.
 *
 * The lookup plan is prepared on first use and kept in plan_getparser.
 * Function lookup data for the start, next-token, end and headline
 * functions is stored in TopMemoryContext; the OID of the lextype
 * function is stored as-is in prs->lextype.
 *
 * Errors (plan preparation failure, execution failure, no matching row)
 * are reported through ts_error().
 */
void
init_prs(Oid id, WParserInfo * prs)
{
	Oid			arg[1] = {OIDOID};
	bool		isnull;
	Datum		pars[1] = {ObjectIdGetDatum(id)};
	int			stat;

	memset(prs, 0, sizeof(WParserInfo));
	SPI_connect();
	if (!plan_getparser)
	{
		plan_getparser = SPI_saveplan(SPI_prepare("select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1", 1, arg));
		if (!plan_getparser)
			ts_error(ERROR, "SPI_prepare() failed");
	}

	stat = SPI_execp(plan_getparser, pars, " ", 1);
	if (stat < 0)
		ts_error(ERROR, "SPI_execp return %d", stat);
	if (SPI_processed > 0)
	{
		Oid			oid = InvalidOid;

		/* columns 1..5 follow the select list above */
		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
		fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
		fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
		fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
		prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
		oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
		fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
		prs->prs_id = id;
	}
	else
	{
		/* Oid is an unsigned type, so it is printed with %u rather than %d */
		ts_error(ERROR, "No parser with id %u", id);
	}
	SPI_finish();
}
/*
 * PrsList - cache of parser descriptions already loaded by findprs().
 *
 * last_prs		entry handed out by the most recent lookup
 * len			number of used entries in list
 * reallen		number of entries list has room for
 * list			array grown with realloc() and kept sorted with qsort()
 *				using compareprs
 * name2id_map	parser name -> OID map used by name2id_prs()
 *
 * PList is the single instance; it starts out empty.
 */
typedef struct {
WParserInfo *last_prs;
int len;
int reallen;
WParserInfo *list;
typedef struct
{
WParserInfo *last_prs;
int len;
int reallen;
WParserInfo *list;
SNMap name2id_map;
} PrsList;
} PrsList;
static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
/*
 * reset_prs - drop everything cached in PList.
 *
 * Frees the name map and the entry array (if any) and zeroes the whole
 * structure, so the next lookup starts from an empty cache.
 */
void
reset_prs(void) {
freeSNMap( &(PList.name2id_map) );
if ( PList.list )
void
reset_prs(void)
{
freeSNMap(&(PList.name2id_map));
if (PList.list)
free(PList.list);
memset(&PList,0,sizeof(PrsList));
memset(&PList, 0, sizeof(PrsList));
}
static int
compareprs(const void *a, const void *b) {
return ((WParserInfo*)a)->prs_id - ((WParserInfo*)b)->prs_id;
compareprs(const void *a, const void *b)
{
return ((WParserInfo *) a)->prs_id - ((WParserInfo *) b)->prs_id;
}
/*
 * findprs - return the cached description of parser `id`, loading it
 * on first use.
 *
 * Lookup order:
 *	 1. the entry returned by the previous call (PList.last_prs);
 *	 2. binary search in the sorted PList.list;
 *	 3. otherwise grow the array if it is full (16 entries at first, then
 *		doubling), let init_prs() fill the next free slot, re-sort the
 *		array and look the id up again, because sorting may have moved
 *		the new entry.
 *
 * Running out of memory is reported through ts_error().
 */
WParserInfo *
findprs(Oid id) {
findprs(Oid id)
{
/* last used prs */
if ( PList.last_prs && PList.last_prs->prs_id==id )
if (PList.last_prs && PList.last_prs->prs_id == id)
return PList.last_prs;
/* already used prs */
if ( PList.len != 0 ) {
if (PList.len != 0)
{
WParserInfo key;
key.prs_id=id;
key.prs_id = id;
PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
if ( PList.last_prs != NULL )
if (PList.last_prs != NULL)
return PList.last_prs;
}
/* last chance */
if ( PList.len==PList.reallen ) {
if (PList.len == PList.reallen)
{
WParserInfo *tmp;
int reallen = ( PList.reallen ) ? 2*PList.reallen : 16;
tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen);
if ( !tmp )
ts_error(ERROR,"No memory");
PList.reallen=reallen;
PList.list=tmp;
int reallen = (PList.reallen) ? 2 * PList.reallen : 16;
tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
if (!tmp)
ts_error(ERROR, "No memory");
PList.reallen = reallen;
PList.list = tmp;
}
PList.last_prs=&(PList.list[PList.len]);
PList.last_prs = &(PList.list[PList.len]);
init_prs(id, PList.last_prs);
PList.len++;
qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
return findprs(id); /* qsort changed order!! */;
return findprs(id); /* qsort changed order!! */ ;
}
/*
 * plan_name2id: saved SPI plan for the name -> OID query below, prepared
 * on first use.
 *
 * name2id_prs - translate a parser name into its pg_ts_parser OID.
 *
 * The name map in PList is consulted first; a non-zero hit is returned
 * immediately.  Otherwise pg_ts_parser is queried by prs_name, the result
 * is remembered in the map and returned.
 *
 * Errors (plan preparation failure, execution failure, unknown name) are
 * reported through ts_error().
 */
static void *plan_name2id=NULL;
static void *plan_name2id = NULL;
Oid
name2id_prs(text *name) {
Oid arg[1]={ TEXTOID };
bool isnull;
Datum pars[1]={ PointerGetDatum(name) };
int stat;
Oid id=findSNMap_t( &(PList.name2id_map), name );
if ( id )
name2id_prs(text *name)
{
Oid arg[1] = {TEXTOID};
bool isnull;
Datum pars[1] = {PointerGetDatum(name)};
int stat;
Oid id = findSNMap_t(&(PList.name2id_map), name);
if (id)
return id;
SPI_connect();
if ( !plan_name2id ) {
plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) );
if ( !plan_name2id )
if (!plan_name2id)
{
plan_name2id = SPI_saveplan(SPI_prepare("select oid from pg_ts_parser where prs_name = $1", 1, arg));
if (!plan_name2id)
ts_error(ERROR, "SPI_prepare() failed");
}
stat = SPI_execp(plan_name2id, pars, " ", 1);
if ( stat < 0 )
ts_error (ERROR, "SPI_execp return %d", stat);
if ( SPI_processed > 0 )
id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
else
if (stat < 0)
ts_error(ERROR, "SPI_execp return %d", stat);
if (SPI_processed > 0)
id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
else
ts_error(ERROR, "No parser '%s'", text2char(name));
SPI_finish();
addSNMap_t( &(PList.name2id_map), name, id );
addSNMap_t(&(PList.name2id_map), name, id);
return id;
}
/******sql-level interface******/
/*
 * TypeStorage - per-query state of the token_type* set-returning functions.
 *
 * cur	 index of the next element of list to return
 * list	 array of token descriptions obtained from the parser's lextype
 *		 function; iteration stops at the element whose lexid is 0
 */
typedef struct {
int cur;
LexDescr *list;
} TypeStorage;
typedef struct
{
int cur;
LexDescr *list;
} TypeStorage;
static void
setup_firstcall(FuncCallContext *funcctx, Oid prsid) {
TupleDesc tupdesc;
MemoryContext oldcontext;
TypeStorage *st;
WParserInfo *prs = findprs(prsid);
setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
TypeStorage *st;
WParserInfo *prs = findprs(prsid);
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st=(TypeStorage*)palloc( sizeof(TypeStorage) );
st->cur=0;
st->list = (LexDescr*)DatumGetPointer(
OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) )
);
funcctx->user_fctx = (void*)st;
st = (TypeStorage *) palloc(sizeof(TypeStorage));
st->cur = 0;
st->list = (LexDescr *) DatumGetPointer(
OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
);
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("tokentype");
funcctx->slot = TupleDescGetSlot(tupdesc);
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
@ -176,20 +193,22 @@ setup_firstcall(FuncCallContext *funcctx, Oid prsid) {
}
static Datum
process_call(FuncCallContext *funcctx) {
TypeStorage *st;
process_call(FuncCallContext *funcctx)
{
TypeStorage *st;
st=(TypeStorage*)funcctx->user_fctx;
if ( st->list && st->list[st->cur].lexid ) {
Datum result;
char* values[3];
char txtid[16];
HeapTuple tuple;
st = (TypeStorage *) funcctx->user_fctx;
if (st->list && st->list[st->cur].lexid)
{
Datum result;
char *values[3];
char txtid[16];
HeapTuple tuple;
values[0]=txtid;
sprintf(txtid,"%d",st->list[st->cur].lexid);
values[1]=st->list[st->cur].alias;
values[2]=st->list[st->cur].descr;
values[0] = txtid;
sprintf(txtid, "%d", st->list[st->cur].lexid);
values[1] = st->list[st->cur].alias;
values[2] = st->list[st->cur].descr;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = TupleGetDatum(funcctx->slot, tuple);
@ -198,161 +217,179 @@ process_call(FuncCallContext *funcctx) {
pfree(values[2]);
st->cur++;
return result;
} else {
if ( st->list ) pfree(st->list);
}
else
{
if (st->list)
pfree(st->list);
pfree(st);
}
return (Datum)0;
return (Datum) 0;
}
/*
 * token_type - set-returning function listing the token types of the
 * parser whose OID is argument 0.
 *
 * The first call prepares the iteration state with setup_firstcall();
 * every call then returns the next row produced by process_call(), and
 * the set ends when process_call() yields (Datum) 0.
 */
PG_FUNCTION_INFO_V1(token_type);
Datum token_type(PG_FUNCTION_ARGS);
Datum token_type(PG_FUNCTION_ARGS);
Datum
token_type(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
token_type(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL()) {
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
setup_firstcall(funcctx, PG_GETARG_OID(0) );
setup_firstcall(funcctx, PG_GETARG_OID(0));
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=process_call(funcctx)) != (Datum)0 )
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/*
 * token_type_byname - same as token_type, but the parser is given by
 * name (text argument 0).
 *
 * On the first call the name is resolved with name2id_prs() and the
 * iteration state is prepared; later calls only return the next row from
 * process_call() until it yields (Datum) 0.
 */
PG_FUNCTION_INFO_V1(token_type_byname);
Datum token_type_byname(PG_FUNCTION_ARGS);
Datum token_type_byname(PG_FUNCTION_ARGS);
Datum
token_type_byname(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
token_type_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *name = PG_GETARG_TEXT_P(0);
if (SRF_IS_FIRSTCALL()) {
text *name = PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
setup_firstcall(funcctx, name2id_prs( name ) );
PG_FREE_IF_COPY(name,0);
setup_firstcall(funcctx, name2id_prs(name));
PG_FREE_IF_COPY(name, 0);
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=process_call(funcctx)) != (Datum)0 )
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/*
 * token_type_current - same as token_type, for the current parser.
 *
 * If no current parser has been chosen yet (current_parser_id is
 * InvalidOid), the parser named "default" is looked up and remembered.
 */
PG_FUNCTION_INFO_V1(token_type_current);
Datum token_type_current(PG_FUNCTION_ARGS);
Datum token_type_current(PG_FUNCTION_ARGS);
Datum
token_type_current(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
token_type_current(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL()) {
if (SRF_IS_FIRSTCALL())
{
funcctx = SRF_FIRSTCALL_INIT();
if ( current_parser_id==InvalidOid )
current_parser_id = name2id_prs( char2text("default") );
setup_firstcall(funcctx, current_parser_id );
if (current_parser_id == InvalidOid)
current_parser_id = name2id_prs(char2text("default"));
setup_firstcall(funcctx, current_parser_id);
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=process_call(funcctx)) != (Datum)0 )
if ((result = process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/*
 * set_curprs - make the parser with the OID in argument 0 the current one.
 *
 * findprs() is called first, so the parser is loaded into the cache (and
 * an unknown OID is reported as an error) before current_parser_id is
 * changed.  Returns void.
 */
PG_FUNCTION_INFO_V1(set_curprs);
Datum set_curprs(PG_FUNCTION_ARGS);
Datum set_curprs(PG_FUNCTION_ARGS);
Datum
set_curprs(PG_FUNCTION_ARGS) {
findprs(PG_GETARG_OID(0));
current_parser_id=PG_GETARG_OID(0);
PG_RETURN_VOID();
set_curprs(PG_FUNCTION_ARGS)
{
findprs(PG_GETARG_OID(0));
current_parser_id = PG_GETARG_OID(0);
PG_RETURN_VOID();
}
/*
 * set_curprs_byname - make the parser named by text argument 0 the
 * current one.
 *
 * The name is resolved with name2id_prs() and the work is delegated to
 * set_curprs().  Returns void.
 */
PG_FUNCTION_INFO_V1(set_curprs_byname);
Datum set_curprs_byname(PG_FUNCTION_ARGS);
Datum set_curprs_byname(PG_FUNCTION_ARGS);
Datum
set_curprs_byname(PG_FUNCTION_ARGS) {
text *name=PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curprs,
ObjectIdGetDatum( name2id_prs(name) )
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
set_curprs_byname(PG_FUNCTION_ARGS)
{
text *name = PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curprs,
ObjectIdGetDatum(name2id_prs(name))
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
/*
 * LexemEntry - one token produced by a parser run: its type number and a
 * NUL-terminated copy of its text.
 *
 * PrsStorage - per-query state of the parse* set-returning functions.
 *	 cur   index of the next entry of list to return
 *	 len   number of entries to return
 *	 list  array of collected tokens
 */
typedef struct {
int type;
char *lexem;
} LexemEntry;
typedef struct
{
int type;
char *lexem;
} LexemEntry;
typedef struct
{
int cur;
int len;
LexemEntry *list;
} PrsStorage;
typedef struct {
int cur;
int len;
LexemEntry *list;
} PrsStorage;
static void
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) {
TupleDesc tupdesc;
MemoryContext oldcontext;
PrsStorage *st;
WParserInfo *prs = findprs(prsid);
char *lex=NULL;
int llen=0, type=0;
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt)
{
TupleDesc tupdesc;
MemoryContext oldcontext;
PrsStorage *st;
WParserInfo *prs = findprs(prsid);
char *lex = NULL;
int llen = 0,
type = 0;
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
st=(PrsStorage*)palloc( sizeof(PrsStorage) );
st->cur=0;
st->len=16;
st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len );
st = (PrsStorage *) palloc(sizeof(PrsStorage));
st->cur = 0;
st->len = 16;
st->list = (LexemEntry *) palloc(sizeof(LexemEntry) * st->len);
prs->prs = (void*)DatumGetPointer(
FunctionCall2(
&(prs->start_info),
PointerGetDatum(VARDATA(txt)),
Int32GetDatum(VARSIZE(txt)-VARHDRSZ)
)
);
prs->prs = (void *) DatumGetPointer(
FunctionCall2(
&(prs->start_info),
PointerGetDatum(VARDATA(txt)),
Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
)
);
while( ( type=DatumGetInt32(FunctionCall3(
&(prs->getlexeme_info),
PointerGetDatum(prs->prs),
PointerGetDatum(&lex),
PointerGetDatum(&llen))) ) != 0 ) {
while ((type = DatumGetInt32(FunctionCall3(
&(prs->getlexeme_info),
PointerGetDatum(prs->prs),
PointerGetDatum(&lex),
PointerGetDatum(&llen)))) != 0)
{
if ( st->cur>=st->len ) {
st->len=2*st->len;
st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len);
if (st->cur >= st->len)
{
st->len = 2 * st->len;
st->list = (LexemEntry *) repalloc(st->list, sizeof(LexemEntry) * st->len);
}
st->list[st->cur].lexem = palloc(llen+1);
memcpy( st->list[st->cur].lexem, lex, llen);
st->list[st->cur].lexem[llen]='\0';
st->list[st->cur].type=type;
st->list[st->cur].lexem = palloc(llen + 1);
memcpy(st->list[st->cur].lexem, lex, llen);
st->list[st->cur].lexem[llen] = '\0';
st->list[st->cur].type = type;
st->cur++;
}
FunctionCall1(
&(prs->end_info),
PointerGetDatum(prs->prs)
);
st->len=st->cur;
st->cur=0;
funcctx->user_fctx = (void*)st;
FunctionCall1(
&(prs->end_info),
PointerGetDatum(prs->prs)
);
st->len = st->cur;
st->cur = 0;
funcctx->user_fctx = (void *) st;
tupdesc = RelationNameGetTupleDesc("tokenout");
funcctx->slot = TupleDescGetSlot(tupdesc);
funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
@ -360,132 +397,148 @@ prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) {
}
/*
 * prs_process_call - produce the next row of a parse* result set.
 *
 * While tokens remain, builds a two-column tuple (token type printed as
 * decimal text, token text), frees the token text, advances the cursor
 * and returns the tuple as a Datum.  Once the list is exhausted the list
 * and the state structure are freed and (Datum) 0 is returned.
 */
static Datum
prs_process_call(FuncCallContext *funcctx) {
PrsStorage *st;
prs_process_call(FuncCallContext *funcctx)
{
PrsStorage *st;
st=(PrsStorage*)funcctx->user_fctx;
if ( st->cur < st->len ) {
Datum result;
char* values[2];
char tid[16];
HeapTuple tuple;
st = (PrsStorage *) funcctx->user_fctx;
if (st->cur < st->len)
{
Datum result;
char *values[2];
char tid[16];
HeapTuple tuple;
values[0]=tid;
sprintf(tid,"%d",st->list[st->cur].type);
values[1]=st->list[st->cur].lexem;
values[0] = tid;
sprintf(tid, "%d", st->list[st->cur].type);
values[1] = st->list[st->cur].lexem;
tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
result = TupleGetDatum(funcctx->slot, tuple);
pfree(values[1]);
st->cur++;
return result;
} else {
if ( st->list ) pfree(st->list);
}
else
{
if (st->list)
pfree(st->list);
pfree(st);
}
return (Datum)0;
return (Datum) 0;
}
/*
 * parse - set-returning function that tokenizes text argument 1 with the
 * parser whose OID is argument 0.
 *
 * The first call runs the parser over the whole text through
 * prs_setup_firstcall(); every call returns the next row from
 * prs_process_call() until it yields (Datum) 0.
 */
PG_FUNCTION_INFO_V1(parse);
Datum parse(PG_FUNCTION_ARGS);
Datum parse(PG_FUNCTION_ARGS);
Datum
parse(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
parse(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_P(1);
if (SRF_IS_FIRSTCALL()) {
text *txt = PG_GETARG_TEXT_P(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt );
PG_FREE_IF_COPY(txt,1);
prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=prs_process_call(funcctx)) != (Datum)0 )
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/*
 * parse_byname - same as parse, but the parser is given by name.
 *
 * Argument 0 is the parser name (resolved with name2id_prs()), argument 1
 * is the text to tokenize.
 */
PG_FUNCTION_INFO_V1(parse_byname);
Datum parse_byname(PG_FUNCTION_ARGS);
Datum parse_byname(PG_FUNCTION_ARGS);
Datum
parse_byname(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
parse_byname(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *name = PG_GETARG_TEXT_P(0);
text *txt = PG_GETARG_TEXT_P(1);
if (SRF_IS_FIRSTCALL()) {
text *name = PG_GETARG_TEXT_P(0);
text *txt = PG_GETARG_TEXT_P(1);
funcctx = SRF_FIRSTCALL_INIT();
prs_setup_firstcall(funcctx, name2id_prs( name ),txt );
PG_FREE_IF_COPY(name,0);
PG_FREE_IF_COPY(txt,1);
prs_setup_firstcall(funcctx, name2id_prs(name), txt);
PG_FREE_IF_COPY(name, 0);
PG_FREE_IF_COPY(txt, 1);
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=prs_process_call(funcctx)) != (Datum)0 )
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/*
 * parse_current - same as parse, using the current parser on text
 * argument 0.
 *
 * If no current parser has been chosen yet (current_parser_id is
 * InvalidOid), the parser named "default" is looked up and remembered.
 */
PG_FUNCTION_INFO_V1(parse_current);
Datum parse_current(PG_FUNCTION_ARGS);
Datum parse_current(PG_FUNCTION_ARGS);
Datum
parse_current(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
parse_current(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL())
{
text *txt = PG_GETARG_TEXT_P(0);
if (SRF_IS_FIRSTCALL()) {
text *txt = PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
if ( current_parser_id==InvalidOid )
current_parser_id = name2id_prs( char2text("default") );
prs_setup_firstcall(funcctx, current_parser_id,txt );
PG_FREE_IF_COPY(txt,0);
if (current_parser_id == InvalidOid)
current_parser_id = name2id_prs(char2text("default"));
prs_setup_firstcall(funcctx, current_parser_id, txt);
PG_FREE_IF_COPY(txt, 0);
}
funcctx = SRF_PERCALL_SETUP();
if ( (result=prs_process_call(funcctx)) != (Datum)0 )
if ((result = prs_process_call(funcctx)) != (Datum) 0)
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
PG_FUNCTION_INFO_V1(headline);
Datum headline(PG_FUNCTION_ARGS);
Datum headline(PG_FUNCTION_ARGS);
Datum
headline(PG_FUNCTION_ARGS) {
TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0));
text *in = PG_GETARG_TEXT_P(1);
headline(PG_FUNCTION_ARGS)
{
TSCfgInfo *cfg = findcfg(PG_GETARG_OID(0));
text *in = PG_GETARG_TEXT_P(1);
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
text *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL;
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
HLPRSTEXT prs;
text *out;
text *out;
WParserInfo *prsobj = findprs(cfg->prs_id);
memset(&prs,0,sizeof(HLPRSTEXT));
memset(&prs, 0, sizeof(HLPRSTEXT));
prs.lenwords = 32;
prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
FunctionCall3(
&(prsobj->headline_info),
PointerGetDatum(&prs),
PointerGetDatum(opt),
PointerGetDatum(query)
);
&(prsobj->headline_info),
PointerGetDatum(&prs),
PointerGetDatum(opt),
PointerGetDatum(query)
);
out = genhl(&prs);
PG_FREE_IF_COPY(in,1);
PG_FREE_IF_COPY(query,2);
if ( opt ) PG_FREE_IF_COPY(opt,3);
PG_FREE_IF_COPY(in, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
pfree(prs.startsel);
pfree(prs.stopsel);
@ -495,35 +548,34 @@ headline(PG_FUNCTION_ARGS) {
/*
 * headline_byname - headline() with the configuration given by name.
 *
 * Argument 0 is the configuration name, resolved with name2id_cfg().
 * Arguments 1 and 2 are passed through unchanged; argument 3 is passed
 * through when the caller supplied it, otherwise a NULL pointer is passed
 * in its place.  Returns whatever headline() returns.
 */
PG_FUNCTION_INFO_V1(headline_byname);
Datum headline_byname(PG_FUNCTION_ARGS);
Datum headline_byname(PG_FUNCTION_ARGS);
Datum
headline_byname(PG_FUNCTION_ARGS) {
text *cfg=PG_GETARG_TEXT_P(0);
headline_byname(PG_FUNCTION_ARGS)
{
text *cfg = PG_GETARG_TEXT_P(0);
Datum out=DirectFunctionCall4(
headline,
ObjectIdGetDatum(name2id_cfg( cfg ) ),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2),
( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
Datum out = DirectFunctionCall4(
headline,
ObjectIdGetDatum(name2id_cfg(cfg)),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2),
(PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
);
PG_FREE_IF_COPY(cfg,0);
PG_RETURN_DATUM(out);
PG_FREE_IF_COPY(cfg, 0);
PG_RETURN_DATUM(out);
}
/*
 * headline_current - headline() with the configuration returned by
 * get_currcfg().
 *
 * Arguments 0 and 1 are passed through as headline()'s arguments 1 and 2;
 * argument 2 becomes headline()'s argument 3 when supplied, otherwise a
 * NULL pointer is passed in its place.
 */
PG_FUNCTION_INFO_V1(headline_current);
Datum headline_current(PG_FUNCTION_ARGS);
Datum headline_current(PG_FUNCTION_ARGS);
Datum
headline_current(PG_FUNCTION_ARGS) {
headline_current(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(
headline,
ObjectIdGetDatum(get_currcfg()),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
( PG_NARGS()>2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
));
headline,
ObjectIdGetDatum(get_currcfg()),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
(PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
));
}

View File

@ -3,26 +3,28 @@
#include "postgres.h"
#include "fmgr.h"
/*
 * WParserInfo - everything needed to call one word parser.
 *
 *	 prs_id			 OID identifying the parser
 *	 start_info		 lookup data of the function that starts a parse
 *	 getlexeme_info	 lookup data of the function returning the next token
 *	 end_info		 lookup data of the function that ends a parse
 *	 headline_info	 lookup data of the headline function
 *	 lextype		 OID of the function describing the token types
 *	 prs			 opaque value returned by the start function and handed
 *					 back to the other parser functions
 *
 * The prototypes that follow are the parser cache interface: init_prs()
 * fills a WParserInfo, findprs() returns the cached entry for an OID,
 * name2id_prs() maps a parser name to its OID and reset_prs() empties
 * the cache.
 */
typedef struct {
Oid prs_id;
FmgrInfo start_info;
FmgrInfo getlexeme_info;
FmgrInfo end_info;
FmgrInfo headline_info;
Oid lextype;
void *prs;
} WParserInfo;
typedef struct
{
Oid prs_id;
FmgrInfo start_info;
FmgrInfo getlexeme_info;
FmgrInfo end_info;
FmgrInfo headline_info;
Oid lextype;
void *prs;
} WParserInfo;
void init_prs(Oid id, WParserInfo *prs);
WParserInfo* findprs(Oid id);
Oid name2id_prs(text *name);
void reset_prs(void);
void init_prs(Oid id, WParserInfo * prs);
WParserInfo *findprs(Oid id);
Oid name2id_prs(text *name);
void reset_prs(void);
/*
 * LexDescr - description of one token type.
 *
 *	 lexid	numeric id of the token type; 0 marks the end of an array
 *	 alias	short name of the token type
 *	 descr	human-readable description
 */
typedef struct {
int lexid;
char *alias;
char *descr;
} LexDescr;
typedef struct
{
int lexid;
char *alias;
char *descr;
} LexDescr;
#endif

View File

@ -1,5 +1,5 @@
/*
* default word parser
/*
* default word parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
@ -17,40 +17,44 @@
#include "wordparser/deflex.h"
/*
 * prsd_lextype - return the token types of the default parser.
 *
 * Allocates an array of LASTNUM + 1 LexDescr.  Element i - 1 describes
 * token type i (1 .. LASTNUM) with copies of tok_alias[i] and
 * lex_descr[i]; the last element has lexid 0 and terminates the array.
 * The array is returned as a pointer Datum.
 */
PG_FUNCTION_INFO_V1(prsd_lextype);
Datum prsd_lextype(PG_FUNCTION_ARGS);
Datum prsd_lextype(PG_FUNCTION_ARGS);
Datum
prsd_lextype(PG_FUNCTION_ARGS) {
LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1));
int i;
Datum
prsd_lextype(PG_FUNCTION_ARGS)
{
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
int i;
for(i=1;i<=LASTNUM;i++) {
descr[i-1].lexid = i;
descr[i-1].alias = pstrdup(tok_alias[i]);
descr[i-1].descr = pstrdup(lex_descr[i]);
for (i = 1; i <= LASTNUM; i++)
{
descr[i - 1].lexid = i;
descr[i - 1].alias = pstrdup(tok_alias[i]);
descr[i - 1].descr = pstrdup(lex_descr[i]);
}
descr[LASTNUM].lexid=0;
descr[LASTNUM].lexid = 0;
PG_RETURN_POINTER(descr);
}
/*
 * prsd_start - start function of the default parser.
 *
 * Argument 0 is a pointer to the text to parse and argument 1 its length;
 * both are handed to start_parse_str().  A NULL pointer is returned as
 * the parser state.
 */
PG_FUNCTION_INFO_V1(prsd_start);
Datum prsd_start(PG_FUNCTION_ARGS);
Datum
prsd_start(PG_FUNCTION_ARGS) {
start_parse_str( (char*)PG_GETARG_POINTER(0), PG_GETARG_INT32(1) );
Datum prsd_start(PG_FUNCTION_ARGS);
Datum
prsd_start(PG_FUNCTION_ARGS)
{
start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1));
PG_RETURN_POINTER(NULL);
}
PG_FUNCTION_INFO_V1(prsd_getlexeme);
Datum prsd_getlexeme(PG_FUNCTION_ARGS);
Datum
prsd_getlexeme(PG_FUNCTION_ARGS) {
Datum prsd_getlexeme(PG_FUNCTION_ARGS);
Datum
prsd_getlexeme(PG_FUNCTION_ARGS)
{
/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
char **t=(char**)PG_GETARG_POINTER(1);
int *tlen=(int*)PG_GETARG_POINTER(2);
int type=tsearch2_yylex();
char **t = (char **) PG_GETARG_POINTER(1);
int *tlen = (int *) PG_GETARG_POINTER(2);
int type = tsearch2_yylex();
*t = token;
*tlen = tokenlen;
@ -58,34 +62,39 @@ prsd_getlexeme(PG_FUNCTION_ARGS) {
}
/*
 * prsd_end - end function of the default parser.
 *
 * Calls end_parse(); the parser-state argument is not used.  Returns void.
 */
PG_FUNCTION_INFO_V1(prsd_end);
Datum prsd_end(PG_FUNCTION_ARGS);
Datum
prsd_end(PG_FUNCTION_ARGS) {
Datum prsd_end(PG_FUNCTION_ARGS);
Datum
prsd_end(PG_FUNCTION_ARGS)
{
/* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
end_parse();
PG_RETURN_VOID();
}
/*
 * Token-class predicates: each macro tests a token type number against a
 * fixed set of numeric codes.
 * NOTE(review): the codes are presumably the token ids of the default
 * parser (1 .. LASTNUM, described by tok_alias / lex_descr) -- confirm
 * against deflex.h before changing any of them.
 */
#define LEAVETOKEN(x) ( (x)==12 )
#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
#define ENDPUNCTOKEN(x) ( (x)==12 )
#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
#define ENDPUNCTOKEN(x) ( (x)==12 )
#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
/*
 * hlCheck - window of headline words handed to checkcondition_HL().
 *
 *	 words	first word of the window
 *	 len	number of words in the window
 */
typedef struct {
HLWORD *words;
int len;
} hlCheck;
typedef struct
{
HLWORD *words;
int len;
} hlCheck;
static bool
checkcondition_HL(void *checkval, ITEM *val) {
int i;
for(i=0;i<((hlCheck*)checkval)->len;i++) {
if ( ((hlCheck*)checkval)->words[i].item==val )
checkcondition_HL(void *checkval, ITEM * val)
{
int i;
for (i = 0; i < ((hlCheck *) checkval)->len; i++)
{
if (((hlCheck *) checkval)->words[i].item == val)
return true;
}
return false;
@ -93,21 +102,28 @@ checkcondition_HL(void *checkval, ITEM *val) {
static bool
hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
int i,j;
ITEM *item=GETQUERY(query);
int pos=*p;
*q=0;
*p=0x7fffffff;
hlCover(HLPRSTEXT * prs, QUERYTYPE * query, int *p, int *q)
{
int i,
j;
ITEM *item = GETQUERY(query);
int pos = *p;
for(j=0;j<query->size;j++) {
if ( item->type != VAL ) {
*q = 0;
*p = 0x7fffffff;
for (j = 0; j < query->size; j++)
{
if (item->type != VAL)
{
item++;
continue;
}
for(i=pos;i<prs->curwords;i++) {
if ( prs->words[i].item == item ) {
if ( i>*q)
for (i = pos; i < prs->curwords; i++)
{
if (prs->words[i].item == item)
{
if (i > *q)
*q = i;
break;
}
@ -115,32 +131,39 @@ hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
item++;
}
if ( *q==0 )
if (*q == 0)
return false;
item=GETQUERY(query);
for(j=0;j<query->size;j++) {
if ( item->type != VAL ) {
item = GETQUERY(query);
for (j = 0; j < query->size; j++)
{
if (item->type != VAL)
{
item++;
continue;
}
for(i=*q;i>=pos;i--) {
if ( prs->words[i].item == item ) {
if ( i<*p )
*p=i;
for (i = *q; i >= pos; i--)
{
if (prs->words[i].item == item)
{
if (i < *p)
*p = i;
break;
}
}
item++;
}
}
if ( *p<=*q ) {
hlCheck ch={ &(prs->words[*p]), *q-*p+1 };
if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) {
if (*p <= *q)
{
hlCheck ch = {&(prs->words[*p]), *q - *p + 1};
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL))
return true;
} else {
else
{
(*p)++;
return hlCover(prs,query,p,q);
return hlCover(prs, query, p, q);
}
}
@ -148,45 +171,54 @@ hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
}
PG_FUNCTION_INFO_V1(prsd_headline);
Datum prsd_headline(PG_FUNCTION_ARGS);
Datum
prsd_headline(PG_FUNCTION_ARGS) {
HLPRSTEXT *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
text *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
QUERYTYPE *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
Datum prsd_headline(PG_FUNCTION_ARGS);
/*
 * prsd_headline
 *		Pick the run of prs->words to show as a headline and flag its words.
 *
 * Arguments (all fetched with PG_GETARG_POINTER):
 *		0: HLPRSTEXT *prs	- parsed text; its words[] flags are updated in place
 *		1: text *opt		- option list, may be NULL
 *		2: QUERYTYPE *query	- query handed to hlCover()
 *
 * Options read from opt through parse_cfgdict() (keys compared with
 * strcasecmp, so case-insensitively):
 *		MaxWords	(default 35)
 *		MinWords	(default 15)
 *		ShortWord	(default 3)
 *		StartSel	(default "<b>" when not supplied)
 *		StopSel		(default "</b>" when not supplied)
 *
 * Raises ERROR (ERRCODE_INVALID_PARAMETER_VALUE) when options were given and
 * MinWords >= MaxWords, MinWords <= 0 or ShortWord < 0.
 *
 * Returns prs with in/selected/skip/replace set on words[bestb..beste] and
 * with startsel, stopsel, startsellen and stopsellen filled in.
 */
Datum
prsd_headline(PG_FUNCTION_ARGS)
{
HLPRSTEXT *prs = (HLPRSTEXT *) PG_GETARG_POINTER(0);
text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */
QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(2); /* can't be toasted */
/* from opt + start and end tag */
int min_words=15;
int max_words=35;
int shortword=3;
int min_words = 15;
int max_words = 35;
int shortword = 3;
/*
 * p, q: bounds handed to and updated by hlCover().
 * bestb, beste, bestlen: first index, last index and query-item count of
 * the best candidate so far; -1 means "none yet".
 * pose: last index of the candidate being built; poslen: its count of
 * non-repeated query items; curlen: its count of word tokens.
 */
int p=0,q=0;
int bestb=-1,beste=-1;
int bestlen=-1;
int pose=0, poslen, curlen;
int p = 0,
q = 0;
int bestb = -1,
beste = -1;
int bestlen = -1;
int pose = 0,
poslen,
curlen;
int i;
int i;
/*config*/
prs->startsel=NULL;
prs->stopsel=NULL;
if ( opt ) {
Map *map,*mptr;
parse_cfgdict(opt,&map);
mptr=map;
/* config */
prs->startsel = NULL;
prs->stopsel = NULL;
if (opt)
{
Map *map,
*mptr;
parse_cfgdict(opt, &map);
mptr = map;
/* Walk the parsed entries until one with a NULL key is reached. */
while (mptr && mptr->key)
{
if (strcasecmp(mptr->key, "MaxWords") == 0)
max_words = pg_atoi(mptr->value, 4, 1);
else if (strcasecmp(mptr->key, "MinWords") == 0)
min_words = pg_atoi(mptr->value, 4, 1);
else if (strcasecmp(mptr->key, "ShortWord") == 0)
shortword = pg_atoi(mptr->value, 4, 1);
else if (strcasecmp(mptr->key, "StartSel") == 0)
prs->startsel = pstrdup(mptr->value);
else if (strcasecmp(mptr->key, "StopSel") == 0)
prs->stopsel = pstrdup(mptr->value);
while(mptr && mptr->key) {
if ( strcasecmp(mptr->key,"MaxWords")==0 )
max_words=pg_atoi(mptr->value,4,1);
else if ( strcasecmp(mptr->key,"MinWords")==0 )
min_words=pg_atoi(mptr->value,4,1);
else if ( strcasecmp(mptr->key,"ShortWord")==0 )
shortword=pg_atoi(mptr->value,4,1);
else if ( strcasecmp(mptr->key,"StartSel")==0 )
prs->startsel=pstrdup(mptr->value);
else if ( strcasecmp(mptr->key,"StopSel")==0 )
prs->stopsel=pstrdup(mptr->value);
/* StartSel/StopSel were copied with pstrdup, so the entry can be freed. */
pfree(mptr->key);
pfree(mptr->value);
@ -194,104 +226,118 @@ prsd_headline(PG_FUNCTION_ARGS) {
}
pfree(map);
/* Sanity-check the option values before they drive the loops below. */
if ( min_words >= max_words )
if (min_words >= max_words)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be MinWords < MaxWords")));
if ( min_words<=0 )
if (min_words <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be MinWords > 0")));
if ( shortword<0 )
if (shortword < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("must be ShortWord >= 0")));
}
/*
 * Evaluate one candidate per successful hlCover() call; p is advanced by
 * one after each candidate so the next call starts further along.
 * NOTE(review): presumably hlCover() sets p..q to the next span of words
 * covering the query - confirm against hlCover().
 */
while( hlCover(prs,query,&p,&q) ) {
while (hlCover(prs, query, &p, &q))
{
/* find cover len in words */
/*
 * Over words p..q, capped at max_words word tokens: curlen counts tokens
 * that are not NONWORDTOKEN, poslen counts query items not marked
 * repeated, and pose tracks the last index visited.
 */
curlen=0;
poslen=0;
for(i=p;i<=q && curlen < max_words ; i++) {
if ( !NONWORDTOKEN(prs->words[i].type) )
curlen = 0;
poslen = 0;
for (i = p; i <= q && curlen < max_words; i++)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if ( prs->words[i].item && !prs->words[i].repeated )
poslen++;
pose=i;
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
pose = i;
}
/*
 * Skip this cover if it holds fewer query items than the current best and
 * the best already ends on an acceptable word (not a NOENDTOKEN type and
 * longer than shortword).  While bestlen is still -1 the first test is
 * false, so words[beste] is not evaluated with beste == -1.
 */
if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
{
/* best already found, so try one more cover */
p++;
continue;
}
/*
 * Cover is shorter than max_words: extend it to the right.  The scan
 * restarts at i - 1 (the last index of the loop above); the i != q test
 * keeps word q from being counted twice.  It stops at the first acceptable
 * end word once curlen has reached min_words, or when max_words or the end
 * of the text is hit.
 */
if ( curlen < max_words ) { /* find good end */
for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
if ( i!=q ) {
if ( !NONWORDTOKEN(prs->words[i].type) )
if (curlen < max_words)
{ /* find good end */
for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
{
if (i != q)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
if ( prs->words[i].item && !prs->words[i].repeated )
if (prs->words[i].item && !prs->words[i].repeated)
poslen++;
}
pose=i;
if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
if ( curlen>=min_words )
if (curlen >= min_words)
break;
}
/*
 * Otherwise the cover already reached max_words: walk backwards, giving
 * words up while curlen > min_words, until an acceptable end word is met.
 * NOTE(review): i enters this loop one past the last word counted above
 * (the then-branch compensates with i - 1, this branch does not), so the
 * first word examined was never added to curlen/poslen - confirm intended.
 */
} else { /* shorter cover :((( */
for(;curlen>min_words;i--) {
if ( !NONWORDTOKEN(prs->words[i].type) )
}
else
{ /* shorter cover :((( */
for (; curlen > min_words; i--)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen--;
if ( prs->words[i].item && !prs->words[i].repeated )
if (prs->words[i].item && !prs->words[i].repeated)
poslen--;
pose=i;
if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
pose = i;
if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword)
continue;
break;
}
}
/*
 * Adopt this candidate when there is no best yet, or when it has more
 * query items and ends on an acceptable word, or when it ends on an
 * acceptable word while the current best does not.
 */
if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
bestb=p; beste=pose;
bestlen=poslen;
}
if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
(bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)))
{
bestb = p;
beste = pose;
bestlen = poslen;
}
p++;
}
/*
 * No candidate was recorded: fall back to the start of the text, taking
 * words from index 0 until min_words word tokens have been seen or the
 * text ends.
 */
if ( bestlen<0 ) {
curlen=0;
poslen=0;
for(i=0;i<prs->curwords && curlen<min_words ; i++) {
if ( !NONWORDTOKEN(prs->words[i].type) )
if (bestlen < 0)
{
curlen = 0;
poslen = 0;
for (i = 0; i < prs->curwords && curlen < min_words; i++)
{
if (!NONWORDTOKEN(prs->words[i].type))
curlen++;
pose=i;
pose = i;
}
bestb=0; beste=pose;
bestb = 0;
beste = pose;
}
/*
 * Flag every word of the chosen range: "in" on all of them, "selected" on
 * query items, "skip" on repeated words and "replace" on HLIDIGNORE types.
 */
for(i=bestb;i<=beste;i++) {
if ( prs->words[i].item )
prs->words[i].selected=1;
if ( prs->words[i].repeated )
prs->words[i].skip=1;
if ( HLIDIGNORE(prs->words[i].type) )
prs->words[i].replace=1;
for (i = bestb; i <= beste; i++)
{
if (prs->words[i].item)
prs->words[i].selected = 1;
if (prs->words[i].repeated)
prs->words[i].skip = 1;
if (HLIDIGNORE(prs->words[i].type))
prs->words[i].replace = 1;
prs->words[i].in=1;
prs->words[i].in = 1;
}
/* Apply default markers when the options did not supply them, then cache their lengths. */
if (!prs->startsel)
prs->startsel=pstrdup("<b>");
prs->startsel = pstrdup("<b>");
if (!prs->stopsel)
prs->stopsel=pstrdup("</b>");
prs->startsellen=strlen(prs->startsel);
prs->stopsellen=strlen(prs->stopsel);
prs->stopsel = pstrdup("</b>");
prs->startsellen = strlen(prs->startsel);
prs->stopsellen = strlen(prs->stopsel);
PG_RETURN_POINTER(prs);
}