mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
pgindent run for 8.2.
This commit is contained in:
@ -164,16 +164,19 @@ get_oidnamespace(Oid funcoid)
|
||||
return nspoid;
|
||||
}
|
||||
|
||||
/* if path is relative, take it as relative to share dir */
|
||||
/* if path is relative, take it as relative to share dir */
|
||||
char *
|
||||
to_absfilename(char *filename) {
|
||||
if (!is_absolute_path(filename)) {
|
||||
char sharepath[MAXPGPATH];
|
||||
char *absfn;
|
||||
#ifdef WIN32
|
||||
char delim = '\\';
|
||||
to_absfilename(char *filename)
|
||||
{
|
||||
if (!is_absolute_path(filename))
|
||||
{
|
||||
char sharepath[MAXPGPATH];
|
||||
char *absfn;
|
||||
|
||||
#ifdef WIN32
|
||||
char delim = '\\';
|
||||
#else
|
||||
char delim = '/';
|
||||
char delim = '/';
|
||||
#endif
|
||||
get_share_path(my_exec_path, sharepath);
|
||||
absfn = palloc(strlen(sharepath) + strlen(filename) + 2);
|
||||
|
@ -14,7 +14,7 @@ text *mtextdup(text *in);
|
||||
|
||||
int text_cmp(text *a, text *b);
|
||||
|
||||
char * to_absfilename(char *filename);
|
||||
char *to_absfilename(char *filename);
|
||||
|
||||
#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
|
||||
#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.12 2006/05/31 14:05:31 teodor Exp $ */
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.c,v 1.13 2006/10/04 00:29:46 momjian Exp $ */
|
||||
|
||||
/*
|
||||
* interface functions to dictionary
|
||||
@ -102,7 +102,8 @@ comparedict(const void *a, const void *b)
|
||||
}
|
||||
|
||||
static void
|
||||
insertdict(Oid id) {
|
||||
insertdict(Oid id)
|
||||
{
|
||||
DictInfo newdict;
|
||||
|
||||
if (DList.len == DList.reallen)
|
||||
@ -143,7 +144,7 @@ finddict(Oid id)
|
||||
return DList.last_dict;
|
||||
}
|
||||
|
||||
/* insert new dictionary */
|
||||
/* insert new dictionary */
|
||||
insertdict(id);
|
||||
return finddict(id); /* qsort changed order!! */ ;
|
||||
}
|
||||
@ -201,30 +202,31 @@ lexize(PG_FUNCTION_ARGS)
|
||||
*ptr;
|
||||
Datum *da;
|
||||
ArrayType *a;
|
||||
DictSubState dstate = { false, false, NULL };
|
||||
DictSubState dstate = {false, false, NULL};
|
||||
|
||||
SET_FUNCOID();
|
||||
dict = finddict(PG_GETARG_OID(0));
|
||||
|
||||
ptr = res = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
|
||||
PointerGetDatum(&dstate)
|
||||
FunctionCall4(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
|
||||
PointerGetDatum(&dstate)
|
||||
)
|
||||
);
|
||||
|
||||
if (dstate.getnext) {
|
||||
dstate.isend = true;
|
||||
if (dstate.getnext)
|
||||
{
|
||||
dstate.isend = true;
|
||||
ptr = res = (TSLexeme *) DatumGetPointer(
|
||||
FunctionCall4(&(dict->lexize_info),
|
||||
FunctionCall4(&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(VARDATA(in)),
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
|
||||
PointerGetDatum(&dstate)
|
||||
Int32GetDatum(VARSIZE(in) - VARHDRSZ),
|
||||
PointerGetDatum(&dstate)
|
||||
)
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.8 2006/10/04 00:29:46 momjian Exp $ */
|
||||
|
||||
#ifndef __DICT_H__
|
||||
#define __DICT_H__
|
||||
@ -30,11 +30,14 @@ DictInfo *finddict(Oid id);
|
||||
Oid name2id_dict(text *name);
|
||||
void reset_dict(void);
|
||||
|
||||
typedef struct {
|
||||
bool isend; /* in: marks for lexize_info about text end is reached */
|
||||
bool getnext; /* out: dict wants next lexeme */
|
||||
void *private; /* internal dict state between calls with getnext == true */
|
||||
} DictSubState;
|
||||
typedef struct
|
||||
{
|
||||
bool isend; /* in: marks for lexize_info about text end is
|
||||
* reached */
|
||||
bool getnext; /* out: dict wants next lexeme */
|
||||
void *private; /* internal dict state between calls with
|
||||
* getnext == true */
|
||||
} DictSubState;
|
||||
|
||||
/* simple parser of cfg string */
|
||||
typedef struct
|
||||
@ -51,13 +54,8 @@ typedef struct
|
||||
/*
|
||||
* number of variant of split word , for example Word 'fotballklubber'
|
||||
* (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
|
||||
* ball, klubb ). So, dictionary should return:
|
||||
* nvariant lexeme
|
||||
* 1 fotball
|
||||
* 1 klubb
|
||||
* 2 fot
|
||||
* 2 ball
|
||||
* 2 klubb
|
||||
* ball, klubb ). So, dictionary should return: nvariant lexeme 1
|
||||
* fotball 1 klubb 2 fot 2 ball 2 klubb
|
||||
*/
|
||||
uint16 nvariant;
|
||||
|
||||
@ -74,38 +72,43 @@ typedef struct
|
||||
* Lexize subsystem
|
||||
*/
|
||||
|
||||
typedef struct ParsedLex {
|
||||
int type;
|
||||
char *lemm;
|
||||
int lenlemm;
|
||||
typedef struct ParsedLex
|
||||
{
|
||||
int type;
|
||||
char *lemm;
|
||||
int lenlemm;
|
||||
bool resfollow;
|
||||
struct ParsedLex *next;
|
||||
} ParsedLex;
|
||||
struct ParsedLex *next;
|
||||
} ParsedLex;
|
||||
|
||||
typedef struct ListParsedLex {
|
||||
ParsedLex *head;
|
||||
ParsedLex *tail;
|
||||
} ListParsedLex;
|
||||
typedef struct ListParsedLex
|
||||
{
|
||||
ParsedLex *head;
|
||||
ParsedLex *tail;
|
||||
} ListParsedLex;
|
||||
|
||||
typedef struct {
|
||||
TSCfgInfo *cfg;
|
||||
Oid curDictId;
|
||||
int posDict;
|
||||
DictSubState dictState;
|
||||
ParsedLex *curSub;
|
||||
ListParsedLex towork; /* current list to work */
|
||||
ListParsedLex waste; /* list of lexemes that already lexized */
|
||||
typedef struct
|
||||
{
|
||||
TSCfgInfo *cfg;
|
||||
Oid curDictId;
|
||||
int posDict;
|
||||
DictSubState dictState;
|
||||
ParsedLex *curSub;
|
||||
ListParsedLex towork; /* current list to work */
|
||||
ListParsedLex waste; /* list of lexemes that already lexized */
|
||||
|
||||
/* fields to store last variant to lexize (basically, thesaurus
|
||||
or similar to, which wants several lexemes */
|
||||
|
||||
ParsedLex *lastRes;
|
||||
TSLexeme *tmpRes;
|
||||
} LexizeData;
|
||||
/*
|
||||
* fields to store last variant to lexize (basically, thesaurus or similar
|
||||
* to, which wants several lexemes
|
||||
*/
|
||||
|
||||
ParsedLex *lastRes;
|
||||
TSLexeme *tmpRes;
|
||||
} LexizeData;
|
||||
|
||||
|
||||
void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
|
||||
void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
|
||||
TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
|
||||
void LexizeInit(LexizeData * ld, TSCfgInfo * cfg);
|
||||
void LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm);
|
||||
TSLexeme *LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem);
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -15,29 +15,32 @@
|
||||
#include "query_cleanup.h"
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsvector);
|
||||
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
|
||||
Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_extract_tsvector(PG_FUNCTION_ARGS) {
|
||||
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *vector = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if ( vector->size > 0 ) {
|
||||
int i;
|
||||
WordEntry *we = ARRPTR( vector );
|
||||
if (vector->size > 0)
|
||||
{
|
||||
int i;
|
||||
WordEntry *we = ARRPTR(vector);
|
||||
|
||||
*nentries = (uint32)vector->size;
|
||||
entries = (Datum*)palloc( sizeof(Datum) * vector->size );
|
||||
*nentries = (uint32) vector->size;
|
||||
entries = (Datum *) palloc(sizeof(Datum) * vector->size);
|
||||
|
||||
for(i=0;i<vector->size;i++) {
|
||||
text *txt = (text*)palloc( VARHDRSZ + we->len );
|
||||
for (i = 0; i < vector->size; i++)
|
||||
{
|
||||
text *txt = (text *) palloc(VARHDRSZ + we->len);
|
||||
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
|
||||
memcpy( VARDATA(txt), STRPTR( vector ) + we->pos, we->len );
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + we->len;
|
||||
memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len);
|
||||
|
||||
entries[i] = PointerGetDatum( txt );
|
||||
entries[i] = PointerGetDatum(txt);
|
||||
|
||||
we++;
|
||||
}
|
||||
@ -49,45 +52,50 @@ gin_extract_tsvector(PG_FUNCTION_ARGS) {
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_tsquery);
|
||||
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
|
||||
Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_extract_tsquery(PG_FUNCTION_ARGS) {
|
||||
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
uint32 *nentries = (uint32*)PG_GETARG_POINTER(1);
|
||||
StrategyNumber strategy = DatumGetUInt16( PG_GETARG_DATUM(2) );
|
||||
Datum *entries = NULL;
|
||||
gin_extract_tsquery(PG_FUNCTION_ARGS)
|
||||
{
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
||||
StrategyNumber strategy = DatumGetUInt16(PG_GETARG_DATUM(2));
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if ( query->size > 0 ) {
|
||||
int4 i, j=0, len;
|
||||
ITEM *item;
|
||||
if (query->size > 0)
|
||||
{
|
||||
int4 i,
|
||||
j = 0,
|
||||
len;
|
||||
ITEM *item;
|
||||
|
||||
item = clean_NOT_v2(GETQUERY(query), &len);
|
||||
if ( !item )
|
||||
elog(ERROR,"Query requires full scan, GIN doesn't support it");
|
||||
if (!item)
|
||||
elog(ERROR, "Query requires full scan, GIN doesn't support it");
|
||||
|
||||
item = GETQUERY(query);
|
||||
|
||||
for(i=0; i<query->size; i++)
|
||||
if ( item[i].type == VAL )
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
(*nentries)++;
|
||||
|
||||
entries = (Datum*)palloc( sizeof(Datum) * (*nentries) );
|
||||
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
|
||||
|
||||
for(i=0; i<query->size; i++)
|
||||
if ( item[i].type == VAL ) {
|
||||
text *txt;
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
{
|
||||
text *txt;
|
||||
|
||||
txt = (text*)palloc( VARHDRSZ + item[i].length );
|
||||
txt = (text *) palloc(VARHDRSZ + item[i].length);
|
||||
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
|
||||
memcpy( VARDATA(txt), GETOPERAND( query ) + item[i].distance, item[i].length );
|
||||
VARATT_SIZEP(txt) = VARHDRSZ + item[i].length;
|
||||
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
|
||||
|
||||
entries[j++] = PointerGetDatum( txt );
|
||||
entries[j++] = PointerGetDatum(txt);
|
||||
|
||||
if ( strategy == 1 && item[i].weight != 0 )
|
||||
elog(ERROR,"With class of lexeme restrictions use @@@ operation");
|
||||
if (strategy == 1 && item[i].weight != 0)
|
||||
elog(ERROR, "With class of lexeme restrictions use @@@ operation");
|
||||
}
|
||||
|
||||
}
|
||||
@ -96,51 +104,54 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) {
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
ITEM *frst;
|
||||
bool *mapped_check;
|
||||
} GinChkVal;
|
||||
typedef struct
|
||||
{
|
||||
ITEM *frst;
|
||||
bool *mapped_check;
|
||||
} GinChkVal;
|
||||
|
||||
static bool
|
||||
checkcondition_gin(void *checkval, ITEM * val) {
|
||||
GinChkVal *gcv = (GinChkVal*)checkval;
|
||||
checkcondition_gin(void *checkval, ITEM * val)
|
||||
{
|
||||
GinChkVal *gcv = (GinChkVal *) checkval;
|
||||
|
||||
return gcv->mapped_check[ val - gcv->frst ];
|
||||
return gcv->mapped_check[val - gcv->frst];
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_ts_consistent);
|
||||
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
|
||||
Datum gin_ts_consistent(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
gin_ts_consistent(PG_FUNCTION_ARGS) {
|
||||
bool *check = (bool*)PG_GETARG_POINTER(0);
|
||||
QUERYTYPE *query = (QUERYTYPE*) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
bool res = FALSE;
|
||||
gin_ts_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
bool res = FALSE;
|
||||
|
||||
if ( query->size > 0 ) {
|
||||
int4 i, j=0;
|
||||
ITEM *item;
|
||||
if (query->size > 0)
|
||||
{
|
||||
int4 i,
|
||||
j = 0;
|
||||
ITEM *item;
|
||||
GinChkVal gcv;
|
||||
|
||||
gcv.frst = item = GETQUERY(query);
|
||||
gcv.mapped_check= (bool*)palloc( sizeof(bool) * query->size );
|
||||
gcv.frst = item = GETQUERY(query);
|
||||
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
|
||||
|
||||
for(i=0; i<query->size; i++)
|
||||
if ( item[i].type == VAL )
|
||||
gcv.mapped_check[ i ] = check[ j++ ];
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
gcv.mapped_check[i] = check[j++];
|
||||
|
||||
|
||||
res = TS_execute(
|
||||
GETQUERY(query),
|
||||
&gcv,
|
||||
true,
|
||||
checkcondition_gin
|
||||
);
|
||||
GETQUERY(query),
|
||||
&gcv,
|
||||
true,
|
||||
checkcondition_gin
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.c,v 1.14 2006/06/28 12:00:06 teodor Exp $ */
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/gistidx.c,v 1.15 2006/10/04 00:29:46 momjian Exp $ */
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
@ -447,7 +447,7 @@ sizebitvec(BITVECP sign)
|
||||
i;
|
||||
|
||||
LOOPBYTE(
|
||||
size += number_of_ones[(unsigned char) sign[i]];
|
||||
size += number_of_ones[(unsigned char) sign[i]];
|
||||
);
|
||||
return size;
|
||||
}
|
||||
@ -460,8 +460,8 @@ hemdistsign(BITVECP a, BITVECP b)
|
||||
dist = 0;
|
||||
|
||||
LOOPBYTE(
|
||||
diff = (unsigned char) (a[i] ^ b[i]);
|
||||
dist += number_of_ones[diff];
|
||||
diff = (unsigned char) (a[i] ^ b[i]);
|
||||
dist += number_of_ones[diff];
|
||||
);
|
||||
return dist;
|
||||
}
|
||||
@ -533,7 +533,7 @@ typedef struct
|
||||
{
|
||||
OffsetNumber pos;
|
||||
int4 cost;
|
||||
} SPLITCOST;
|
||||
} SPLITCOST;
|
||||
|
||||
static int
|
||||
comparecost(const void *a, const void *b)
|
||||
|
@ -9,7 +9,7 @@ RS_isRegis(const char *str)
|
||||
{
|
||||
if (t_isalpha(str) ||
|
||||
t_iseq(str, '[') ||
|
||||
t_iseq(str,']') ||
|
||||
t_iseq(str, ']') ||
|
||||
t_iseq(str, '^'))
|
||||
str += pg_mblen(str);
|
||||
else
|
||||
@ -42,13 +42,13 @@ RS_compile(Regis * r, bool issuffix, char *str)
|
||||
{
|
||||
int len = strlen(str);
|
||||
int state = RS_IN_WAIT;
|
||||
char *c = (char*)str;
|
||||
char *c = (char *) str;
|
||||
RegisNode *ptr = NULL;
|
||||
|
||||
memset(r, 0, sizeof(Regis));
|
||||
r->issuffix = (issuffix) ? 1 : 0;
|
||||
|
||||
while(*c)
|
||||
while (*c)
|
||||
{
|
||||
if (state == RS_IN_WAIT)
|
||||
{
|
||||
@ -62,7 +62,7 @@ RS_compile(Regis * r, bool issuffix, char *str)
|
||||
ptr->type = RSF_ONEOF;
|
||||
ptr->len = pg_mblen(c);
|
||||
}
|
||||
else if (t_iseq(c,'['))
|
||||
else if (t_iseq(c, '['))
|
||||
{
|
||||
if (ptr)
|
||||
ptr = newRegisNode(ptr, len);
|
||||
@ -72,11 +72,11 @@ RS_compile(Regis * r, bool issuffix, char *str)
|
||||
state = RS_IN_ONEOF;
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "Error in regis: %s", str );
|
||||
ts_error(ERROR, "Error in regis: %s", str);
|
||||
}
|
||||
else if (state == RS_IN_ONEOF)
|
||||
{
|
||||
if (t_iseq(c,'^'))
|
||||
if (t_iseq(c, '^'))
|
||||
{
|
||||
ptr->type = RSF_NONEOF;
|
||||
state = RS_IN_NONEOF;
|
||||
@ -94,10 +94,10 @@ RS_compile(Regis * r, bool issuffix, char *str)
|
||||
{
|
||||
if (t_isalpha(c))
|
||||
{
|
||||
COPYCHAR(ptr->data+ptr->len, c);
|
||||
ptr->len+=pg_mblen(c);
|
||||
COPYCHAR(ptr->data + ptr->len, c);
|
||||
ptr->len += pg_mblen(c);
|
||||
}
|
||||
else if (t_iseq(c,']'))
|
||||
else if (t_iseq(c, ']'))
|
||||
state = RS_IN_WAIT;
|
||||
else
|
||||
ts_error(ERROR, "Error in regis: %s", str);
|
||||
@ -133,28 +133,34 @@ RS_free(Regis * r)
|
||||
|
||||
#ifdef TS_USE_WIDE
|
||||
static bool
|
||||
mb_strchr(char *str, char *c) {
|
||||
int clen = pg_mblen(c), plen,i;
|
||||
char *ptr =str;
|
||||
bool res=false;
|
||||
mb_strchr(char *str, char *c)
|
||||
{
|
||||
int clen = pg_mblen(c),
|
||||
plen,
|
||||
i;
|
||||
char *ptr = str;
|
||||
bool res = false;
|
||||
|
||||
clen = pg_mblen(c);
|
||||
while( *ptr && !res) {
|
||||
while (*ptr && !res)
|
||||
{
|
||||
plen = pg_mblen(ptr);
|
||||
if ( plen == clen ) {
|
||||
i=plen;
|
||||
if (plen == clen)
|
||||
{
|
||||
i = plen;
|
||||
res = true;
|
||||
while(i--)
|
||||
if ( *(ptr+i) != *(c+i) ) {
|
||||
while (i--)
|
||||
if (*(ptr + i) != *(c + i))
|
||||
{
|
||||
res = false;
|
||||
break;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ptr += plen;
|
||||
}
|
||||
|
||||
return res;
|
||||
ptr += plen;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
#else
|
||||
#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
|
||||
@ -165,21 +171,23 @@ bool
|
||||
RS_execute(Regis * r, char *str)
|
||||
{
|
||||
RegisNode *ptr = r->node;
|
||||
char *c = str;
|
||||
int len=0;
|
||||
char *c = str;
|
||||
int len = 0;
|
||||
|
||||
while(*c) {
|
||||
while (*c)
|
||||
{
|
||||
len++;
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
}
|
||||
|
||||
if (len < r->nchar)
|
||||
return 0;
|
||||
|
||||
c = str;
|
||||
if (r->issuffix) {
|
||||
if (r->issuffix)
|
||||
{
|
||||
len -= r->nchar;
|
||||
while(len-- > 0)
|
||||
while (len-- > 0)
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
|
||||
@ -189,18 +197,18 @@ RS_execute(Regis * r, char *str)
|
||||
switch (ptr->type)
|
||||
{
|
||||
case RSF_ONEOF:
|
||||
if ( mb_strchr((char *) ptr->data, c) != true )
|
||||
if (mb_strchr((char *) ptr->data, c) != true)
|
||||
return false;
|
||||
break;
|
||||
case RSF_NONEOF:
|
||||
if ( mb_strchr((char *) ptr->data, c) == true )
|
||||
if (mb_strchr((char *) ptr->data, c) == true)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
ts_error(ERROR, "RS_execute: Unknown type node: %d\n", ptr->type);
|
||||
}
|
||||
ptr = ptr->next;
|
||||
c+=pg_mblen(c);
|
||||
c += pg_mblen(c);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -27,12 +27,12 @@ typedef struct Regis
|
||||
unused:15;
|
||||
} Regis;
|
||||
|
||||
bool RS_isRegis(const char *str);
|
||||
bool RS_isRegis(const char *str);
|
||||
|
||||
void RS_compile(Regis * r, bool issuffix, char *str);
|
||||
void RS_compile(Regis * r, bool issuffix, char *str);
|
||||
void RS_free(Regis * r);
|
||||
|
||||
/*returns true if matches */
|
||||
bool RS_execute(Regis * r, char *str);
|
||||
bool RS_execute(Regis * r, char *str);
|
||||
|
||||
#endif
|
||||
|
@ -41,16 +41,18 @@ strnduplicate(char *s, int len)
|
||||
}
|
||||
|
||||
static char *
|
||||
findchar(char *str, int c) {
|
||||
while( *str ) {
|
||||
if ( t_iseq(str, c) )
|
||||
findchar(char *str, int c)
|
||||
{
|
||||
while (*str)
|
||||
{
|
||||
if (t_iseq(str, c))
|
||||
return str;
|
||||
str+=pg_mblen(str);
|
||||
str += pg_mblen(str);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* backward string compare for suffix tree operations */
|
||||
static int
|
||||
@ -126,16 +128,16 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
|
||||
if (Conf->mspell)
|
||||
{
|
||||
Conf->mspell += 1024 * 20;
|
||||
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL*));
|
||||
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
|
||||
}
|
||||
else
|
||||
{
|
||||
Conf->mspell = 1024 * 20;
|
||||
Conf->Spell = (SPELL **) palloc(Conf->mspell * sizeof(SPELL*));
|
||||
Conf->Spell = (SPELL **) palloc(Conf->mspell * sizeof(SPELL *));
|
||||
}
|
||||
}
|
||||
Conf->Spell[Conf->nspell] = (SPELL*)palloc(SPELLHDRSZ + strlen(word) + 1);
|
||||
strcpy( Conf->Spell[Conf->nspell]->word ,word );
|
||||
Conf->Spell[Conf->nspell] = (SPELL *) palloc(SPELLHDRSZ + strlen(word) + 1);
|
||||
strcpy(Conf->Spell[Conf->nspell]->word, word);
|
||||
strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, 16);
|
||||
Conf->nspell++;
|
||||
return (0);
|
||||
@ -155,7 +157,7 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
|
||||
char *s;
|
||||
const char *flag;
|
||||
|
||||
pg_verifymbstr( str, strlen(str), false);
|
||||
pg_verifymbstr(str, strlen(str), false);
|
||||
|
||||
flag = NULL;
|
||||
if ((s = findchar(str, '/')))
|
||||
@ -181,11 +183,12 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
|
||||
s = str;
|
||||
while (*s)
|
||||
{
|
||||
if (t_isspace(s)) {
|
||||
if (t_isspace(s))
|
||||
{
|
||||
*s = '\0';
|
||||
break;
|
||||
}
|
||||
s+=pg_mblen(s);
|
||||
s += pg_mblen(s);
|
||||
}
|
||||
lowerstr(str);
|
||||
|
||||
@ -268,12 +271,13 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
|
||||
}
|
||||
else
|
||||
{
|
||||
int masklen = strlen(mask);
|
||||
int masklen = strlen(mask);
|
||||
|
||||
Conf->Affix[Conf->naffixes].issimple = 0;
|
||||
Conf->Affix[Conf->naffixes].isregis = 0;
|
||||
Conf->Affix[Conf->naffixes].mask = (char *) malloc(masklen + 2);
|
||||
if (type == FF_SUFFIX)
|
||||
sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
|
||||
if (type == FF_SUFFIX)
|
||||
sprintf(Conf->Affix[Conf->naffixes].mask, "%s$", mask);
|
||||
else
|
||||
sprintf(Conf->Affix[Conf->naffixes].mask, "^%s", mask);
|
||||
}
|
||||
@ -286,83 +290,121 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
|
||||
|
||||
Conf->Affix[Conf->naffixes].find = (find && *find) ? strdup(find) : VoidString;
|
||||
MEMOUT(Conf->Affix[Conf->naffixes].find);
|
||||
if ( (Conf->Affix[Conf->naffixes].replen = strlen(repl)) > 0 ) {
|
||||
if ((Conf->Affix[Conf->naffixes].replen = strlen(repl)) > 0)
|
||||
{
|
||||
Conf->Affix[Conf->naffixes].repl = strdup(repl);
|
||||
MEMOUT(Conf->Affix[Conf->naffixes].repl);
|
||||
} else
|
||||
Conf->Affix[Conf->naffixes].repl = VoidString;
|
||||
}
|
||||
else
|
||||
Conf->Affix[Conf->naffixes].repl = VoidString;
|
||||
Conf->naffixes++;
|
||||
return (0);
|
||||
}
|
||||
|
||||
#define PAE_WAIT_MASK 0
|
||||
#define PAE_INMASK 1
|
||||
#define PAE_WAIT_FIND 2
|
||||
#define PAE_INFIND 3
|
||||
#define PAE_WAIT_REPL 4
|
||||
#define PAE_INREPL 5
|
||||
#define PAE_INMASK 1
|
||||
#define PAE_WAIT_FIND 2
|
||||
#define PAE_INFIND 3
|
||||
#define PAE_WAIT_REPL 4
|
||||
#define PAE_INREPL 5
|
||||
|
||||
static bool
|
||||
parse_affentry( char *str, char *mask, char *find, char *repl, int line ) {
|
||||
int state = PAE_WAIT_MASK;
|
||||
char *pmask=mask, *pfind=find, *prepl=repl;
|
||||
parse_affentry(char *str, char *mask, char *find, char *repl, int line)
|
||||
{
|
||||
int state = PAE_WAIT_MASK;
|
||||
char *pmask = mask,
|
||||
*pfind = find,
|
||||
*prepl = repl;
|
||||
|
||||
*mask = *find = *repl = '\0';
|
||||
|
||||
while(*str) {
|
||||
if ( state == PAE_WAIT_MASK ) {
|
||||
if ( t_iseq(str,'#') )
|
||||
while (*str)
|
||||
{
|
||||
if (state == PAE_WAIT_MASK)
|
||||
{
|
||||
if (t_iseq(str, '#'))
|
||||
return false;
|
||||
else if (!t_isspace(str)) {
|
||||
else if (!t_isspace(str))
|
||||
{
|
||||
COPYCHAR(pmask, str);
|
||||
pmask += pg_mblen(str);
|
||||
state = PAE_INMASK;
|
||||
}
|
||||
} else if ( state == PAE_INMASK ) {
|
||||
if ( t_iseq(str,'>') ) {
|
||||
*pmask='\0';
|
||||
}
|
||||
else if (state == PAE_INMASK)
|
||||
{
|
||||
if (t_iseq(str, '>'))
|
||||
{
|
||||
*pmask = '\0';
|
||||
state = PAE_WAIT_FIND;
|
||||
} else if (!t_isspace(str)) {
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
{
|
||||
COPYCHAR(pmask, str);
|
||||
pmask += pg_mblen(str);
|
||||
}
|
||||
} else if ( state == PAE_WAIT_FIND ) {
|
||||
if ( t_iseq(str,'-') ) {
|
||||
}
|
||||
else if (state == PAE_WAIT_FIND)
|
||||
{
|
||||
if (t_iseq(str, '-'))
|
||||
{
|
||||
state = PAE_INFIND;
|
||||
} else if (t_isalpha(str) || t_iseq(str,'\'') /* english 's */) {
|
||||
COPYCHAR(prepl,str);
|
||||
}
|
||||
else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
|
||||
{
|
||||
COPYCHAR(prepl, str);
|
||||
prepl += pg_mblen(str);
|
||||
state = PAE_INREPL;
|
||||
} else if (!t_isspace(str))
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
ts_error(ERROR, "Affix parse error at %d line", line);
|
||||
} else if ( state == PAE_INFIND ) {
|
||||
if ( t_iseq(str,',') ) {
|
||||
*pfind='\0';
|
||||
}
|
||||
else if (state == PAE_INFIND)
|
||||
{
|
||||
if (t_iseq(str, ','))
|
||||
{
|
||||
*pfind = '\0';
|
||||
state = PAE_WAIT_REPL;
|
||||
} else if (t_isalpha(str)) {
|
||||
COPYCHAR(pfind,str);
|
||||
}
|
||||
else if (t_isalpha(str))
|
||||
{
|
||||
COPYCHAR(pfind, str);
|
||||
pfind += pg_mblen(str);
|
||||
} else if (!t_isspace(str))
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
ts_error(ERROR, "Affix parse error at %d line", line);
|
||||
} else if ( state == PAE_WAIT_REPL ) {
|
||||
if ( t_iseq(str,'-') ) {
|
||||
break; /* void repl */
|
||||
} else if ( t_isalpha(str) ) {
|
||||
COPYCHAR(prepl,str);
|
||||
}
|
||||
else if (state == PAE_WAIT_REPL)
|
||||
{
|
||||
if (t_iseq(str, '-'))
|
||||
{
|
||||
break; /* void repl */
|
||||
}
|
||||
else if (t_isalpha(str))
|
||||
{
|
||||
COPYCHAR(prepl, str);
|
||||
prepl += pg_mblen(str);
|
||||
state = PAE_INREPL;
|
||||
} else if (!t_isspace(str))
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
ts_error(ERROR, "Affix parse error at %d line", line);
|
||||
} else if ( state == PAE_INREPL ) {
|
||||
if ( t_iseq(str,'#') ) {
|
||||
}
|
||||
else if (state == PAE_INREPL)
|
||||
{
|
||||
if (t_iseq(str, '#'))
|
||||
{
|
||||
*prepl = '\0';
|
||||
break;
|
||||
} else if ( t_isalpha(str) ) {
|
||||
COPYCHAR(prepl,str);
|
||||
}
|
||||
else if (t_isalpha(str))
|
||||
{
|
||||
COPYCHAR(prepl, str);
|
||||
prepl += pg_mblen(str);
|
||||
} else if (!t_isspace(str))
|
||||
}
|
||||
else if (!t_isspace(str))
|
||||
ts_error(ERROR, "Affix parse error at %d line", line);
|
||||
} else
|
||||
}
|
||||
else
|
||||
ts_error(ERROR, "Unknown state in parse_affentry: %d", state);
|
||||
|
||||
str += pg_mblen(str);
|
||||
@ -370,8 +412,8 @@ parse_affentry( char *str, char *mask, char *find, char *repl, int line ) {
|
||||
|
||||
*pmask = *pfind = *prepl = '\0';
|
||||
|
||||
return ( *mask && ( *find || *repl) ) ? true : false;
|
||||
}
|
||||
return (*mask && (*find || *repl)) ? true : false;
|
||||
}
|
||||
|
||||
int
|
||||
NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
@ -387,8 +429,8 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
int flag = 0;
|
||||
char flagflags = 0;
|
||||
FILE *affix;
|
||||
int line=0;
|
||||
int oldformat = 0;
|
||||
int line = 0;
|
||||
int oldformat = 0;
|
||||
|
||||
if (!(affix = fopen(filename, "r")))
|
||||
return (1);
|
||||
@ -397,18 +439,20 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
while (fgets(str, sizeof(str), affix))
|
||||
{
|
||||
line++;
|
||||
pg_verifymbstr( str, strlen(str), false);
|
||||
memcpy(tmpstr, str, 32); /* compoundwords... */
|
||||
tmpstr[32]='\0';
|
||||
pg_verifymbstr(str, strlen(str), false);
|
||||
memcpy(tmpstr, str, 32); /* compoundwords... */
|
||||
tmpstr[32] = '\0';
|
||||
lowerstr(tmpstr);
|
||||
if (STRNCMP(tmpstr, "compoundwords") == 0)
|
||||
{
|
||||
s = findchar(str, 'l');
|
||||
if (s)
|
||||
{
|
||||
while (*s && !t_isspace(s)) s++;
|
||||
while (*s && t_isspace(s)) s++;
|
||||
if ( *s && pg_mblen(s) == 1 )
|
||||
while (*s && !t_isspace(s))
|
||||
s++;
|
||||
while (*s && t_isspace(s))
|
||||
s++;
|
||||
if (*s && pg_mblen(s) == 1)
|
||||
Conf->compoundcontrol = *s;
|
||||
oldformat++;
|
||||
continue;
|
||||
@ -433,12 +477,13 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
s = str + 4;
|
||||
flagflags = 0;
|
||||
|
||||
while (*s && t_isspace(s)) s++;
|
||||
while (*s && t_isspace(s))
|
||||
s++;
|
||||
oldformat++;
|
||||
|
||||
/* allow only single-encoded flags */
|
||||
if ( pg_mblen(s) != 1 )
|
||||
elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
|
||||
if (pg_mblen(s) != 1)
|
||||
elog(ERROR, "Multiencoded flag at line %d: %s", line, s);
|
||||
|
||||
if (*s == '*')
|
||||
{
|
||||
@ -455,29 +500,31 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
s++;
|
||||
|
||||
/* allow only single-encoded flags */
|
||||
if ( pg_mblen(s) != 1 ) {
|
||||
if (pg_mblen(s) != 1)
|
||||
{
|
||||
flagflags = 0;
|
||||
elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
|
||||
elog(ERROR, "Multiencoded flag at line %d: %s", line, s);
|
||||
}
|
||||
|
||||
flag = (unsigned char) *s;
|
||||
continue;
|
||||
}
|
||||
if ( STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 ||
|
||||
STRNCMP(str, "PFX")==0 || STRNCMP(str, "SFX")==0 ) {
|
||||
if (STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 ||
|
||||
STRNCMP(str, "PFX") == 0 || STRNCMP(str, "SFX") == 0)
|
||||
{
|
||||
|
||||
if ( oldformat )
|
||||
elog(ERROR,"Wrong affix file format");
|
||||
if (oldformat)
|
||||
elog(ERROR, "Wrong affix file format");
|
||||
|
||||
fclose(affix);
|
||||
return NIImportOOAffixes(Conf, filename);
|
||||
|
||||
|
||||
}
|
||||
if ((!suffixes) && (!prefixes))
|
||||
continue;
|
||||
|
||||
lowerstr(str);
|
||||
if ( !parse_affentry(str, mask, find, repl, line) )
|
||||
if (!parse_affentry(str, mask, find, repl, line))
|
||||
continue;
|
||||
|
||||
NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
|
||||
@ -488,7 +535,8 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
|
||||
}
|
||||
|
||||
int
|
||||
NIImportOOAffixes(IspellDict * Conf, const char *filename) {
|
||||
NIImportOOAffixes(IspellDict * Conf, const char *filename)
|
||||
{
|
||||
char str[BUFSIZ];
|
||||
char type[BUFSIZ];
|
||||
char sflag[BUFSIZ];
|
||||
@ -499,11 +547,11 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename) {
|
||||
int flag = 0;
|
||||
char flagflags = 0;
|
||||
FILE *affix;
|
||||
int line=0;
|
||||
int scanread = 0;
|
||||
int line = 0;
|
||||
int scanread = 0;
|
||||
char scanbuf[BUFSIZ];
|
||||
|
||||
sprintf(scanbuf,"%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ/5, BUFSIZ/5, BUFSIZ/5, BUFSIZ/5);
|
||||
sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
|
||||
|
||||
if (!(affix = fopen(filename, "r")))
|
||||
return (1);
|
||||
@ -512,14 +560,17 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename) {
|
||||
while (fgets(str, sizeof(str), affix))
|
||||
{
|
||||
line++;
|
||||
if ( *str == '\0' || t_isspace(str) || t_iseq(str,'#') )
|
||||
if (*str == '\0' || t_isspace(str) || t_iseq(str, '#'))
|
||||
continue;
|
||||
pg_verifymbstr( str, strlen(str), false);
|
||||
pg_verifymbstr(str, strlen(str), false);
|
||||
|
||||
if ( STRNCMP(str, "COMPOUNDFLAG")==0 ) {
|
||||
char *s = str+strlen("COMPOUNDFLAG");
|
||||
while (*s && t_isspace(s)) s++;
|
||||
if ( *s && pg_mblen(s) == 1 )
|
||||
if (STRNCMP(str, "COMPOUNDFLAG") == 0)
|
||||
{
|
||||
char *s = str + strlen("COMPOUNDFLAG");
|
||||
|
||||
while (*s && t_isspace(s))
|
||||
s++;
|
||||
if (*s && pg_mblen(s) == 1)
|
||||
Conf->compoundcontrol = *s;
|
||||
continue;
|
||||
}
|
||||
@ -527,28 +578,31 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename) {
|
||||
scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask);
|
||||
|
||||
lowerstr(type);
|
||||
if ( scanread<4 || (STRNCMP(type,"sfx") && STRNCMP(type,"pfx")) )
|
||||
if (scanread < 4 || (STRNCMP(type, "sfx") && STRNCMP(type, "pfx")))
|
||||
continue;
|
||||
|
||||
if ( scanread == 4 ) {
|
||||
if ( strlen(sflag) != 1 )
|
||||
if (scanread == 4)
|
||||
{
|
||||
if (strlen(sflag) != 1)
|
||||
continue;
|
||||
flag = *sflag;
|
||||
isSuffix = (STRNCMP(type,"sfx")==0) ? true : false;
|
||||
isSuffix = (STRNCMP(type, "sfx") == 0) ? true : false;
|
||||
lowerstr(find);
|
||||
if ( t_iseq(find,'y') )
|
||||
if (t_iseq(find, 'y'))
|
||||
flagflags |= FF_CROSSPRODUCT;
|
||||
else
|
||||
flagflags = 0;
|
||||
} else {
|
||||
if ( strlen(sflag) != 1 || flag != *sflag || flag==0 )
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
|
||||
continue;
|
||||
lowerstr(repl);
|
||||
lowerstr(find);
|
||||
lowerstr(mask);
|
||||
if ( t_iseq(find,'0') )
|
||||
if (t_iseq(find, '0'))
|
||||
*find = '\0';
|
||||
if ( t_iseq(repl,'0') )
|
||||
if (t_iseq(repl, '0'))
|
||||
*repl = '\0';
|
||||
|
||||
NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX);
|
||||
@ -658,7 +712,7 @@ NISortDictionary(IspellDict * Conf)
|
||||
int naffix = 3;
|
||||
|
||||
/* compress affixes */
|
||||
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL*), cmpspellaffix);
|
||||
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
|
||||
for (i = 1; i < Conf->nspell; i++)
|
||||
if (strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag))
|
||||
naffix++;
|
||||
@ -685,7 +739,7 @@ NISortDictionary(IspellDict * Conf)
|
||||
Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
|
||||
}
|
||||
|
||||
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL*), cmpspell);
|
||||
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
|
||||
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
|
||||
|
||||
for (i = 0; i < Conf->nspell; i++)
|
||||
@ -806,7 +860,7 @@ NISortAffixes(IspellDict * Conf)
|
||||
CMPDAffix *ptr;
|
||||
int firstsuffix = -1;
|
||||
|
||||
if (Conf->naffixes==0)
|
||||
if (Conf->naffixes == 0)
|
||||
return;
|
||||
|
||||
if (Conf->naffixes > 1)
|
||||
@ -822,7 +876,7 @@ NISortAffixes(IspellDict * Conf)
|
||||
{
|
||||
if (firstsuffix < 0)
|
||||
firstsuffix = i;
|
||||
if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen>0 )
|
||||
if ((Affix->flagflags & FF_COMPOUNDONLYAFX) && Affix->replen > 0)
|
||||
{
|
||||
if (ptr == Conf->CompoundAffix ||
|
||||
strbncmp((const unsigned char *) (ptr - 1)->affix,
|
||||
@ -907,14 +961,16 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
|
||||
{
|
||||
strcpy(newword, word);
|
||||
strcpy(newword + len - Affix->replen, Affix->find);
|
||||
if ( baselen ) /* store length of non-changed part of word */
|
||||
if (baselen) /* store length of non-changed part of word */
|
||||
*baselen = len - Affix->replen;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* if prefix is a all non-chaged part's length then all word contains only prefix and suffix,
|
||||
so out */
|
||||
if ( baselen && *baselen + strlen(Affix->find) <= Affix->replen )
|
||||
/*
|
||||
* if prefix is a all non-chaged part's length then all word contains
|
||||
* only prefix and suffix, so out
|
||||
*/
|
||||
if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
|
||||
return NULL;
|
||||
strcpy(newword, Affix->find);
|
||||
strcat(newword, word + Affix->replen);
|
||||
@ -944,6 +1000,7 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
|
||||
int wmasklen,
|
||||
masklen = strlen(Affix->mask);
|
||||
pg_wchar *mask;
|
||||
|
||||
mask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar));
|
||||
wmasklen = pg_mb2wchar_with_len(Affix->mask, mask, masklen);
|
||||
|
||||
@ -1040,7 +1097,7 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
|
||||
*/
|
||||
while (snode)
|
||||
{
|
||||
int baselen=0;
|
||||
int baselen = 0;
|
||||
|
||||
/* find possible suffix */
|
||||
suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
|
||||
@ -1111,7 +1168,8 @@ typedef struct SplitVar
|
||||
static int
|
||||
CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
|
||||
{
|
||||
if ( CheckInPlace ) {
|
||||
if (CheckInPlace)
|
||||
{
|
||||
while ((*ptr)->affix)
|
||||
{
|
||||
if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
|
||||
@ -1122,13 +1180,16 @@ CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace)
|
||||
}
|
||||
(*ptr)++;
|
||||
}
|
||||
} else {
|
||||
char *affbegin;
|
||||
}
|
||||
else
|
||||
{
|
||||
char *affbegin;
|
||||
|
||||
while ((*ptr)->affix)
|
||||
{
|
||||
if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
|
||||
{
|
||||
len = (*ptr)->len + (affbegin-word);
|
||||
len = (*ptr)->len + (affbegin - word);
|
||||
(*ptr)++;
|
||||
return len;
|
||||
}
|
||||
@ -1227,8 +1288,8 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
|
||||
}
|
||||
}
|
||||
|
||||
if ( !node )
|
||||
break;
|
||||
if (!node)
|
||||
break;
|
||||
|
||||
StopLow = node->data;
|
||||
StopHigh = node->data + node->length;
|
||||
@ -1243,7 +1304,8 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
if (StopLow < StopHigh) {
|
||||
if (StopLow < StopHigh)
|
||||
{
|
||||
|
||||
/* find infinitive */
|
||||
if (StopMiddle->isword && StopMiddle->compoundallow && notprobed[level])
|
||||
@ -1264,7 +1326,7 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
|
||||
{
|
||||
/* then we will search more big word at the same point */
|
||||
SplitVar *ptr = var;
|
||||
|
||||
|
||||
while (ptr->next)
|
||||
ptr = ptr->next;
|
||||
ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
|
||||
@ -1279,8 +1341,9 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
|
||||
}
|
||||
}
|
||||
node = StopMiddle->node;
|
||||
} else
|
||||
node = NULL;
|
||||
}
|
||||
else
|
||||
node = NULL;
|
||||
level++;
|
||||
}
|
||||
|
||||
@ -1436,9 +1499,12 @@ NIFree(IspellDict * Conf)
|
||||
else
|
||||
pg_regfree(&(Affix[i].reg.regex));
|
||||
}
|
||||
if ( Affix[i].mask != VoidString ) free(Affix[i].mask);
|
||||
if ( Affix[i].find != VoidString ) free(Affix[i].find);
|
||||
if ( Affix[i].repl != VoidString ) free(Affix[i].repl);
|
||||
if (Affix[i].mask != VoidString)
|
||||
free(Affix[i].mask);
|
||||
if (Affix[i].find != VoidString)
|
||||
free(Affix[i].find);
|
||||
if (Affix[i].repl != VoidString)
|
||||
free(Affix[i].repl);
|
||||
}
|
||||
if (Conf->Spell)
|
||||
{
|
||||
|
@ -42,8 +42,8 @@ typedef struct spell_struct
|
||||
int affix;
|
||||
int len;
|
||||
} d;
|
||||
} p;
|
||||
char word[1];
|
||||
} p;
|
||||
char word[1];
|
||||
} SPELL;
|
||||
|
||||
#define SPELLHDRSZ (offsetof(SPELL, word))
|
||||
@ -110,7 +110,7 @@ typedef struct
|
||||
|
||||
int nspell;
|
||||
int mspell;
|
||||
SPELL **Spell;
|
||||
SPELL **Spell;
|
||||
|
||||
AffixNode *Suffix;
|
||||
AffixNode *Prefix;
|
||||
|
@ -33,9 +33,9 @@ nstrdup(char *ptr, int len)
|
||||
{
|
||||
if (t_iseq(ptr, '\\'))
|
||||
ptr++;
|
||||
COPYCHAR( cptr, ptr );
|
||||
cptr+=pg_mblen(ptr);
|
||||
ptr+=pg_mblen(ptr);
|
||||
COPYCHAR(cptr, ptr);
|
||||
cptr += pg_mblen(ptr);
|
||||
ptr += pg_mblen(ptr);
|
||||
}
|
||||
*cptr = '\0';
|
||||
|
||||
@ -53,9 +53,9 @@ parse_cfgdict(text *in, Map ** m)
|
||||
|
||||
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
|
||||
{
|
||||
if ( t_iseq(ptr, ',') )
|
||||
if (t_iseq(ptr, ','))
|
||||
num++;
|
||||
ptr+=pg_mblen(ptr);
|
||||
ptr += pg_mblen(ptr);
|
||||
}
|
||||
|
||||
*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
|
||||
@ -84,7 +84,7 @@ parse_cfgdict(text *in, Map ** m)
|
||||
mptr->key = nstrdup(begin, ptr - begin);
|
||||
state = CS_WAITEQ;
|
||||
}
|
||||
else if (t_iseq(ptr,'='))
|
||||
else if (t_iseq(ptr, '='))
|
||||
{
|
||||
mptr->key = nstrdup(begin, ptr - begin);
|
||||
state = CS_WAITVALUE;
|
||||
@ -163,7 +163,7 @@ parse_cfgdict(text *in, Map ** m)
|
||||
errmsg("bad parser state"),
|
||||
errdetail("%d at position %d.",
|
||||
state, (int) (ptr - VARDATA(in)))));
|
||||
ptr+=pg_mblen(ptr);
|
||||
ptr += pg_mblen(ptr);
|
||||
}
|
||||
|
||||
if (state == CS_IN2VALUE)
|
||||
|
@ -108,11 +108,11 @@ get_weight(char *buf, int2 *weight)
|
||||
{
|
||||
*weight = 0;
|
||||
|
||||
if ( !t_iseq(buf, ':') )
|
||||
if (!t_iseq(buf, ':'))
|
||||
return buf;
|
||||
|
||||
buf++;
|
||||
while ( *buf && pg_mblen(buf) == 1 )
|
||||
while (*buf && pg_mblen(buf) == 1)
|
||||
{
|
||||
switch (*buf)
|
||||
{
|
||||
@ -153,25 +153,26 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
|
||||
{
|
||||
case WAITFIRSTOPERAND:
|
||||
case WAITOPERAND:
|
||||
if ( t_iseq(state->buf, '!') )
|
||||
if (t_iseq(state->buf, '!'))
|
||||
{
|
||||
(state->buf)++; /* can safely ++, t_iseq guarantee that pg_mblen()==1 */
|
||||
(state->buf)++; /* can safely ++, t_iseq guarantee
|
||||
* that pg_mblen()==1 */
|
||||
*val = (int4) '!';
|
||||
return OPR;
|
||||
}
|
||||
else if ( t_iseq(state->buf, '(') )
|
||||
else if (t_iseq(state->buf, '('))
|
||||
{
|
||||
state->count++;
|
||||
(state->buf)++;
|
||||
return OPEN;
|
||||
}
|
||||
else if ( t_iseq(state->buf, ':') )
|
||||
else if (t_iseq(state->buf, ':'))
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("error at start of operand")));
|
||||
}
|
||||
else if ( !t_isspace(state->buf) )
|
||||
else if (!t_isspace(state->buf))
|
||||
{
|
||||
state->valstate.prsbuf = state->buf;
|
||||
if (gettoken_tsvector(&(state->valstate)))
|
||||
@ -191,14 +192,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
|
||||
}
|
||||
break;
|
||||
case WAITOPERATOR:
|
||||
if ( t_iseq(state->buf, '&') || t_iseq(state->buf, '|') )
|
||||
if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
|
||||
{
|
||||
state->state = WAITOPERAND;
|
||||
*val = (int4) *(state->buf);
|
||||
(state->buf)++;
|
||||
return OPR;
|
||||
}
|
||||
else if ( t_iseq(state->buf, ')') )
|
||||
else if (t_iseq(state->buf, ')'))
|
||||
{
|
||||
(state->buf)++;
|
||||
state->count--;
|
||||
@ -206,7 +207,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
|
||||
}
|
||||
else if (*(state->buf) == '\0')
|
||||
return (state->count) ? ERR : END;
|
||||
else if ( !t_isspace(state->buf) )
|
||||
else if (!t_isspace(state->buf))
|
||||
return ERR;
|
||||
break;
|
||||
case WAITSINGLEOPERAND:
|
||||
@ -221,7 +222,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
|
||||
return ERR;
|
||||
break;
|
||||
}
|
||||
state->buf+=pg_mblen(state->buf);
|
||||
state->buf += pg_mblen(state->buf);
|
||||
}
|
||||
return END;
|
||||
}
|
||||
@ -604,7 +605,7 @@ findoprnd(ITEM * ptr, int4 *pos)
|
||||
* input
|
||||
*/
|
||||
static QUERYTYPE *
|
||||
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id, bool isplain)
|
||||
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id, bool isplain)
|
||||
{
|
||||
QPRS_STATE state;
|
||||
int4 i;
|
||||
@ -701,8 +702,9 @@ queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int c
|
||||
Datum
|
||||
tsquery_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char * in = (char*)PG_GETARG_POINTER(0);
|
||||
pg_verifymbstr( in, strlen(in), false);
|
||||
char *in = (char *) PG_GETARG_POINTER(0);
|
||||
|
||||
pg_verifymbstr(in, strlen(in), false);
|
||||
|
||||
SET_FUNCOID();
|
||||
PG_RETURN_POINTER(queryin((char *) in, pushval_asis, 0, false));
|
||||
@ -739,23 +741,23 @@ infix(INFIX * in, bool first)
|
||||
if (in->curpol->type == VAL)
|
||||
{
|
||||
char *op = in->op + in->curpol->distance;
|
||||
int clen;
|
||||
int clen;
|
||||
|
||||
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length()+1) + 2 + 5);
|
||||
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
while (*op)
|
||||
{
|
||||
if ( t_iseq(op, '\'') )
|
||||
if (t_iseq(op, '\''))
|
||||
{
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
}
|
||||
COPYCHAR(in->cur,op);
|
||||
COPYCHAR(in->cur, op);
|
||||
|
||||
clen = pg_mblen(op);
|
||||
op+=clen;
|
||||
in->cur+=clen;
|
||||
op += clen;
|
||||
in->cur += clen;
|
||||
}
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
|
@ -48,7 +48,7 @@ typedef struct
|
||||
#define CLOSE 5
|
||||
#define VALSTOP 6 /* for stop words */
|
||||
|
||||
bool TS_execute(ITEM *curitem, void *checkval,
|
||||
bool calcnot, bool (*chkcond) (void *checkval, ITEM *val));
|
||||
bool TS_execute(ITEM * curitem, void *checkval,
|
||||
bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
|
||||
|
||||
#endif
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "query.h"
|
||||
|
||||
ITEM *clean_NOT_v2(ITEM *ptr, int4 *len);
|
||||
ITEM *clean_fakeval_v2(ITEM *ptr, int4 *len);
|
||||
ITEM *clean_NOT_v2(ITEM * ptr, int4 *len);
|
||||
ITEM *clean_fakeval_v2(ITEM * ptr, int4 *len);
|
||||
|
||||
#endif
|
||||
|
@ -29,7 +29,7 @@ makesign(QUERYTYPE * a)
|
||||
for (i = 0; i < a->size; i++)
|
||||
{
|
||||
if (ptr->type == VAL)
|
||||
sign |= ((TPQTGist)1) << (ptr->val % SIGLEN);
|
||||
sign |= ((TPQTGist) 1) << (ptr->val % SIGLEN);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
@ -104,7 +104,7 @@ tsq_mcontained(PG_FUNCTION_ARGS)
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(0)
|
||||
)
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(gtsq_in);
|
||||
@ -272,7 +272,7 @@ typedef struct
|
||||
{
|
||||
OffsetNumber pos;
|
||||
int4 cost;
|
||||
} SPLITCOST;
|
||||
} SPLITCOST;
|
||||
|
||||
static int
|
||||
comparecost(const void *a, const void *b)
|
||||
|
@ -41,13 +41,13 @@ static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
||||
|
||||
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
|
||||
|
||||
#define RANK_NO_NORM 0x00
|
||||
#define RANK_NORM_LOGLENGTH 0x01
|
||||
#define RANK_NORM_LENGTH 0x02
|
||||
#define RANK_NORM_EXTDIST 0x04
|
||||
#define RANK_NO_NORM 0x00
|
||||
#define RANK_NORM_LOGLENGTH 0x01
|
||||
#define RANK_NORM_LENGTH 0x02
|
||||
#define RANK_NORM_EXTDIST 0x04
|
||||
#define RANK_NORM_UNIQ 0x08
|
||||
#define RANK_NORM_LOGUNIQ 0x10
|
||||
#define DEF_NORM_METHOD RANK_NO_NORM
|
||||
#define DEF_NORM_METHOD RANK_NO_NORM
|
||||
|
||||
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
|
||||
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
|
||||
@ -334,19 +334,20 @@ calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
|
||||
if (res < 0)
|
||||
res = 1e-20;
|
||||
|
||||
if ( (method & RANK_NORM_LOGLENGTH) && t->size>0 )
|
||||
if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
|
||||
res /= log((double) (cnt_length(t) + 1)) / log(2.0);
|
||||
|
||||
if ( method & RANK_NORM_LENGTH ) {
|
||||
if (method & RANK_NORM_LENGTH)
|
||||
{
|
||||
len = cnt_length(t);
|
||||
if ( len>0 )
|
||||
if (len > 0)
|
||||
res /= (float) len;
|
||||
}
|
||||
|
||||
if ( (method & RANK_NORM_UNIQ) && t->size > 0 )
|
||||
res /= (float)( t->size );
|
||||
if ((method & RANK_NORM_UNIQ) && t->size > 0)
|
||||
res /= (float) (t->size);
|
||||
|
||||
if ( (method & RANK_NORM_LOGUNIQ) && t->size > 0 )
|
||||
if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
|
||||
res /= log((double) (t->size + 1)) / log(2.0);
|
||||
|
||||
return res;
|
||||
@ -457,17 +458,18 @@ reset_istrue_flag(QUERYTYPE * query)
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
int pos;
|
||||
int p;
|
||||
int q;
|
||||
DocRepresentation *begin;
|
||||
DocRepresentation *end;
|
||||
} Extention;
|
||||
typedef struct
|
||||
{
|
||||
int pos;
|
||||
int p;
|
||||
int q;
|
||||
DocRepresentation *begin;
|
||||
DocRepresentation *end;
|
||||
} Extention;
|
||||
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention *ext)
|
||||
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
|
||||
{
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
@ -513,7 +515,8 @@ Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention *ext)
|
||||
ptr->item[i]->istrue = 1;
|
||||
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
|
||||
{
|
||||
if (ptr->pos < ext->p) {
|
||||
if (ptr->pos < ext->p)
|
||||
{
|
||||
ext->begin = ptr;
|
||||
ext->p = ptr->pos;
|
||||
}
|
||||
@ -629,69 +632,77 @@ get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
|
||||
}
|
||||
|
||||
static float4
|
||||
calc_rank_cd(float4 *arrdata, tsvector *txt, QUERYTYPE *query, int method) {
|
||||
calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
|
||||
{
|
||||
DocRepresentation *doc;
|
||||
int len,
|
||||
int len,
|
||||
i,
|
||||
doclen = 0;
|
||||
Extention ext;
|
||||
double Wdoc = 0.0;
|
||||
double invws[lengthof(weights)];
|
||||
double SumDist=0.0, PrevExtPos=0.0, CurExtPos=0.0;
|
||||
int NExtent=0;
|
||||
double SumDist = 0.0,
|
||||
PrevExtPos = 0.0,
|
||||
CurExtPos = 0.0;
|
||||
int NExtent = 0;
|
||||
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
invws[i] = ((double)((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
|
||||
invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
|
||||
if (invws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
invws[i] = 1.0/invws[i];
|
||||
invws[i] = 1.0 / invws[i];
|
||||
}
|
||||
|
||||
doc = get_docrep(txt, query, &doclen);
|
||||
if (!doc)
|
||||
if (!doc)
|
||||
return 0.0;
|
||||
|
||||
MemSet( &ext, 0, sizeof(Extention) );
|
||||
while (Cover(doc, doclen, query, &ext)) {
|
||||
double Cpos = 0.0;
|
||||
double InvSum = 0.0;
|
||||
MemSet(&ext, 0, sizeof(Extention));
|
||||
while (Cover(doc, doclen, query, &ext))
|
||||
{
|
||||
double Cpos = 0.0;
|
||||
double InvSum = 0.0;
|
||||
DocRepresentation *ptr = ext.begin;
|
||||
|
||||
while ( ptr<=ext.end ) {
|
||||
InvSum += invws[ ptr->wclass ];
|
||||
while (ptr <= ext.end)
|
||||
{
|
||||
InvSum += invws[ptr->wclass];
|
||||
ptr++;
|
||||
}
|
||||
|
||||
Cpos = ((double)( ext.end-ext.begin+1 )) / InvSum;
|
||||
Wdoc += Cpos / ( (double)(( 1 + (ext.q - ext.p) - (ext.end - ext.begin) )) );
|
||||
Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
|
||||
Wdoc += Cpos / ((double) ((1 + (ext.q - ext.p) - (ext.end - ext.begin))));
|
||||
|
||||
CurExtPos = ((double)(ext.q + ext.p))/2.0;
|
||||
if ( NExtent>0 && CurExtPos > PrevExtPos /* prevent devision by zero in a case of multiple lexize */ )
|
||||
SumDist += 1.0/( CurExtPos - PrevExtPos );
|
||||
CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
|
||||
if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by
|
||||
* zero in a case of
|
||||
multiple lexize */ )
|
||||
SumDist += 1.0 / (CurExtPos - PrevExtPos);
|
||||
|
||||
PrevExtPos = CurExtPos;
|
||||
NExtent++;
|
||||
NExtent++;
|
||||
}
|
||||
|
||||
if ( (method & RANK_NORM_LOGLENGTH) && txt->size > 0 )
|
||||
if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
|
||||
Wdoc /= log((double) (cnt_length(txt) + 1));
|
||||
|
||||
if ( method & RANK_NORM_LENGTH ) {
|
||||
if (method & RANK_NORM_LENGTH)
|
||||
{
|
||||
len = cnt_length(txt);
|
||||
if ( len>0 )
|
||||
if (len > 0)
|
||||
Wdoc /= (double) len;
|
||||
}
|
||||
|
||||
if ( (method & RANK_NORM_EXTDIST) && SumDist > 0 )
|
||||
Wdoc /= ((double)NExtent) / SumDist;
|
||||
if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
|
||||
Wdoc /= ((double) NExtent) / SumDist;
|
||||
|
||||
if ( (method & RANK_NORM_UNIQ) && txt->size > 0 )
|
||||
Wdoc /= (double)( txt->size );
|
||||
if ((method & RANK_NORM_UNIQ) && txt->size > 0)
|
||||
Wdoc /= (double) (txt->size);
|
||||
|
||||
if ( (method & RANK_NORM_LOGUNIQ) && txt->size > 0 )
|
||||
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
|
||||
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
|
||||
|
||||
for (i = 0; i < doclen; i++)
|
||||
@ -699,13 +710,13 @@ calc_rank_cd(float4 *arrdata, tsvector *txt, QUERYTYPE *query, int method) {
|
||||
pfree(doc[i].item);
|
||||
pfree(doc);
|
||||
|
||||
return (float4)Wdoc;
|
||||
}
|
||||
return (float4) Wdoc;
|
||||
}
|
||||
|
||||
Datum
|
||||
rank_cd(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
|
||||
int method = DEF_NORM_METHOD;
|
||||
@ -729,7 +740,7 @@ rank_cd(PG_FUNCTION_ARGS)
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
res = calc_rank_cd( (float4 *) ARR_DATA_PTR(win), txt, query, method);
|
||||
res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
@ -744,10 +755,10 @@ rank_cd_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
|
||||
float4 res;
|
||||
float4 res;
|
||||
|
||||
res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
|
||||
|
||||
res = calc_rank_cd( weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
|
||||
@ -791,7 +802,7 @@ get_covers(PG_FUNCTION_ARGS)
|
||||
text *out;
|
||||
char *cptr;
|
||||
DocRepresentation *doc;
|
||||
int olddwpos = 0;
|
||||
int olddwpos = 0;
|
||||
int ncover = 1;
|
||||
Extention ext;
|
||||
|
||||
@ -833,7 +844,7 @@ get_covers(PG_FUNCTION_ARGS)
|
||||
}
|
||||
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
|
||||
|
||||
MemSet( &ext, 0, sizeof(Extention) );
|
||||
MemSet(&ext, 0, sizeof(Extention));
|
||||
while (Cover(doc, rlen, query, &ext))
|
||||
{
|
||||
dwptr = dw + olddwpos;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,15 +2,16 @@
|
||||
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
extern struct SN_env * russian_UTF_8_create_env(void);
|
||||
extern void russian_UTF_8_close_env(struct SN_env * z);
|
||||
extern struct SN_env *russian_UTF_8_create_env(void);
|
||||
extern void russian_UTF_8_close_env(struct SN_env * z);
|
||||
|
||||
extern int russian_UTF_8_stem(struct SN_env * z);
|
||||
extern int russian_UTF_8_stem(struct SN_env * z);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -48,7 +48,7 @@ readstoplist(text *in, StopList * s)
|
||||
while (fgets(buf, STOPBUFLEN, hin))
|
||||
{
|
||||
buf[strlen(buf) - 1] = '\0';
|
||||
pg_verifymbstr( buf, strlen(buf), false );
|
||||
pg_verifymbstr(buf, strlen(buf), false);
|
||||
lowerstr(buf);
|
||||
if (*buf == '\0')
|
||||
continue;
|
||||
|
@ -301,14 +301,15 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
|
||||
LexizeInit(&ldata, cfg);
|
||||
|
||||
do {
|
||||
do
|
||||
{
|
||||
type = DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)));
|
||||
|
||||
if (type>0 && lenlemm >= MAXSTRLEN)
|
||||
if (type > 0 && lenlemm >= MAXSTRLEN)
|
||||
{
|
||||
#ifdef IGNORE_LONGLEXEME
|
||||
ereport(NOTICE,
|
||||
@ -324,9 +325,9 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
|
||||
LexizeAddLemm(&ldata, type, lemm, lenlemm);
|
||||
|
||||
while( (norms = LexizeExec(&ldata, NULL)) != NULL )
|
||||
while ((norms = LexizeExec(&ldata, NULL)) != NULL)
|
||||
{
|
||||
TSLexeme *ptr = norms;
|
||||
TSLexeme *ptr = norms;
|
||||
|
||||
prs->pos++; /* set pos */
|
||||
|
||||
@ -338,7 +339,7 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
|
||||
}
|
||||
|
||||
if ( ptr->flags & TSL_ADDPOS )
|
||||
if (ptr->flags & TSL_ADDPOS)
|
||||
prs->pos++;
|
||||
prs->words[prs->curwords].len = strlen(ptr->lexeme);
|
||||
prs->words[prs->curwords].word = ptr->lexeme;
|
||||
@ -349,8 +350,8 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
|
||||
prs->curwords++;
|
||||
}
|
||||
pfree(norms);
|
||||
}
|
||||
} while(type>0);
|
||||
}
|
||||
} while (type > 0);
|
||||
|
||||
FunctionCall1(
|
||||
&(prsobj->end_info),
|
||||
@ -407,30 +408,35 @@ hlfinditem(HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int buflen)
|
||||
}
|
||||
|
||||
static void
|
||||
addHLParsedLex(HLPRSTEXT *prs, QUERYTYPE * query, ParsedLex *lexs, TSLexeme *norms) {
|
||||
ParsedLex *tmplexs;
|
||||
TSLexeme *ptr;
|
||||
addHLParsedLex(HLPRSTEXT * prs, QUERYTYPE * query, ParsedLex * lexs, TSLexeme * norms)
|
||||
{
|
||||
ParsedLex *tmplexs;
|
||||
TSLexeme *ptr;
|
||||
|
||||
while( lexs ) {
|
||||
|
||||
if ( lexs->type > 0 )
|
||||
while (lexs)
|
||||
{
|
||||
|
||||
if (lexs->type > 0)
|
||||
hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
|
||||
|
||||
ptr = norms;
|
||||
while( ptr && ptr->lexeme ) {
|
||||
while (ptr && ptr->lexeme)
|
||||
{
|
||||
hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
|
||||
ptr++;
|
||||
}
|
||||
|
||||
tmplexs = lexs->next;
|
||||
pfree( lexs );
|
||||
pfree(lexs);
|
||||
lexs = tmplexs;
|
||||
}
|
||||
|
||||
if ( norms ) {
|
||||
if (norms)
|
||||
{
|
||||
ptr = norms;
|
||||
while( ptr->lexeme ) {
|
||||
pfree( ptr->lexeme );
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
pfree(ptr->lexeme);
|
||||
ptr++;
|
||||
}
|
||||
pfree(norms);
|
||||
@ -445,8 +451,8 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
|
||||
char *lemm = NULL;
|
||||
WParserInfo *prsobj = findprs(cfg->prs_id);
|
||||
LexizeData ldata;
|
||||
TSLexeme *norms;
|
||||
ParsedLex *lexs;
|
||||
TSLexeme *norms;
|
||||
ParsedLex *lexs;
|
||||
|
||||
prsobj->prs = (void *) DatumGetPointer(
|
||||
FunctionCall2(
|
||||
@ -458,14 +464,15 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
|
||||
|
||||
LexizeInit(&ldata, cfg);
|
||||
|
||||
do {
|
||||
do
|
||||
{
|
||||
type = DatumGetInt32(FunctionCall3(
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)));
|
||||
&(prsobj->getlexeme_info),
|
||||
PointerGetDatum(prsobj->prs),
|
||||
PointerGetDatum(&lemm),
|
||||
PointerGetDatum(&lenlemm)));
|
||||
|
||||
if (type>0 && lenlemm >= MAXSTRLEN)
|
||||
if (type > 0 && lenlemm >= MAXSTRLEN)
|
||||
{
|
||||
#ifdef IGNORE_LONGLEXEME
|
||||
ereport(NOTICE,
|
||||
@ -481,14 +488,15 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
|
||||
|
||||
LexizeAddLemm(&ldata, type, lemm, lenlemm);
|
||||
|
||||
do {
|
||||
if ( (norms = LexizeExec(&ldata,&lexs)) != NULL )
|
||||
do
|
||||
{
|
||||
if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
|
||||
addHLParsedLex(prs, query, lexs, norms);
|
||||
else
|
||||
else
|
||||
addHLParsedLex(prs, query, lexs, NULL);
|
||||
} while( norms );
|
||||
} while (norms);
|
||||
|
||||
} while( type>0 );
|
||||
} while (type > 0);
|
||||
|
||||
FunctionCall1(
|
||||
&(prsobj->end_info),
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* lexize stream of lexemes
|
||||
* lexize stream of lexemes
|
||||
* Teodor Sigaev <teodor@sigaev.ru>
|
||||
*/
|
||||
#include "postgres.h"
|
||||
@ -11,34 +11,39 @@
|
||||
#include "dict.h"
|
||||
|
||||
void
|
||||
LexizeInit(LexizeData *ld, TSCfgInfo *cfg) {
|
||||
LexizeInit(LexizeData * ld, TSCfgInfo * cfg)
|
||||
{
|
||||
ld->cfg = cfg;
|
||||
ld->curDictId = InvalidOid;
|
||||
ld->posDict = 0;
|
||||
ld->towork.head = ld->towork.tail = ld->curSub = NULL;
|
||||
ld->waste.head = ld->waste.tail = NULL;
|
||||
ld->lastRes=NULL;
|
||||
ld->tmpRes=NULL;
|
||||
ld->lastRes = NULL;
|
||||
ld->tmpRes = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
LPLAddTail(ListParsedLex *list, ParsedLex *newpl) {
|
||||
if ( list->tail ) {
|
||||
LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
|
||||
{
|
||||
if (list->tail)
|
||||
{
|
||||
list->tail->next = newpl;
|
||||
list->tail = newpl;
|
||||
} else
|
||||
}
|
||||
else
|
||||
list->head = list->tail = newpl;
|
||||
newpl->next = NULL;
|
||||
}
|
||||
|
||||
static ParsedLex*
|
||||
LPLRemoveHead(ListParsedLex *list) {
|
||||
ParsedLex *res = list->head;
|
||||
static ParsedLex *
|
||||
LPLRemoveHead(ListParsedLex * list)
|
||||
{
|
||||
ParsedLex *res = list->head;
|
||||
|
||||
if ( list->head )
|
||||
if (list->head)
|
||||
list->head = list->head->next;
|
||||
|
||||
if ( list->head == NULL )
|
||||
if (list->head == NULL)
|
||||
list->tail = NULL;
|
||||
|
||||
return res;
|
||||
@ -46,10 +51,11 @@ LPLRemoveHead(ListParsedLex *list) {
|
||||
|
||||
|
||||
void
|
||||
LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) {
|
||||
ParsedLex *newpl = (ParsedLex*)palloc( sizeof(ParsedLex) );
|
||||
LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
|
||||
{
|
||||
ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
|
||||
|
||||
newpl = (ParsedLex*)palloc( sizeof(ParsedLex) );
|
||||
newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
|
||||
newpl->type = type;
|
||||
newpl->lemm = lemm;
|
||||
newpl->lenlemm = lenlemm;
|
||||
@ -58,20 +64,27 @@ LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) {
|
||||
}
|
||||
|
||||
static void
|
||||
RemoveHead(LexizeData *ld) {
|
||||
RemoveHead(LexizeData * ld)
|
||||
{
|
||||
LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
|
||||
|
||||
ld->posDict = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) {
|
||||
if ( correspondLexem ) {
|
||||
setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
|
||||
{
|
||||
if (correspondLexem)
|
||||
{
|
||||
*correspondLexem = ld->waste.head;
|
||||
} else {
|
||||
ParsedLex *tmp, *ptr = ld->waste.head;
|
||||
}
|
||||
else
|
||||
{
|
||||
ParsedLex *tmp,
|
||||
*ptr = ld->waste.head;
|
||||
|
||||
while(ptr) {
|
||||
while (ptr)
|
||||
{
|
||||
tmp = ptr->next;
|
||||
pfree(ptr);
|
||||
ptr = tmp;
|
||||
@ -81,11 +94,14 @@ setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) {
|
||||
}
|
||||
|
||||
static void
|
||||
moveToWaste(LexizeData *ld, ParsedLex *stop) {
|
||||
bool go = true;
|
||||
moveToWaste(LexizeData * ld, ParsedLex * stop)
|
||||
{
|
||||
bool go = true;
|
||||
|
||||
while( ld->towork.head && go) {
|
||||
if (ld->towork.head == stop) {
|
||||
while (ld->towork.head && go)
|
||||
{
|
||||
if (ld->towork.head == stop)
|
||||
{
|
||||
ld->curSub = stop->next;
|
||||
go = false;
|
||||
}
|
||||
@ -94,110 +110,124 @@ moveToWaste(LexizeData *ld, ParsedLex *stop) {
|
||||
}
|
||||
|
||||
static void
|
||||
setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res) {
|
||||
if ( ld->tmpRes ) {
|
||||
TSLexeme *ptr;
|
||||
for( ptr=ld->tmpRes; ptr->lexeme; ptr++ )
|
||||
pfree( ptr->lexeme );
|
||||
pfree( ld->tmpRes );
|
||||
setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
|
||||
{
|
||||
if (ld->tmpRes)
|
||||
{
|
||||
TSLexeme *ptr;
|
||||
|
||||
for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
|
||||
pfree(ptr->lexeme);
|
||||
pfree(ld->tmpRes);
|
||||
}
|
||||
ld->tmpRes = res;
|
||||
ld->lastRes = lex;
|
||||
}
|
||||
|
||||
TSLexeme*
|
||||
LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
|
||||
int i;
|
||||
ListDictionary *map;
|
||||
DictInfo *dict;
|
||||
TSLexeme *res;
|
||||
TSLexeme *
|
||||
LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
|
||||
{
|
||||
int i;
|
||||
ListDictionary *map;
|
||||
DictInfo *dict;
|
||||
TSLexeme *res;
|
||||
|
||||
if ( ld->curDictId == InvalidOid ) {
|
||||
/*
|
||||
* usial mode: dictionary wants only one word,
|
||||
* but we should keep in mind that we should go through
|
||||
* all stack
|
||||
if (ld->curDictId == InvalidOid)
|
||||
{
|
||||
/*
|
||||
* usial mode: dictionary wants only one word, but we should keep in
|
||||
* mind that we should go through all stack
|
||||
*/
|
||||
|
||||
while( ld->towork.head ) {
|
||||
ParsedLex *curVal = ld->towork.head;
|
||||
while (ld->towork.head)
|
||||
{
|
||||
ParsedLex *curVal = ld->towork.head;
|
||||
|
||||
map = ld->cfg->map + curVal->type;
|
||||
|
||||
if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0 ) {
|
||||
if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0)
|
||||
{
|
||||
/* skip this type of lexeme */
|
||||
RemoveHead(ld);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = ld->posDict; i < map->len; i++) {
|
||||
for (i = ld->posDict; i < map->len; i++)
|
||||
{
|
||||
dict = finddict(DatumGetObjectId(map->dict_id[i]));
|
||||
|
||||
ld->dictState.isend = ld->dictState.getnext = false;
|
||||
ld->dictState.private = NULL;
|
||||
res = (TSLexeme *) DatumGetPointer( FunctionCall4(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(curVal->lemm),
|
||||
Int32GetDatum(curVal->lenlemm),
|
||||
PointerGetDatum(&ld->dictState)
|
||||
));
|
||||
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(curVal->lemm),
|
||||
Int32GetDatum(curVal->lenlemm),
|
||||
PointerGetDatum(&ld->dictState)
|
||||
));
|
||||
|
||||
if ( ld->dictState.getnext ) {
|
||||
/*
|
||||
* dictinary wants next word, so setup and store
|
||||
* current position and go to multiword mode
|
||||
if (ld->dictState.getnext)
|
||||
{
|
||||
/*
|
||||
* dictinary wants next word, so setup and store current
|
||||
* position and go to multiword mode
|
||||
*/
|
||||
|
||||
|
||||
ld->curDictId = DatumGetObjectId(map->dict_id[i]);
|
||||
ld->posDict = i+1;
|
||||
ld->posDict = i + 1;
|
||||
ld->curSub = curVal->next;
|
||||
if ( res )
|
||||
if (res)
|
||||
setNewTmpRes(ld, curVal, res);
|
||||
return LexizeExec(ld, correspondLexem);
|
||||
}
|
||||
|
||||
if (!res) /* dictionary doesn't know this lexeme */
|
||||
if (!res) /* dictionary doesn't know this lexeme */
|
||||
continue;
|
||||
|
||||
|
||||
RemoveHead(ld);
|
||||
setCorrLex(ld, correspondLexem);
|
||||
return res;
|
||||
}
|
||||
|
||||
RemoveHead(ld);
|
||||
}
|
||||
} else { /* curDictId is valid */
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* curDictId is valid */
|
||||
dict = finddict(ld->curDictId);
|
||||
|
||||
|
||||
/*
|
||||
* Dictionary ld->curDictId asks us about following words
|
||||
*/
|
||||
|
||||
while( ld->curSub ) {
|
||||
ParsedLex *curVal = ld->curSub;
|
||||
while (ld->curSub)
|
||||
{
|
||||
ParsedLex *curVal = ld->curSub;
|
||||
|
||||
map = ld->cfg->map + curVal->type;
|
||||
|
||||
if (curVal->type != 0) {
|
||||
bool dictExists = false;
|
||||
if (curVal->type != 0)
|
||||
{
|
||||
bool dictExists = false;
|
||||
|
||||
if (curVal->type >= ld->cfg->len || map->len == 0 ) {
|
||||
if (curVal->type >= ld->cfg->len || map->len == 0)
|
||||
{
|
||||
/* skip this type of lexeme */
|
||||
ld->curSub = curVal->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* We should be sure that current type of lexeme is recognized by
|
||||
* our dictinonary: we just check is it exist in
|
||||
* list of dictionaries ?
|
||||
* We should be sure that current type of lexeme is recognized
|
||||
* by our dictinonary: we just check is it exist in list of
|
||||
* dictionaries ?
|
||||
*/
|
||||
for(i=0;i < map->len && !dictExists; i++)
|
||||
if ( ld->curDictId == DatumGetObjectId(map->dict_id[i]) )
|
||||
for (i = 0; i < map->len && !dictExists; i++)
|
||||
if (ld->curDictId == DatumGetObjectId(map->dict_id[i]))
|
||||
dictExists = true;
|
||||
|
||||
if ( !dictExists ) {
|
||||
if (!dictExists)
|
||||
{
|
||||
/*
|
||||
* Dictionary can't work with current tpe of lexeme,
|
||||
* return to basic mode and redo all stored lexemes
|
||||
@ -205,38 +235,43 @@ LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
|
||||
ld->curDictId = InvalidOid;
|
||||
return LexizeExec(ld, correspondLexem);
|
||||
}
|
||||
}
|
||||
|
||||
ld->dictState.isend = (curVal->type==0) ? true : false;
|
||||
}
|
||||
|
||||
ld->dictState.isend = (curVal->type == 0) ? true : false;
|
||||
ld->dictState.getnext = false;
|
||||
|
||||
res = (TSLexeme *) DatumGetPointer( FunctionCall4(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(curVal->lemm),
|
||||
Int32GetDatum(curVal->lenlemm),
|
||||
PointerGetDatum(&ld->dictState)
|
||||
));
|
||||
res = (TSLexeme *) DatumGetPointer(FunctionCall4(
|
||||
&(dict->lexize_info),
|
||||
PointerGetDatum(dict->dictionary),
|
||||
PointerGetDatum(curVal->lemm),
|
||||
Int32GetDatum(curVal->lenlemm),
|
||||
PointerGetDatum(&ld->dictState)
|
||||
));
|
||||
|
||||
if ( ld->dictState.getnext ) {
|
||||
if (ld->dictState.getnext)
|
||||
{
|
||||
/* Dictionary wants one more */
|
||||
ld->curSub = curVal->next;
|
||||
if ( res )
|
||||
if (res)
|
||||
setNewTmpRes(ld, curVal, res);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( res || ld->tmpRes ) {
|
||||
if (res || ld->tmpRes)
|
||||
{
|
||||
/*
|
||||
* Dictionary normalizes lexemes,
|
||||
* so we remove from stack all used lexemes ,
|
||||
* return to basic mode and redo end of stack (if it exists)
|
||||
* Dictionary normalizes lexemes, so we remove from stack all
|
||||
* used lexemes , return to basic mode and redo end of stack
|
||||
* (if it exists)
|
||||
*/
|
||||
if ( res ) {
|
||||
moveToWaste( ld, ld->curSub );
|
||||
} else {
|
||||
if (res)
|
||||
{
|
||||
moveToWaste(ld, ld->curSub);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = ld->tmpRes;
|
||||
moveToWaste( ld, ld->lastRes );
|
||||
moveToWaste(ld, ld->lastRes);
|
||||
}
|
||||
|
||||
/* reset to initial state */
|
||||
@ -248,14 +283,15 @@ LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Dict don't want next lexem and didn't recognize anything,
|
||||
redo from ld->towork.head */
|
||||
/*
|
||||
* Dict don't want next lexem and didn't recognize anything, redo
|
||||
* from ld->towork.head
|
||||
*/
|
||||
ld->curDictId = InvalidOid;
|
||||
return LexizeExec(ld, correspondLexem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setCorrLex(ld, correspondLexem);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -70,54 +70,59 @@ char2wchar(wchar_t *to, const char *from, size_t len)
|
||||
|
||||
return mbstowcs(to, from, len);
|
||||
}
|
||||
|
||||
#endif /* WIN32 */
|
||||
#endif /* WIN32 */
|
||||
|
||||
int
|
||||
_t_isalpha( const char *ptr ) {
|
||||
wchar_t character;
|
||||
_t_isalpha(const char *ptr)
|
||||
{
|
||||
wchar_t character;
|
||||
|
||||
char2wchar(&character, ptr, 1);
|
||||
|
||||
return iswalpha( (wint_t)character );
|
||||
return iswalpha((wint_t) character);
|
||||
}
|
||||
|
||||
int
|
||||
_t_isprint( const char *ptr ) {
|
||||
wchar_t character;
|
||||
_t_isprint(const char *ptr)
|
||||
{
|
||||
wchar_t character;
|
||||
|
||||
char2wchar(&character, ptr, 1);
|
||||
|
||||
return iswprint( (wint_t)character );
|
||||
return iswprint((wint_t) character);
|
||||
}
|
||||
|
||||
#endif /* TS_USE_WIDE */
|
||||
#endif /* TS_USE_WIDE */
|
||||
|
||||
char *
|
||||
lowerstr(char *str)
|
||||
{
|
||||
char *ptr = str;
|
||||
char *ptr = str;
|
||||
|
||||
#ifdef TS_USE_WIDE
|
||||
|
||||
/*
|
||||
* Use wide char code only when max encoding length > 1 and ctype != C.
|
||||
* Some operating systems fail with multi-byte encodings and a C locale.
|
||||
* Also, for a C locale there is no need to process as multibyte. From
|
||||
* backend/utils/adt/oracle_compat.c Teodor
|
||||
*/
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) {
|
||||
wchar_t *wstr, *wptr;
|
||||
int len = strlen(str);
|
||||
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
|
||||
{
|
||||
wchar_t *wstr,
|
||||
*wptr;
|
||||
int len = strlen(str);
|
||||
|
||||
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len+1));
|
||||
char2wchar(wstr, str, len+1);
|
||||
while (*wptr) {
|
||||
*wptr = towlower((wint_t) *wptr);
|
||||
wptr++;
|
||||
}
|
||||
wchar2char(str, wstr, len);
|
||||
pfree( wstr );
|
||||
} else
|
||||
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
|
||||
char2wchar(wstr, str, len + 1);
|
||||
while (*wptr)
|
||||
{
|
||||
*wptr = towlower((wint_t) *wptr);
|
||||
wptr++;
|
||||
}
|
||||
wchar2char(str, wstr, len);
|
||||
pfree(wstr);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (*ptr)
|
||||
{
|
||||
@ -126,4 +131,3 @@ lowerstr(char *str)
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
|
@ -35,44 +35,44 @@
|
||||
|
||||
size_t wchar2char(char *to, const wchar_t *from, size_t len);
|
||||
size_t char2wchar(wchar_t *to, const char *from, size_t len);
|
||||
#else /* WIN32 */
|
||||
#else /* WIN32 */
|
||||
|
||||
/* correct mbstowcs */
|
||||
#define char2wchar mbstowcs
|
||||
#define wchar2char wcstombs
|
||||
#endif /* WIN32 */
|
||||
|
||||
#define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
|
||||
#define t_isspace(x) ( pg_mblen(x)==1 && isspace( TOUCHAR(x) ) )
|
||||
extern int _t_isalpha( const char *ptr );
|
||||
#define t_isalpha(x) ( (pg_mblen(x)==1) ? isalpha( TOUCHAR(x) ) : _t_isalpha(x) )
|
||||
extern int _t_isprint( const char *ptr );
|
||||
#define t_isprint(x) ( (pg_mblen(x)==1) ? isprint( TOUCHAR(x) ) : _t_isprint(x) )
|
||||
#define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
|
||||
#define t_isspace(x) ( pg_mblen(x)==1 && isspace( TOUCHAR(x) ) )
|
||||
extern int _t_isalpha(const char *ptr);
|
||||
|
||||
#define t_isalpha(x) ( (pg_mblen(x)==1) ? isalpha( TOUCHAR(x) ) : _t_isalpha(x) )
|
||||
extern int _t_isprint(const char *ptr);
|
||||
|
||||
#define t_isprint(x) ( (pg_mblen(x)==1) ? isprint( TOUCHAR(x) ) : _t_isprint(x) )
|
||||
/*
|
||||
* t_iseq() should be called only for ASCII symbols
|
||||
* t_iseq() should be called only for ASCII symbols
|
||||
*/
|
||||
#define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )
|
||||
#define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )
|
||||
|
||||
#define COPYCHAR(d,s) do { \
|
||||
int lll = pg_mblen( s ); \
|
||||
\
|
||||
while( lll-- ) \
|
||||
while( lll-- ) \
|
||||
TOUCHAR((d)+lll) = TOUCHAR((s)+lll); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#else /* not def TS_USE_WIDE */
|
||||
#else /* not def TS_USE_WIDE */
|
||||
|
||||
#define t_isdigit(x) isdigit( TOUCHAR(x) )
|
||||
#define t_isspace(x) isspace( TOUCHAR(x) )
|
||||
#define t_isalpha(x) isalpha( TOUCHAR(x) )
|
||||
#define t_isprint(x) isprint( TOUCHAR(x) )
|
||||
#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)) )
|
||||
|
||||
#define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s)
|
||||
#define t_isdigit(x) isdigit( TOUCHAR(x) )
|
||||
#define t_isspace(x) isspace( TOUCHAR(x) )
|
||||
#define t_isalpha(x) isalpha( TOUCHAR(x) )
|
||||
#define t_isprint(x) isprint( TOUCHAR(x) )
|
||||
#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)) )
|
||||
|
||||
#define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s)
|
||||
#endif
|
||||
|
||||
char* lowerstr(char *str);
|
||||
char *lowerstr(char *str);
|
||||
|
||||
#endif /* __TSLOCALE_H__ */
|
||||
|
@ -477,7 +477,8 @@ ts_stat_sql(text *txt, text *ws)
|
||||
buf = VARDATA(ws);
|
||||
while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ)
|
||||
{
|
||||
if ( pg_mblen(buf) == 1 ) {
|
||||
if (pg_mblen(buf) == 1)
|
||||
{
|
||||
switch (*buf)
|
||||
{
|
||||
case 'A':
|
||||
@ -500,7 +501,7 @@ ts_stat_sql(text *txt, text *ws)
|
||||
stat->weight |= 0;
|
||||
}
|
||||
}
|
||||
buf+=pg_mblen(buf);
|
||||
buf += pg_mblen(buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -165,13 +165,13 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
|
||||
}
|
||||
|
||||
#define WAITWORD 1
|
||||
#define WAITENDWORD 2
|
||||
#define WAITENDWORD 2
|
||||
#define WAITNEXTCHAR 3
|
||||
#define WAITENDCMPLX 4
|
||||
#define WAITPOSINFO 5
|
||||
#define WAITPOSINFO 5
|
||||
#define INPOSINFO 6
|
||||
#define WAITPOSDELIM 7
|
||||
#define WAITCHARCMPLX 8
|
||||
#define WAITCHARCMPLX 8
|
||||
|
||||
#define RESIZEPRSBUF \
|
||||
do { \
|
||||
@ -200,9 +200,9 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
{
|
||||
if (*(state->prsbuf) == '\0')
|
||||
return 0;
|
||||
else if ( t_iseq(state->prsbuf, '\'') )
|
||||
else if (t_iseq(state->prsbuf, '\''))
|
||||
state->state = WAITENDCMPLX;
|
||||
else if ( t_iseq(state->prsbuf, '\\') )
|
||||
else if (t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
state->state = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
@ -214,7 +214,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
else if (!t_isspace(state->prsbuf))
|
||||
{
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos+=pg_mblen(state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
state->state = WAITENDWORD;
|
||||
}
|
||||
}
|
||||
@ -228,18 +228,18 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos+=pg_mblen(state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
state->state = oldstate;
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITENDWORD)
|
||||
{
|
||||
if ( t_iseq(state->prsbuf, '\\') )
|
||||
if (t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
state->state = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if ( t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
||||
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
||||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
@ -250,7 +250,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
*(state->curpos) = '\0';
|
||||
return 1;
|
||||
}
|
||||
else if ( t_iseq(state->prsbuf,':') )
|
||||
else if (t_iseq(state->prsbuf, ':'))
|
||||
{
|
||||
if (state->curpos == state->word)
|
||||
ereport(ERROR,
|
||||
@ -266,15 +266,16 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos+=pg_mblen(state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITENDCMPLX)
|
||||
{
|
||||
if ( t_iseq(state->prsbuf, '\'') ) {
|
||||
state->state = WAITCHARCMPLX;
|
||||
if (t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
state->state = WAITCHARCMPLX;
|
||||
}
|
||||
else if ( t_iseq(state->prsbuf, '\\') )
|
||||
else if (t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
state->state = WAITNEXTCHAR;
|
||||
oldstate = WAITENDCMPLX;
|
||||
@ -287,18 +288,20 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos+=pg_mblen(state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITCHARCMPLX)
|
||||
{
|
||||
if ( t_iseq(state->prsbuf, '\'') )
|
||||
if (t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos+=pg_mblen(state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
state->state = WAITENDCMPLX;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
*(state->curpos) = '\0';
|
||||
if (state->curpos == state->word)
|
||||
@ -312,12 +315,12 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
}
|
||||
else
|
||||
state->state = WAITPOSINFO;
|
||||
continue; /* recheck current character */
|
||||
continue; /* recheck current character */
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITPOSINFO)
|
||||
{
|
||||
if ( t_iseq(state->prsbuf, ':') )
|
||||
if (t_iseq(state->prsbuf, ':'))
|
||||
state->state = INPOSINFO;
|
||||
else
|
||||
return 1;
|
||||
@ -353,9 +356,9 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
}
|
||||
else if (state->state == WAITPOSDELIM)
|
||||
{
|
||||
if ( t_iseq(state->prsbuf, ',') )
|
||||
if (t_iseq(state->prsbuf, ','))
|
||||
state->state = INPOSINFO;
|
||||
else if ( t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*') )
|
||||
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
@ -363,7 +366,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
errmsg("syntax error")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
|
||||
}
|
||||
else if ( t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B') )
|
||||
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
@ -371,7 +374,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
errmsg("syntax error")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
|
||||
}
|
||||
else if ( t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C') )
|
||||
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
@ -379,7 +382,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
errmsg("syntax error")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
|
||||
}
|
||||
else if ( t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D') )
|
||||
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
@ -400,7 +403,7 @@ gettoken_tsvector(TI_IN_STATE * state)
|
||||
elog(ERROR, "internal error");
|
||||
|
||||
/* get next char */
|
||||
state->prsbuf+=pg_mblen(state->prsbuf);
|
||||
state->prsbuf += pg_mblen(state->prsbuf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -423,7 +426,7 @@ tsvector_in(PG_FUNCTION_ARGS)
|
||||
|
||||
SET_FUNCOID();
|
||||
|
||||
pg_verifymbstr( buf, strlen(buf), false );
|
||||
pg_verifymbstr(buf, strlen(buf), false);
|
||||
state.prsbuf = buf;
|
||||
state.len = 32;
|
||||
state.word = (char *) palloc(state.len);
|
||||
@ -517,13 +520,14 @@ tsvector_out(PG_FUNCTION_ARGS)
|
||||
lenbuf = 0,
|
||||
pp;
|
||||
WordEntry *ptr = ARRPTR(out);
|
||||
char *curbegin, *curin,
|
||||
char *curbegin,
|
||||
*curin,
|
||||
*curout;
|
||||
|
||||
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length()/* for escape */ ;
|
||||
lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
|
||||
if (ptr[i].haspos)
|
||||
lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
|
||||
}
|
||||
@ -535,10 +539,11 @@ tsvector_out(PG_FUNCTION_ARGS)
|
||||
if (i != 0)
|
||||
*curout++ = ' ';
|
||||
*curout++ = '\'';
|
||||
while ( curin-curbegin < ptr->len )
|
||||
while (curin - curbegin < ptr->len)
|
||||
{
|
||||
int len = pg_mblen(curin);
|
||||
if ( t_iseq(curin, '\'') )
|
||||
int len = pg_mblen(curin);
|
||||
|
||||
if (t_iseq(curin, '\''))
|
||||
{
|
||||
int4 pos = curout - outbuf;
|
||||
|
||||
@ -546,7 +551,7 @@ tsvector_out(PG_FUNCTION_ARGS)
|
||||
curout = outbuf + pos;
|
||||
*curout++ = '\'';
|
||||
}
|
||||
while(len--)
|
||||
while (len--)
|
||||
*curout++ = *curin++;
|
||||
}
|
||||
*curout++ = '\'';
|
||||
@ -983,36 +988,49 @@ silly_cmp_tsvector(const tsvector * a, const tsvector * b)
|
||||
{
|
||||
WordEntry *aptr = ARRPTR(a);
|
||||
WordEntry *bptr = ARRPTR(b);
|
||||
int i = 0;
|
||||
int res;
|
||||
int i = 0;
|
||||
int res;
|
||||
|
||||
|
||||
for(i=0;i<a->size;i++) {
|
||||
if ( aptr->haspos != bptr->haspos ) {
|
||||
return ( aptr->haspos > bptr->haspos ) ? -1 : 1;
|
||||
} else if ( aptr->len != bptr->len ) {
|
||||
return ( aptr->len > bptr->len ) ? -1 : 1;
|
||||
} else if ( (res=strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len))!= 0 ) {
|
||||
for (i = 0; i < a->size; i++)
|
||||
{
|
||||
if (aptr->haspos != bptr->haspos)
|
||||
{
|
||||
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
||||
}
|
||||
else if (aptr->len != bptr->len)
|
||||
{
|
||||
return (aptr->len > bptr->len) ? -1 : 1;
|
||||
}
|
||||
else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
|
||||
{
|
||||
return res;
|
||||
} else if ( aptr->haspos ) {
|
||||
WordEntryPos *ap = POSDATAPTR(a, aptr);
|
||||
WordEntryPos *bp = POSDATAPTR(b, bptr);
|
||||
int j;
|
||||
}
|
||||
else if (aptr->haspos)
|
||||
{
|
||||
WordEntryPos *ap = POSDATAPTR(a, aptr);
|
||||
WordEntryPos *bp = POSDATAPTR(b, bptr);
|
||||
int j;
|
||||
|
||||
if ( POSDATALEN(a, aptr) != POSDATALEN(b, bptr) )
|
||||
return ( POSDATALEN(a, aptr) > POSDATALEN(b, bptr) ) ? -1 : 1;
|
||||
if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
|
||||
return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
|
||||
|
||||
for(j=0;j<POSDATALEN(a, aptr);j++) {
|
||||
if ( WEP_GETPOS(*ap) != WEP_GETPOS(*bp) ) {
|
||||
return ( WEP_GETPOS(*ap) > WEP_GETPOS(*bp) ) ? -1 : 1;
|
||||
} else if ( WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp) ) {
|
||||
return ( WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp) ) ? -1 : 1;
|
||||
for (j = 0; j < POSDATALEN(a, aptr); j++)
|
||||
{
|
||||
if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
|
||||
{
|
||||
return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
|
||||
}
|
||||
else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
|
||||
{
|
||||
return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
|
||||
}
|
||||
ap++, bp++;
|
||||
}
|
||||
}
|
||||
|
||||
aptr++; bptr++;
|
||||
aptr++;
|
||||
bptr++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -49,17 +49,17 @@ typedef uint16 WordEntryPos;
|
||||
|
||||
/*
|
||||
* Structure of tsvector datatype:
|
||||
* 1) int4 len - varlena's length
|
||||
* 1) int4 len - varlena's length
|
||||
* 2) int4 size - number of lexemes or WordEntry array, which is the same
|
||||
* 3) Array of WordEntry - sorted array, comparison based on word's length
|
||||
* and strncmp(). WordEntry->pos points number of
|
||||
* bytes from end of WordEntry array to start of
|
||||
* corresponding lexeme.
|
||||
* 4) Lexeme's storage:
|
||||
* SHORTALIGNED(lexeme) and position information if it exists
|
||||
* Position information: first int2 - is a number of positions and it
|
||||
* follows array of WordEntryPos
|
||||
*/
|
||||
* SHORTALIGNED(lexeme) and position information if it exists
|
||||
* Position information: first int2 - is a number of positions and it
|
||||
* follows array of WordEntryPos
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.10 2006/03/11 04:38:30 momjian Exp $ */
|
||||
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11 2006/10/04 00:29:47 momjian Exp $ */
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
@ -458,7 +458,7 @@ static TParserStateActionItem actionTPS_InVerVersion[] = {
|
||||
|
||||
static TParserStateActionItem actionTPS_InSVerVersion[] = {
|
||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||
{p_isdigit, 0, A_BINGO|A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},
|
||||
{p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},
|
||||
{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
|
||||
};
|
||||
|
||||
@ -613,7 +613,7 @@ static TParserStateActionItem actionTPS_InTagEnd[] = {
|
||||
static TParserStateActionItem actionTPS_InCommentFirst[] = {
|
||||
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
|
||||
{p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
|
||||
/* <!DOCTYPE ...>*/
|
||||
/* <!DOCTYPE ...> */
|
||||
{p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL},
|
||||
{p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL},
|
||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||
@ -753,10 +753,10 @@ static TParserStateActionItem actionTPS_InPathFirstFirst[] = {
|
||||
};
|
||||
|
||||
static TParserStateActionItem actionTPS_InPathSecond[] = {
|
||||
{p_isEOF, 0, A_BINGO|A_CLEAR, TPS_Base, FILEPATH, NULL},
|
||||
{p_iseqC, '/', A_NEXT|A_PUSH, TPS_InFileFirst, 0, NULL},
|
||||
{p_iseqC, '/', A_BINGO|A_CLEAR, TPS_Base, FILEPATH, NULL},
|
||||
{p_isspace, 0, A_BINGO|A_CLEAR, TPS_Base, FILEPATH, NULL},
|
||||
{p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
|
||||
{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
|
||||
{p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
|
||||
{p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
|
||||
{NULL, 0, A_POP, TPS_Null, 0, NULL}
|
||||
};
|
||||
|
||||
|
@ -347,8 +347,8 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int cur;
|
||||
int len;
|
||||
int cur;
|
||||
int len;
|
||||
LexemeEntry *list;
|
||||
} PrsStorage;
|
||||
|
||||
|
Reference in New Issue
Block a user