mirror of
https://github.com/postgres/postgres.git
synced 2025-04-27 22:56:53 +03:00
Improvements from Heikki Linnakangas <heikki@enterprisedb.com>
- Change the alignment requirement of lexemes in TSVector slightly. Lexeme strings were always padded to a 2-byte aligned length to make sure that if there's a position array (uint16[]) it has the right alignment. The patch changes that so that the padding is not done when there are no positions. That makes the storage of tsvectors without positions slightly more compact.
- Added some #include "miscadmin.h" lines I missed earlier when I added calls to check_stack_depth().
- Reimplemented the send/recv functions, and added a comment above them describing the on-wire format. The CRC is now recalculated in tsquery as well, per previous discussion.
This commit is contained in:
parent
8983852e34
commit
978de9d06d
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.3 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -22,7 +22,7 @@ Datum
|
|||||||
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
TSVector vector = PG_GETARG_TSVECTOR(0);
|
TSVector vector = PG_GETARG_TSVECTOR(0);
|
||||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||||
Datum *entries = NULL;
|
Datum *entries = NULL;
|
||||||
|
|
||||||
*nentries = 0;
|
*nentries = 0;
|
||||||
@ -55,7 +55,7 @@ Datum
|
|||||||
gin_extract_query(PG_FUNCTION_ARGS)
|
gin_extract_query(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||||
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
||||||
Datum *entries = NULL;
|
Datum *entries = NULL;
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.4 2007/09/07 15:35:10 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.5 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -21,7 +21,6 @@
|
|||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
#include "utils/pg_crc.h"
|
#include "utils/pg_crc.h"
|
||||||
#include "nodes/bitmapset.h"
|
|
||||||
|
|
||||||
|
|
||||||
struct TSQueryParserStateData
|
struct TSQueryParserStateData
|
||||||
@ -384,16 +383,15 @@ makepol(TSQueryParserState state,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Fills in the left-fields previously left unfilled. The input
|
|
||||||
* QueryItems must be in polish (prefix) notation.
|
|
||||||
*/
|
|
||||||
static void
|
static void
|
||||||
findoprnd(QueryItem *ptr, uint32 *pos)
|
findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
|
||||||
{
|
{
|
||||||
/* since this function recurses, it could be driven to stack overflow. */
|
/* since this function recurses, it could be driven to stack overflow. */
|
||||||
check_stack_depth();
|
check_stack_depth();
|
||||||
|
|
||||||
|
if (*pos >= nnodes)
|
||||||
|
elog(ERROR, "malformed tsquery; operand not found");
|
||||||
|
|
||||||
if (ptr[*pos].type == QI_VAL ||
|
if (ptr[*pos].type == QI_VAL ||
|
||||||
ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
|
ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
|
||||||
* they haven't been cleaned
|
* they haven't been cleaned
|
||||||
@ -410,7 +408,7 @@ findoprnd(QueryItem *ptr, uint32 *pos)
|
|||||||
{
|
{
|
||||||
ptr[*pos].operator.left = 1;
|
ptr[*pos].operator.left = 1;
|
||||||
(*pos)++;
|
(*pos)++;
|
||||||
findoprnd(ptr, pos);
|
findoprnd_recurse(ptr, pos, nnodes);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -420,13 +418,31 @@ findoprnd(QueryItem *ptr, uint32 *pos)
|
|||||||
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
|
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
|
||||||
|
|
||||||
(*pos)++;
|
(*pos)++;
|
||||||
findoprnd(ptr, pos);
|
findoprnd_recurse(ptr, pos, nnodes);
|
||||||
curitem->left = *pos - tmp;
|
curitem->left = *pos - tmp;
|
||||||
findoprnd(ptr, pos);
|
findoprnd_recurse(ptr, pos, nnodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fills in the left-fields previously left unfilled. The input
|
||||||
|
* QueryItems must be in polish (prefix) notation.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
findoprnd(QueryItem *ptr, int size)
|
||||||
|
{
|
||||||
|
uint32 pos;
|
||||||
|
|
||||||
|
pos = 0;
|
||||||
|
findoprnd_recurse(ptr, &pos, size);
|
||||||
|
|
||||||
|
if (pos != size)
|
||||||
|
elog(ERROR, "malformed tsquery; extra nodes");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Each value (operand) in the query is be passed to pushval. pushval can
|
* Each value (operand) in the query is be passed to pushval. pushval can
|
||||||
* transform the simple value to an arbitrarily complex expression using
|
* transform the simple value to an arbitrarily complex expression using
|
||||||
@ -452,7 +468,6 @@ parse_tsquery(char *buf,
|
|||||||
TSQuery query;
|
TSQuery query;
|
||||||
int commonlen;
|
int commonlen;
|
||||||
QueryItem *ptr;
|
QueryItem *ptr;
|
||||||
uint32 pos = 0;
|
|
||||||
ListCell *cell;
|
ListCell *cell;
|
||||||
|
|
||||||
/* init state */
|
/* init state */
|
||||||
@ -522,8 +537,7 @@ parse_tsquery(char *buf,
|
|||||||
pfree(state.op);
|
pfree(state.op);
|
||||||
|
|
||||||
/* Set left operand pointers for every operator. */
|
/* Set left operand pointers for every operator. */
|
||||||
pos = 0;
|
findoprnd(ptr, query->size);
|
||||||
findoprnd(ptr, &pos);
|
|
||||||
|
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
@ -734,6 +748,22 @@ tsqueryout(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_CSTRING(nrm.buf);
|
PG_RETURN_CSTRING(nrm.buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Binary Input / Output functions. The binary format is as follows:
|
||||||
|
*
|
||||||
|
* uint32 number of operators/operands in the query
|
||||||
|
*
|
||||||
|
* Followed by the operators and operands, in prefix notation. For each
|
||||||
|
* operand:
|
||||||
|
*
|
||||||
|
* uint8 type, QI_VAL
|
||||||
|
* uint8 weight
|
||||||
|
* operand text in client encoding, null-terminated
|
||||||
|
*
|
||||||
|
* For each operator:
|
||||||
|
* uint8 type, QI_OPR
|
||||||
|
* uint8 operator, one of OP_AND, OP_OR, OP_NOT.
|
||||||
|
*/
|
||||||
Datum
|
Datum
|
||||||
tsquerysend(PG_FUNCTION_ARGS)
|
tsquerysend(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -744,7 +774,7 @@ tsquerysend(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
pq_begintypsend(&buf);
|
pq_begintypsend(&buf);
|
||||||
|
|
||||||
pq_sendint(&buf, query->size, sizeof(int32));
|
pq_sendint(&buf, query->size, sizeof(uint32));
|
||||||
for (i = 0; i < query->size; i++)
|
for (i = 0; i < query->size; i++)
|
||||||
{
|
{
|
||||||
pq_sendint(&buf, item->type, sizeof(item->type));
|
pq_sendint(&buf, item->type, sizeof(item->type));
|
||||||
@ -752,16 +782,13 @@ tsquerysend(PG_FUNCTION_ARGS)
|
|||||||
switch(item->type)
|
switch(item->type)
|
||||||
{
|
{
|
||||||
case QI_VAL:
|
case QI_VAL:
|
||||||
pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
|
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
|
||||||
pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
|
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
|
||||||
pq_sendint(&buf, item->operand.length, sizeof(int16));
|
|
||||||
/* istrue flag is just for temporary use in tsrank.c/Cover,
|
/* istrue flag is just for temporary use in tsrank.c/Cover,
|
||||||
* so we don't need to transfer that */
|
* so we don't need to transfer that */
|
||||||
break;
|
break;
|
||||||
case QI_OPR:
|
case QI_OPR:
|
||||||
pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
|
pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
|
||||||
if (item->operator.oper != OP_NOT)
|
|
||||||
pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
elog(ERROR, "unknown tsquery node type %d", item->type);
|
elog(ERROR, "unknown tsquery node type %d", item->type);
|
||||||
@ -769,14 +796,6 @@ tsquerysend(PG_FUNCTION_ARGS)
|
|||||||
item++;
|
item++;
|
||||||
}
|
}
|
||||||
|
|
||||||
item = GETQUERY(query);
|
|
||||||
for (i = 0; i < query->size; i++)
|
|
||||||
{
|
|
||||||
if (item->type == QI_VAL)
|
|
||||||
pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
|
|
||||||
item++;
|
|
||||||
}
|
|
||||||
|
|
||||||
PG_FREE_IF_COPY(query, 0);
|
PG_FREE_IF_COPY(query, 0);
|
||||||
|
|
||||||
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
||||||
@ -788,141 +807,113 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
|||||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||||
TSQuery query;
|
TSQuery query;
|
||||||
int i,
|
int i,
|
||||||
size,
|
|
||||||
len;
|
len;
|
||||||
QueryItem *item;
|
QueryItem *item;
|
||||||
int datalen = 0;
|
int datalen;
|
||||||
char *ptr;
|
char *ptr;
|
||||||
Bitmapset *parentset = NULL;
|
uint32 size;
|
||||||
|
const char **operands;
|
||||||
|
|
||||||
size = pq_getmsgint(buf, sizeof(uint32));
|
size = pq_getmsgint(buf, sizeof(uint32));
|
||||||
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
|
if (size > (MaxAllocSize / sizeof(QueryItem)))
|
||||||
elog(ERROR, "invalid size of tsquery");
|
elog(ERROR, "invalid size of tsquery");
|
||||||
|
|
||||||
len = HDRSIZETQ + sizeof(QueryItem) * size;
|
/* Allocate space to temporarily hold operand strings */
|
||||||
|
operands = palloc(size * sizeof(char *));
|
||||||
|
|
||||||
|
/* Allocate space for all the QueryItems. */
|
||||||
|
len = HDRSIZETQ + sizeof(QueryItem) * size;
|
||||||
query = (TSQuery) palloc0(len);
|
query = (TSQuery) palloc0(len);
|
||||||
query->size = size;
|
query->size = size;
|
||||||
item = GETQUERY(query);
|
item = GETQUERY(query);
|
||||||
|
|
||||||
|
datalen = 0;
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < size; i++)
|
||||||
{
|
{
|
||||||
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
|
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
|
||||||
|
|
||||||
switch(item->type)
|
if (item->type == QI_VAL)
|
||||||
{
|
{
|
||||||
case QI_VAL:
|
size_t val_len; /* length after recoding to server encoding */
|
||||||
item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
|
uint8 weight;
|
||||||
item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
|
const char *val;
|
||||||
item->operand.length = pq_getmsgint(buf, sizeof(int16));
|
pg_crc32 valcrc;
|
||||||
|
|
||||||
/* Check that the weight bitmap is valid */
|
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||||
if (item->operand.weight < 0 || item->operand.weight > 0xF)
|
val = pq_getmsgstring(buf);
|
||||||
elog(ERROR, "invalid weight bitmap");
|
val_len = strlen(val);
|
||||||
|
|
||||||
/* XXX: We don't check that the CRC is valid. Actually, if we
|
/* Sanity checks */
|
||||||
* bothered to calculate it to verify, there would be no need
|
|
||||||
* to transfer it.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
if (weight > 0xF)
|
||||||
* Check that datalen doesn't grow too large. Without the
|
elog(ERROR, "invalid tsquery; invalid weight bitmap");
|
||||||
* check, a malicious client could induce a buffer overflow
|
|
||||||
* by sending a tsquery whose size exceeds 2GB. datalen
|
|
||||||
* would overflow, we would allocate a too small buffer below,
|
|
||||||
* and overflow the buffer. Because operand.length is a 20-bit
|
|
||||||
* field, adding one such value to datalen must exceed
|
|
||||||
* MaxAllocSize before wrapping over the 32-bit datalen field,
|
|
||||||
* so this check will protect from it.
|
|
||||||
*/
|
|
||||||
if (datalen > MAXSTRLEN)
|
|
||||||
elog(ERROR, "invalid tsquery; total operand length exceeded");
|
|
||||||
|
|
||||||
/* We can calculate distance from datalen, no need to send it
|
if (val_len > MAXSTRLEN)
|
||||||
* across the wire. If we did, we would have to check that
|
elog(ERROR, "invalid tsquery; operand too long");
|
||||||
* it's valid anyway.
|
|
||||||
*/
|
if (datalen > MAXSTRPOS)
|
||||||
item->operand.distance = datalen;
|
elog(ERROR, "invalid tsquery; total operand length exceeded");
|
||||||
|
|
||||||
datalen += item->operand.length + 1; /* \0 */
|
/* Looks valid. */
|
||||||
|
|
||||||
break;
|
INIT_CRC32(valcrc);
|
||||||
case QI_OPR:
|
COMP_CRC32(valcrc, val, val_len);
|
||||||
item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
|
FIN_CRC32(valcrc);
|
||||||
if (item->operator.oper != OP_NOT &&
|
|
||||||
item->operator.oper != OP_OR &&
|
|
||||||
item->operator.oper != OP_AND)
|
|
||||||
elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
|
|
||||||
|
|
||||||
/*
|
item->operand.weight = weight;
|
||||||
* Check that no previous operator node points to the right
|
item->operand.valcrc = (int32) valcrc;
|
||||||
* operand. That would mean that the operand node
|
item->operand.length = val_len;
|
||||||
* has two parents.
|
item->operand.distance = datalen;
|
||||||
*/
|
|
||||||
if (bms_is_member(i + 1, parentset))
|
|
||||||
elog(ERROR, "malformed query tree");
|
|
||||||
|
|
||||||
parentset = bms_add_member(parentset, i + 1);
|
/*
|
||||||
|
* Operand strings are copied to the final struct after this loop;
|
||||||
|
* here we just collect them to an array
|
||||||
|
*/
|
||||||
|
operands[i] = val;
|
||||||
|
|
||||||
if(item->operator.oper != OP_NOT)
|
datalen += val_len + 1; /* + 1 for the '\0' terminator */
|
||||||
{
|
}
|
||||||
uint32 left = (uint32) pq_getmsgint(buf, sizeof(uint32));
|
else if (item->type == QI_OPR)
|
||||||
|
{
|
||||||
|
int8 oper;
|
||||||
|
oper = (int8) pq_getmsgint(buf, sizeof(int8));
|
||||||
|
if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
|
||||||
|
elog(ERROR, "invalid tsquery; unknown operator type %d", (int) oper);
|
||||||
|
if (i == size - 1)
|
||||||
|
elog(ERROR, "invalid pointer to right operand");
|
||||||
|
|
||||||
/*
|
item->operator.oper = oper;
|
||||||
* Right operand is implicitly at "this+1". Don't allow
|
|
||||||
* left to point to the right operand, or to self.
|
|
||||||
*/
|
|
||||||
if (left <= 1 || i + left >= size)
|
|
||||||
elog(ERROR, "invalid pointer to left operand");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check that no previous operator node points to the left
|
|
||||||
* operand.
|
|
||||||
*/
|
|
||||||
if (bms_is_member(i + left, parentset))
|
|
||||||
elog(ERROR, "malformed query tree");
|
|
||||||
|
|
||||||
parentset = bms_add_member(parentset, i + left);
|
|
||||||
|
|
||||||
item->operator.left = left;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
item->operator.left = 1; /* do not leave uninitialized fields */
|
|
||||||
|
|
||||||
if (i == size - 1)
|
|
||||||
elog(ERROR, "invalid pointer to right operand");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
elog(ERROR, "unknown tsquery node type %d", item->type);
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
elog(ERROR, "unknown tsquery node type %d", item->type);
|
||||||
|
|
||||||
item++;
|
item++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now check that each node, except the root, has a parent. We
|
/* Enlarge buffer to make room for the operand values. */
|
||||||
* already checked above that no node has more than one parent. */
|
|
||||||
if (bms_num_members(parentset) != size - 1 && size != 0)
|
|
||||||
elog(ERROR, "malformed query tree");
|
|
||||||
|
|
||||||
bms_free( parentset );
|
|
||||||
|
|
||||||
query = (TSQuery) repalloc(query, len + datalen);
|
query = (TSQuery) repalloc(query, len + datalen);
|
||||||
|
|
||||||
item = GETQUERY(query);
|
item = GETQUERY(query);
|
||||||
ptr = GETOPERAND(query);
|
ptr = GETOPERAND(query);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fill in the left-pointers. Checks that the tree is well-formed
|
||||||
|
* as a side-effect.
|
||||||
|
*/
|
||||||
|
findoprnd(item, size);
|
||||||
|
|
||||||
|
/* Copy operands to output struct */
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < size; i++)
|
||||||
{
|
{
|
||||||
if (item->type == QI_VAL)
|
if (item->type == QI_VAL)
|
||||||
{
|
{
|
||||||
memcpy(ptr,
|
memcpy(ptr, operands[i], item->operand.length + 1);
|
||||||
pq_getmsgbytes(buf, item->operand.length),
|
ptr += item->operand.length + 1;
|
||||||
item->operand.length);
|
|
||||||
ptr += item->operand.length;
|
|
||||||
*ptr++ = '\0';
|
|
||||||
}
|
}
|
||||||
item++;
|
item++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pfree(operands);
|
||||||
|
|
||||||
Assert(ptr - GETOPERAND(query) == datalen);
|
Assert(ptr - GETOPERAND(query) == datalen);
|
||||||
|
|
||||||
SET_VARSIZE(query, len + datalen);
|
SET_VARSIZE(query, len + datalen);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
#include "tsearch/ts_type.h"
|
#include "tsearch/ts_type.h"
|
||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
|
||||||
typedef struct NODE
|
typedef struct NODE
|
||||||
{
|
{
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -17,6 +17,7 @@
|
|||||||
#include "executor/spi.h"
|
#include "executor/spi.h"
|
||||||
#include "tsearch/ts_type.h"
|
#include "tsearch/ts_type.h"
|
||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "tsearch/ts_type.h"
|
#include "tsearch/ts_type.h"
|
||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
|
||||||
QTNode *
|
QTNode *
|
||||||
QT2QTN(QueryItem * in, char *operand)
|
QT2QTN(QueryItem * in, char *operand)
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.3 2007/09/07 15:35:10 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -18,6 +18,7 @@
|
|||||||
#include "tsearch/ts_type.h"
|
#include "tsearch/ts_type.h"
|
||||||
#include "tsearch/ts_utils.h"
|
#include "tsearch/ts_utils.h"
|
||||||
#include "utils/array.h"
|
#include "utils/array.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
|
||||||
|
|
||||||
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
||||||
@ -176,8 +177,9 @@ SortAndUniqItems(TSQuery q, int *size)
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* A dummy WordEntryPos array to use when haspos is false */
|
||||||
static WordEntryPos POSNULL[] = {
|
static WordEntryPos POSNULL[] = {
|
||||||
0,
|
1, /* Number of elements that follow */
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -207,7 +209,6 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
|||||||
}
|
}
|
||||||
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
||||||
memset(pos, 0, sizeof(uint16 *) * q->size);
|
memset(pos, 0, sizeof(uint16 *) * q->size);
|
||||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
||||||
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
|
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
|
||||||
|
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < size; i++)
|
||||||
@ -265,7 +266,6 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
|
|||||||
QueryOperand **item;
|
QueryOperand **item;
|
||||||
int size = q->size;
|
int size = q->size;
|
||||||
|
|
||||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
||||||
item = SortAndUniqItems(q, &size);
|
item = SortAndUniqItems(q, &size);
|
||||||
|
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < size; i++)
|
||||||
@ -593,7 +593,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
|
|||||||
DocRepresentation *doc;
|
DocRepresentation *doc;
|
||||||
char *operand;
|
char *operand;
|
||||||
|
|
||||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
|
||||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||||
operand = GETOPERAND(query);
|
operand = GETOPERAND(query);
|
||||||
reset_istrue_flag(query);
|
reset_istrue_flag(query);
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -75,18 +75,20 @@ uniquePos(WordEntryPos * a, int l)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compareentry(const void *a, const void *b, void *arg)
|
compareentry(const void *va, const void *vb, void *arg)
|
||||||
{
|
{
|
||||||
char *BufferStr = (char *) arg;
|
char *BufferStr = (char *) arg;
|
||||||
|
WordEntryIN *a = (WordEntryIN *) va;
|
||||||
|
WordEntryIN *b = (WordEntryIN *) vb;
|
||||||
|
|
||||||
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
|
if (a->entry.len == b->entry.len)
|
||||||
{
|
{
|
||||||
return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
|
return strncmp(&BufferStr[a->entry.pos],
|
||||||
&BufferStr[((WordEntryIN *) b)->entry.pos],
|
&BufferStr[b->entry.pos],
|
||||||
((WordEntryIN *) a)->entry.len);
|
a->entry.len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
|
return (a->entry.len > b->entry.len) ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -104,6 +106,9 @@ uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
|
|||||||
a->poslen = uniquePos(a->pos, a->poslen);
|
a->poslen = uniquePos(a->pos, a->poslen);
|
||||||
*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
|
*outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
*outbuflen = a->entry.len;
|
||||||
|
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
res = a;
|
res = a;
|
||||||
@ -118,10 +123,12 @@ uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
|
|||||||
{
|
{
|
||||||
if (res->entry.haspos)
|
if (res->entry.haspos)
|
||||||
{
|
{
|
||||||
|
*outbuflen += SHORTALIGN(res->entry.len);
|
||||||
res->poslen = uniquePos(res->pos, res->poslen);
|
res->poslen = uniquePos(res->pos, res->poslen);
|
||||||
*outbuflen += res->poslen * sizeof(WordEntryPos);
|
*outbuflen += res->poslen * sizeof(WordEntryPos);
|
||||||
}
|
}
|
||||||
*outbuflen += SHORTALIGN(res->entry.len);
|
else
|
||||||
|
*outbuflen += res->entry.len;
|
||||||
res++;
|
res++;
|
||||||
memcpy(res, ptr, sizeof(WordEntryIN));
|
memcpy(res, ptr, sizeof(WordEntryIN));
|
||||||
}
|
}
|
||||||
@ -147,12 +154,18 @@ uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
|
|||||||
}
|
}
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* add last item */
|
||||||
|
|
||||||
if (res->entry.haspos)
|
if (res->entry.haspos)
|
||||||
{
|
{
|
||||||
|
*outbuflen += SHORTALIGN(res->entry.len);
|
||||||
|
|
||||||
res->poslen = uniquePos(res->pos, res->poslen);
|
res->poslen = uniquePos(res->pos, res->poslen);
|
||||||
*outbuflen += res->poslen * sizeof(WordEntryPos);
|
*outbuflen += res->poslen * sizeof(WordEntryPos);
|
||||||
}
|
}
|
||||||
*outbuflen += SHORTALIGN(res->entry.len);
|
else
|
||||||
|
*outbuflen += res->entry.len;
|
||||||
|
|
||||||
return res + 1 - a;
|
return res + 1 - a;
|
||||||
}
|
}
|
||||||
@ -367,6 +380,18 @@ tsvectorout(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_CSTRING(outbuf);
|
PG_RETURN_CSTRING(outbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Binary Input / Output functions. The binary format is as follows:
|
||||||
|
*
|
||||||
|
* uint32 number of lexemes
|
||||||
|
*
|
||||||
|
* for each lexeme:
|
||||||
|
* lexeme text in client encoding, null-terminated
|
||||||
|
* uint16 number of positions
|
||||||
|
* for each position:
|
||||||
|
* uint16 WordEntryPos
|
||||||
|
*/
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
tsvectorsend(PG_FUNCTION_ARGS)
|
tsvectorsend(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -381,18 +406,22 @@ tsvectorsend(PG_FUNCTION_ARGS)
|
|||||||
pq_sendint(&buf, vec->size, sizeof(int32));
|
pq_sendint(&buf, vec->size, sizeof(int32));
|
||||||
for (i = 0; i < vec->size; i++)
|
for (i = 0; i < vec->size; i++)
|
||||||
{
|
{
|
||||||
/*
|
uint16 npos;
|
||||||
* We are sure that sizeof(WordEntry) == sizeof(int32)
|
|
||||||
*/
|
|
||||||
pq_sendint(&buf, *(int32 *) weptr, sizeof(int32));
|
|
||||||
|
|
||||||
pq_sendbytes(&buf, STRPTR(vec) + weptr->pos, weptr->len);
|
/* the strings in the TSVector array are not null-terminated, so
|
||||||
if (weptr->haspos)
|
* we have to send the null-terminator separately
|
||||||
|
*/
|
||||||
|
pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
|
||||||
|
pq_sendbyte(&buf, '\0');
|
||||||
|
|
||||||
|
npos = POSDATALEN(vec, weptr);
|
||||||
|
pq_sendint(&buf, npos, sizeof(uint16));
|
||||||
|
|
||||||
|
if(npos > 0)
|
||||||
{
|
{
|
||||||
WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
|
WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
|
||||||
|
|
||||||
pq_sendint(&buf, POSDATALEN(vec, weptr), sizeof(WordEntryPos));
|
for (j = 0; j < npos; j++)
|
||||||
for (j = 0; j < POSDATALEN(vec, weptr); j++)
|
|
||||||
pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
|
pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
|
||||||
}
|
}
|
||||||
weptr++;
|
weptr++;
|
||||||
@ -407,71 +436,92 @@ tsvectorrecv(PG_FUNCTION_ARGS)
|
|||||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||||
TSVector vec;
|
TSVector vec;
|
||||||
int i;
|
int i;
|
||||||
uint32 size;
|
int32 nentries;
|
||||||
WordEntry *weptr;
|
int datalen; /* number of bytes used in the variable size area
|
||||||
int datalen = 0;
|
* after fixed size TSVector header and WordEntries
|
||||||
Size len;
|
*/
|
||||||
|
Size hdrlen;
|
||||||
|
Size len; /* allocated size of vec */
|
||||||
|
|
||||||
size = pq_getmsgint(buf, sizeof(uint32));
|
nentries = pq_getmsgint(buf, sizeof(int32));
|
||||||
if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
|
if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
|
||||||
elog(ERROR, "invalid size of tsvector");
|
elog(ERROR, "invalid size of tsvector");
|
||||||
|
|
||||||
len = DATAHDRSIZE + sizeof(WordEntry) * size;
|
hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries;
|
||||||
|
|
||||||
len = len * 2; /* times two to make room for lexemes */
|
len = hdrlen * 2; /* times two to make room for lexemes */
|
||||||
vec = (TSVector) palloc0(len);
|
vec = (TSVector) palloc0(len);
|
||||||
vec->size = size;
|
vec->size = nentries;
|
||||||
|
|
||||||
weptr = ARRPTR(vec);
|
datalen = 0;
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < nentries; i++)
|
||||||
{
|
{
|
||||||
int32 tmp;
|
const char *lexeme;
|
||||||
|
uint16 npos;
|
||||||
|
size_t lex_len;
|
||||||
|
|
||||||
weptr = ARRPTR(vec) + i;
|
lexeme = pq_getmsgstring(buf);
|
||||||
|
npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
|
||||||
|
|
||||||
|
/* sanity checks */
|
||||||
|
|
||||||
|
lex_len = strlen(lexeme);
|
||||||
|
if (lex_len < 0 || lex_len > MAXSTRLEN)
|
||||||
|
elog(ERROR, "invalid tsvector; lexeme too long");
|
||||||
|
|
||||||
|
if (datalen > MAXSTRPOS)
|
||||||
|
elog(ERROR, "invalid tsvector; maximum total lexeme length exceeded");
|
||||||
|
|
||||||
|
if (npos > MAXNUMPOS)
|
||||||
|
elog(ERROR, "unexpected number of positions");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are sure that sizeof(WordEntry) == sizeof(int32)
|
* Looks valid. Fill the WordEntry struct, and copy lexeme.
|
||||||
|
*
|
||||||
|
* But make sure the buffer is large enough first.
|
||||||
*/
|
*/
|
||||||
tmp = pq_getmsgint(buf, sizeof(int32));
|
while (hdrlen + SHORTALIGN(datalen + lex_len) +
|
||||||
*weptr = *(WordEntry *) & tmp;
|
(npos + 1) * sizeof(WordEntryPos) >= len)
|
||||||
|
|
||||||
while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len)
|
|
||||||
{
|
{
|
||||||
len *= 2;
|
len *= 2;
|
||||||
vec = (TSVector) repalloc(vec, len);
|
vec = (TSVector) repalloc(vec, len);
|
||||||
weptr = ARRPTR(vec) + i;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(STRPTR(vec) + weptr->pos,
|
vec->entries[i].haspos = (npos > 0) ? 1 : 0;
|
||||||
pq_getmsgbytes(buf, weptr->len),
|
vec->entries[i].len = lex_len;
|
||||||
weptr->len);
|
vec->entries[i].pos = datalen;
|
||||||
datalen += SHORTALIGN(weptr->len);
|
|
||||||
|
|
||||||
if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0)
|
memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
|
||||||
|
|
||||||
|
datalen += lex_len;
|
||||||
|
|
||||||
|
if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
|
||||||
elog(ERROR, "lexemes are unordered");
|
elog(ERROR, "lexemes are unordered");
|
||||||
|
|
||||||
if (weptr->haspos)
|
/* Receive positions */
|
||||||
|
|
||||||
|
if (npos > 0)
|
||||||
{
|
{
|
||||||
uint16 j,
|
uint16 j;
|
||||||
npos;
|
|
||||||
WordEntryPos *wepptr;
|
WordEntryPos *wepptr;
|
||||||
|
|
||||||
npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
|
/*
|
||||||
if (npos > MAXNUMPOS)
|
* Pad to 2-byte alignment if necessary. Though we used palloc0
|
||||||
elog(ERROR, "unexpected number of positions");
|
* for the initial allocation, subsequent repalloc'd memory
|
||||||
|
* areas are not initialized to zero.
|
||||||
while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len)
|
*/
|
||||||
|
if (datalen != SHORTALIGN(datalen))
|
||||||
{
|
{
|
||||||
len *= 2;
|
*(STRPTR(vec) + datalen) = '\0';
|
||||||
vec = (TSVector) repalloc(vec, len);
|
datalen = SHORTALIGN(datalen);
|
||||||
weptr = ARRPTR(vec) + i;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16));
|
memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
|
||||||
wepptr = POSDATAPTR(vec, weptr);
|
|
||||||
|
wepptr = POSDATAPTR(vec, &vec->entries[i]);
|
||||||
for (j = 0; j < npos; j++)
|
for (j = 0; j < npos; j++)
|
||||||
{
|
{
|
||||||
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16));
|
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
|
||||||
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
|
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
|
||||||
elog(ERROR, "position information is unordered");
|
elog(ERROR, "position information is unordered");
|
||||||
}
|
}
|
||||||
@ -480,7 +530,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen));
|
SET_VARSIZE(vec, hdrlen + datalen);
|
||||||
|
|
||||||
PG_RETURN_TSVECTOR(vec);
|
PG_RETURN_TSVECTOR(vec);
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -165,7 +165,7 @@ tsvector_strip(PG_FUNCTION_ARGS)
|
|||||||
char *cur;
|
char *cur;
|
||||||
|
|
||||||
for (i = 0; i < in->size; i++)
|
for (i = 0; i < in->size; i++)
|
||||||
len += SHORTALIGN(arrin[i].len);
|
len += arrin[i].len;
|
||||||
|
|
||||||
len = CALCDATASIZE(in->size, len);
|
len = CALCDATASIZE(in->size, len);
|
||||||
out = (TSVector) palloc0(len);
|
out = (TSVector) palloc0(len);
|
||||||
@ -179,7 +179,7 @@ tsvector_strip(PG_FUNCTION_ARGS)
|
|||||||
arrout[i].haspos = 0;
|
arrout[i].haspos = 0;
|
||||||
arrout[i].len = arrin[i].len;
|
arrout[i].len = arrin[i].len;
|
||||||
arrout[i].pos = cur - STRPTR(out);
|
arrout[i].pos = cur - STRPTR(out);
|
||||||
cur += SHORTALIGN(arrout[i].len);
|
cur += arrout[i].len;
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FREE_IF_COPY(in, 0);
|
PG_FREE_IF_COPY(in, 0);
|
||||||
@ -351,12 +351,15 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
ptr->len = ptr1->len;
|
ptr->len = ptr1->len;
|
||||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||||
ptr->pos = cur - data;
|
ptr->pos = cur - data;
|
||||||
cur += SHORTALIGN(ptr1->len);
|
|
||||||
if (ptr->haspos)
|
if (ptr->haspos)
|
||||||
{
|
{
|
||||||
|
cur += SHORTALIGN(ptr1->len);
|
||||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cur += ptr1->len;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
ptr1++;
|
ptr1++;
|
||||||
i1--;
|
i1--;
|
||||||
@ -367,16 +370,20 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
ptr->len = ptr2->len;
|
ptr->len = ptr2->len;
|
||||||
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
||||||
ptr->pos = cur - data;
|
ptr->pos = cur - data;
|
||||||
cur += SHORTALIGN(ptr2->len);
|
|
||||||
if (ptr->haspos)
|
if (ptr->haspos)
|
||||||
{
|
{
|
||||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||||
|
|
||||||
|
cur += SHORTALIGN(ptr2->len);
|
||||||
|
|
||||||
if (addlen == 0)
|
if (addlen == 0)
|
||||||
ptr->haspos = 0;
|
ptr->haspos = 0;
|
||||||
else
|
else
|
||||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cur += ptr2->len;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
ptr2++;
|
ptr2++;
|
||||||
i2--;
|
i2--;
|
||||||
@ -387,9 +394,9 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
ptr->len = ptr1->len;
|
ptr->len = ptr1->len;
|
||||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||||
ptr->pos = cur - data;
|
ptr->pos = cur - data;
|
||||||
cur += SHORTALIGN(ptr1->len);
|
|
||||||
if (ptr->haspos)
|
if (ptr->haspos)
|
||||||
{
|
{
|
||||||
|
cur += SHORTALIGN(ptr1->len);
|
||||||
if (ptr1->haspos)
|
if (ptr1->haspos)
|
||||||
{
|
{
|
||||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||||
@ -407,6 +414,9 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cur += ptr1->len;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
ptr1++;
|
ptr1++;
|
||||||
ptr2++;
|
ptr2++;
|
||||||
@ -421,12 +431,15 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
ptr->len = ptr1->len;
|
ptr->len = ptr1->len;
|
||||||
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
memcpy(cur, data1 + ptr1->pos, ptr1->len);
|
||||||
ptr->pos = cur - data;
|
ptr->pos = cur - data;
|
||||||
cur += SHORTALIGN(ptr1->len);
|
|
||||||
if (ptr->haspos)
|
if (ptr->haspos)
|
||||||
{
|
{
|
||||||
|
cur += SHORTALIGN(ptr1->len);
|
||||||
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
|
||||||
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cur += ptr1->len;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
ptr1++;
|
ptr1++;
|
||||||
i1--;
|
i1--;
|
||||||
@ -438,16 +451,20 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
|||||||
ptr->len = ptr2->len;
|
ptr->len = ptr2->len;
|
||||||
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
memcpy(cur, data2 + ptr2->pos, ptr2->len);
|
||||||
ptr->pos = cur - data;
|
ptr->pos = cur - data;
|
||||||
cur += SHORTALIGN(ptr2->len);
|
|
||||||
if (ptr->haspos)
|
if (ptr->haspos)
|
||||||
{
|
{
|
||||||
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
|
||||||
|
|
||||||
|
cur += SHORTALIGN(ptr2->len);
|
||||||
|
|
||||||
if (addlen == 0)
|
if (addlen == 0)
|
||||||
ptr->haspos = 0;
|
ptr->haspos = 0;
|
||||||
else
|
else
|
||||||
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
cur += ptr2->len;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
ptr2++;
|
ptr2++;
|
||||||
i2--;
|
i2--;
|
||||||
@ -484,8 +501,8 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
|
|||||||
static bool
|
static bool
|
||||||
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
|
checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
|
||||||
{
|
{
|
||||||
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
|
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16));
|
||||||
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
|
uint16 len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len)));
|
||||||
|
|
||||||
while (len--)
|
while (len--)
|
||||||
{
|
{
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
|
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.3 2007/09/07 15:35:11 teodor Exp $
|
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -62,26 +62,33 @@ typedef uint16 WordEntryPos;
|
|||||||
* bytes from end of WordEntry array to start of
|
* bytes from end of WordEntry array to start of
|
||||||
* corresponding lexeme.
|
* corresponding lexeme.
|
||||||
* 4) Lexeme's storage:
|
* 4) Lexeme's storage:
|
||||||
* SHORTALIGNED(lexeme) and position information if it exists
|
* lexeme (without null-terminator)
|
||||||
* Position information: first int2 - is a number of positions and it
|
* if haspos is true:
|
||||||
* follows array of WordEntryPos
|
* padding byte if necessary to make the number of positions 2-byte aligned
|
||||||
|
* uint16 number of positions that follow.
|
||||||
|
* uint16[] positions
|
||||||
|
*
|
||||||
|
* The positions must be sorted.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||||
uint32 size;
|
int32 size;
|
||||||
char data[1];
|
WordEntry entries[1]; /* var size */
|
||||||
|
/* lexemes follow */
|
||||||
} TSVectorData;
|
} TSVectorData;
|
||||||
|
|
||||||
typedef TSVectorData *TSVector;
|
typedef TSVectorData *TSVector;
|
||||||
|
|
||||||
#define DATAHDRSIZE (VARHDRSZ + sizeof(int4))
|
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
|
||||||
#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
|
#define CALCDATASIZE(x, lenstr) (DATAHDRSIZE + (x) * sizeof(WordEntry) + (lenstr) )
|
||||||
#define ARRPTR(x) ( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
|
#define ARRPTR(x) ( (x)->entries )
|
||||||
#define STRPTR(x) ( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((TSVector)(x))->size ) )
|
|
||||||
#define STRSIZE(x) ( ((TSVector)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((TSVector)(x))->size ) )
|
/* returns a pointer to the beginning of lexemes */
|
||||||
#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
|
#define STRPTR(x) ( (char *) &(x)->entries[x->size] )
|
||||||
|
|
||||||
|
#define _POSDATAPTR(x,e) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))
|
||||||
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
|
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
|
||||||
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
|
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
|
||||||
|
|
||||||
@ -159,7 +166,7 @@ typedef int8 QueryItemType;
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
|
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
|
||||||
int8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
|
uint8 weight; /* weights of operand to search. It's a bitmask of allowed weights.
|
||||||
* if it =0 then any weight are allowed.
|
* if it =0 then any weight are allowed.
|
||||||
* Weights and bit map:
|
* Weights and bit map:
|
||||||
* A: 1<<3
|
* A: 1<<3
|
||||||
|
Loading…
x
Reference in New Issue
Block a user