mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 10:30:33 +03:00 
			
		
		
		
	typedef struct {} WordEntryPos;
to
typedef uint16 WordEntryPos
according to http://www.pgsql.ru/db/mw/msg.html?mid=2035188
Require re-fill all tsvector fields and reindex tsvector indexes.
		
	
		
			
				
	
	
		
			330 lines
		
	
	
		
			6.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			330 lines
		
	
	
		
			6.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Operations for tsvector type
 | |
|  * Teodor Sigaev <teodor@sigaev.ru>
 | |
|  */
 | |
| #include "postgres.h"
 | |
| 
 | |
| #include "access/gist.h"
 | |
| #include "access/itup.h"
 | |
| #include "utils/builtins.h"
 | |
| #include "storage/bufpage.h"
 | |
| #include "executor/spi.h"
 | |
| #include "commands/trigger.h"
 | |
| #include "nodes/pg_list.h"
 | |
| #include "catalog/namespace.h"
 | |
| 
 | |
| #include "utils/pg_locale.h"
 | |
| 
 | |
| #include <ctype.h>				/* tolower */
 | |
| #include "tsvector.h"
 | |
| #include "query.h"
 | |
| #include "ts_cfg.h"
 | |
| #include "common.h"
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(strip);
 | |
| Datum		strip(PG_FUNCTION_ARGS);
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(setweight);
 | |
| Datum		setweight(PG_FUNCTION_ARGS);
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(concat);
 | |
| Datum		concat(PG_FUNCTION_ARGS);
 | |
| 
 | |
| Datum
 | |
| strip(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 | |
| 	tsvector   *out;
 | |
| 	int			i,
 | |
| 				len = 0;
 | |
| 	WordEntry  *arrin = ARRPTR(in),
 | |
| 			   *arrout;
 | |
| 	char	   *cur;
 | |
| 
 | |
| 	for (i = 0; i < in->size; i++)
 | |
| 		len += SHORTALIGN(arrin[i].len);
 | |
| 
 | |
| 	len = CALCDATASIZE(in->size, len);
 | |
| 	out = (tsvector *) palloc(len);
 | |
| 	memset(out, 0, len);
 | |
| 	out->len = len;
 | |
| 	out->size = in->size;
 | |
| 	arrout = ARRPTR(out);
 | |
| 	cur = STRPTR(out);
 | |
| 	for (i = 0; i < in->size; i++)
 | |
| 	{
 | |
| 		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
 | |
| 		arrout[i].haspos = 0;
 | |
| 		arrout[i].len = arrin[i].len;
 | |
| 		arrout[i].pos = cur - STRPTR(out);
 | |
| 		cur += SHORTALIGN(arrout[i].len);
 | |
| 	}
 | |
| 
 | |
| 	PG_FREE_IF_COPY(in, 0);
 | |
| 	PG_RETURN_POINTER(out);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| setweight(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 | |
| 	char		cw = PG_GETARG_CHAR(1);
 | |
| 	tsvector   *out;
 | |
| 	int			i,
 | |
| 				j;
 | |
| 	WordEntry  *entry;
 | |
| 	WordEntryPos *p;
 | |
| 	int			w = 0;
 | |
| 
 | |
| 	switch (tolower(cw))
 | |
| 	{
 | |
| 		case 'a':
 | |
| 			w = 3;
 | |
| 			break;
 | |
| 		case 'b':
 | |
| 			w = 2;
 | |
| 			break;
 | |
| 		case 'c':
 | |
| 			w = 1;
 | |
| 			break;
 | |
| 		case 'd':
 | |
| 			w = 0;
 | |
| 			break;
 | |
| 			/* internal error */
 | |
| 		default:
 | |
| 			elog(ERROR, "unrecognized weight");
 | |
| 	}
 | |
| 
 | |
| 	out = (tsvector *) palloc(in->len);
 | |
| 	memcpy(out, in, in->len);
 | |
| 	entry = ARRPTR(out);
 | |
| 	i = out->size;
 | |
| 	while (i--)
 | |
| 	{
 | |
| 		if ((j = POSDATALEN(out, entry)) != 0)
 | |
| 		{
 | |
| 			p = POSDATAPTR(out, entry);
 | |
| 			while (j--)
 | |
| 			{
 | |
| 				WEP_SETWEIGHT(*p,w);
 | |
| 				p++;
 | |
| 			}
 | |
| 		}
 | |
| 		entry++;
 | |
| 	}
 | |
| 
 | |
| 	PG_FREE_IF_COPY(in, 0);
 | |
| 	PG_RETURN_POINTER(out);
 | |
| }
 | |
| 
 | |
| static int
 | |
| compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
 | |
| {
 | |
| 	if (a->len == b->len)
 | |
| 	{
 | |
| 		return strncmp(
 | |
| 					   ptra + a->pos,
 | |
| 					   ptrb + b->pos,
 | |
| 					   a->len);
 | |
| 	}
 | |
| 	return (a->len > b->len) ? 1 : -1;
 | |
| }
 | |
| 
 | |
| static int4
 | |
| add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos)
 | |
| {
 | |
| 	uint16	   *clen = (uint16 *) _POSDATAPTR(dest, destptr);
 | |
| 	int			i;
 | |
| 	uint16		slen = POSDATALEN(src, srcptr),
 | |
| 				startlen;
 | |
| 	WordEntryPos *spos = POSDATAPTR(src, srcptr),
 | |
| 			   *dpos = POSDATAPTR(dest, destptr);
 | |
| 
 | |
| 	if (!destptr->haspos)
 | |
| 		*clen = 0;
 | |
| 
 | |
| 	startlen = *clen;
 | |
| 	for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++)
 | |
| 	{
 | |
| 		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
 | |
| 		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
 | |
| 		(*clen)++;
 | |
| 	}
 | |
| 
 | |
| 	if (*clen != startlen)
 | |
| 		destptr->haspos = 1;
 | |
| 	return *clen - startlen;
 | |
| }
 | |
| 
 | |
| 
 | |
| Datum
 | |
| concat(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	tsvector   *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
 | |
| 	tsvector   *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
 | |
| 	tsvector   *out;
 | |
| 	WordEntry  *ptr;
 | |
| 	WordEntry  *ptr1,
 | |
| 			   *ptr2;
 | |
| 	WordEntryPos *p;
 | |
| 	int			maxpos = 0,
 | |
| 				i,
 | |
| 				j,
 | |
| 				i1,
 | |
| 				i2;
 | |
| 	char	   *cur;
 | |
| 	char	   *data,
 | |
| 			   *data1,
 | |
| 			   *data2;
 | |
| 
 | |
| 	ptr = ARRPTR(in1);
 | |
| 	i = in1->size;
 | |
| 	while (i--)
 | |
| 	{
 | |
| 		if ((j = POSDATALEN(in1, ptr)) != 0)
 | |
| 		{
 | |
| 			p = POSDATAPTR(in1, ptr);
 | |
| 			while (j--)
 | |
| 			{
 | |
| 				if (WEP_GETPOS(*p) > maxpos)
 | |
| 					maxpos = WEP_GETPOS(*p);
 | |
| 				p++;
 | |
| 			}
 | |
| 		}
 | |
| 		ptr++;
 | |
| 	}
 | |
| 
 | |
| 	ptr1 = ARRPTR(in1);
 | |
| 	ptr2 = ARRPTR(in2);
 | |
| 	data1 = STRPTR(in1);
 | |
| 	data2 = STRPTR(in2);
 | |
| 	i1 = in1->size;
 | |
| 	i2 = in2->size;
 | |
| 	out = (tsvector *) palloc(in1->len + in2->len);
 | |
| 	memset(out, 0, in1->len + in2->len);
 | |
| 	out->len = in1->len + in2->len;
 | |
| 	out->size = in1->size + in2->size;
 | |
| 	data = cur = STRPTR(out);
 | |
| 	ptr = ARRPTR(out);
 | |
| 	while (i1 && i2)
 | |
| 	{
 | |
| 		int			cmp = compareEntry(data1, ptr1, data2, ptr2);
 | |
| 
 | |
| 		if (cmp < 0)
 | |
| 		{						/* in1 first */
 | |
| 			ptr->haspos = ptr1->haspos;
 | |
| 			ptr->len = ptr1->len;
 | |
| 			memcpy(cur, data1 + ptr1->pos, ptr1->len);
 | |
| 			ptr->pos = cur - data;
 | |
| 			cur += SHORTALIGN(ptr1->len);
 | |
| 			if (ptr->haspos)
 | |
| 			{
 | |
| 				memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 | |
| 				cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 | |
| 			}
 | |
| 			ptr++;
 | |
| 			ptr1++;
 | |
| 			i1--;
 | |
| 		}
 | |
| 		else if (cmp > 0)
 | |
| 		{						/* in2 first */
 | |
| 			ptr->haspos = ptr2->haspos;
 | |
| 			ptr->len = ptr2->len;
 | |
| 			memcpy(cur, data2 + ptr2->pos, ptr2->len);
 | |
| 			ptr->pos = cur - data;
 | |
| 			cur += SHORTALIGN(ptr2->len);
 | |
| 			if (ptr->haspos)
 | |
| 			{
 | |
| 				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
 | |
| 
 | |
| 				if (addlen == 0)
 | |
| 					ptr->haspos = 0;
 | |
| 				else
 | |
| 					cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
 | |
| 			}
 | |
| 			ptr++;
 | |
| 			ptr2++;
 | |
| 			i2--;
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			ptr->haspos = ptr1->haspos | ptr2->haspos;
 | |
| 			ptr->len = ptr1->len;
 | |
| 			memcpy(cur, data1 + ptr1->pos, ptr1->len);
 | |
| 			ptr->pos = cur - data;
 | |
| 			cur += SHORTALIGN(ptr1->len);
 | |
| 			if (ptr->haspos)
 | |
| 			{
 | |
| 				if (ptr1->haspos)
 | |
| 				{
 | |
| 					memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 | |
| 					cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 | |
| 					if (ptr2->haspos)
 | |
| 						cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
 | |
| 				}
 | |
| 				else if (ptr2->haspos)
 | |
| 				{
 | |
| 					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
 | |
| 
 | |
| 					if (addlen == 0)
 | |
| 						ptr->haspos = 0;
 | |
| 					else
 | |
| 						cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
 | |
| 				}
 | |
| 			}
 | |
| 			ptr++;
 | |
| 			ptr1++;
 | |
| 			ptr2++;
 | |
| 			i1--;
 | |
| 			i2--;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	while (i1)
 | |
| 	{
 | |
| 		ptr->haspos = ptr1->haspos;
 | |
| 		ptr->len = ptr1->len;
 | |
| 		memcpy(cur, data1 + ptr1->pos, ptr1->len);
 | |
| 		ptr->pos = cur - data;
 | |
| 		cur += SHORTALIGN(ptr1->len);
 | |
| 		if (ptr->haspos)
 | |
| 		{
 | |
| 			memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
 | |
| 			cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
 | |
| 		}
 | |
| 		ptr++;
 | |
| 		ptr1++;
 | |
| 		i1--;
 | |
| 	}
 | |
| 
 | |
| 	while (i2)
 | |
| 	{
 | |
| 		ptr->haspos = ptr2->haspos;
 | |
| 		ptr->len = ptr2->len;
 | |
| 		memcpy(cur, data2 + ptr2->pos, ptr2->len);
 | |
| 		ptr->pos = cur - data;
 | |
| 		cur += SHORTALIGN(ptr2->len);
 | |
| 		if (ptr->haspos)
 | |
| 		{
 | |
| 			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
 | |
| 
 | |
| 			if (addlen == 0)
 | |
| 				ptr->haspos = 0;
 | |
| 			else
 | |
| 				cur += addlen * sizeof(WordEntryPos) + sizeof(uint16);
 | |
| 		}
 | |
| 		ptr++;
 | |
| 		ptr2++;
 | |
| 		i2--;
 | |
| 	}
 | |
| 
 | |
| 	out->size = ptr - ARRPTR(out);
 | |
| 	out->len = CALCDATASIZE(out->size, cur - data);
 | |
| 	if (data != STRPTR(out))
 | |
| 		memmove(STRPTR(out), data, cur - data);
 | |
| 
 | |
| 	PG_FREE_IF_COPY(in1, 0);
 | |
| 	PG_FREE_IF_COPY(in2, 1);
 | |
| 	PG_RETURN_POINTER(out);
 | |
| }
 |