mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
#include "executor/spi.h"
#include "commands/trigger.h"
#include "c.h"		/* endof() macro */
#include <ctype.h>	/* tolower */
#include <stdio.h>	/* debugging */
#include <stdlib.h>	/* malloc, realloc, free */
#include <string.h>	/* strlen, strcmp, strcpy, memcpy */
 | 
						|
 | 
						|
/*
 * Trigger function takes 2 arguments:
 *		1. relation in which to store the substrings
 *		2. field to extract substrings from
 *
 * The relation in which to insert *must* have the following layout:
 *
 *		string		varchar(#)
 *		id			oid
 *
 * Example:
 *
 * create function fti() returns opaque as
 * '/home/boekhold/src/postgresql-6.2/contrib/fti/fti.so' language 'c';
 *
 * create table title_fti (string varchar(25), id oid);
 * create index title_fti_idx on title_fti (string);
 *
 * create trigger title_fti_trigger after update or insert or delete on product
 * for each row execute procedure fti(title_fti, title);
 *                                    ^^^^^^^^^
 *                                    where to store index in
 *                                               ^^^^^
 *                                               which column to index
 *
 * Of course, don't forget to create an index on title_fti, column string,
 * else you won't notice much speedup :)
 *
 * After populating 'product', try something like:
 *
 * select p.* from product p, title_fti f1, title_fti f2 where
 *	f1.string='slippery' and f2.string='wet' and f1.id=f2.id and p.oid=f1.id;
 */
 | 
						|
 | 
						|
/*
 *	march 4 1998 Changed breakup() to return fewer substrings. Only break up
 *				 into word parts which are in turn shortened from the start
 *				 of the word (ie. word, ord, rd)
 *				 Did allocation of substring buffer outside of breakup()
 *	oct. 5 1997, fixed a bug in string breakup (where there is more than 1
 *				 nonalpha character between words).
 *
 *	oct 4-5 1997 implemented the thing, at least the basic functionality
 *				 of it all....
 */
 | 
						|
 | 
						|
/* IMPROVEMENTS:

   save a plan for deletes
   create a function that will make the index *after* we have populated
   the main table (probably first delete all contents to be sure there's
   nothing in it, then re-populate the fti-table)

   can we do something with operator overloading or a separate function
   that can build the final query automatically?
   */
 | 
						|
 | 
						|
/* Functions defined further down in this file. */
HeapTuple	fti(void);
char	   *breakup(char *string, char *substring);
bool		is_stopword(char *text);

/*
 * Handshake flag between fti() and breakup(): fti() sets it to true before
 * it starts splitting a new column value, and breakup() clears it once it
 * has re-initialized its scan position.
 */
bool		new_tuple = false;
 | 
						|
 | 
						|
 | 
						|
/*
 * List of words to skip when indexing.
 *
 * THIS LIST MUST BE IN SORTED (strcmp) ORDER, A BINARY SEARCH IS USED!!!!
 *
 * Every entry, including the last one, ends with a comma.  A missing comma
 * makes the compiler concatenate two adjacent string literals into a single
 * (wrong) entry.
 *
 * The list is empty unless SAMPLE_STOP_WORDS is defined.
 */
char	   *StopWords[] = {
#ifdef SAMPLE_STOP_WORDS
	"no",
	"the",
	"yes",
#endif
};
 | 
						|
 | 
						|
/*
 * Cache entry for saved query plans (approach borrowed from contrib/spi/\*.c).
 */
typedef struct
{
	char	   *ident;			/* key under which the entry is looked up */
	int			nplans;			/* number of plans stored in splan */
	void	  **splan;			/* array of saved plan pointers */
} EPlan;
 | 
						|
 | 
						|
/* Cached plans for the INSERT statements issued by fti(). */
static EPlan *InsertPlans = NULL;
static int	nInsertPlans = 0;

/* Cached plans for the DELETE statements issued by fti(). */
static EPlan *DeletePlans = NULL;
static int	nDeletePlans = 0;

/* Look up the cache entry for 'ident', creating an empty one if needed. */
static EPlan *find_plan(char *ident, EPlan ** eplan, int *nplans);
 | 
						|
 | 
						|
/***********************************************************************/
 | 
						|
HeapTuple
 | 
						|
fti()
 | 
						|
{
 | 
						|
	Trigger		*trigger;	/* to get trigger name */
 | 
						|
	int			nargs;		/* # of arguments */
 | 
						|
	char		**args;		/* arguments */
 | 
						|
	char		*relname;	/* triggered relation name */
 | 
						|
	Relation	rel;		/* triggered relation */
 | 
						|
	char		*indexname;	/* name of table for substrings */
 | 
						|
	HeapTuple	rettuple = NULL;
 | 
						|
	TupleDesc	tupdesc;	/* tuple description */
 | 
						|
	bool		isinsert=false;
 | 
						|
	bool		isdelete=false;
 | 
						|
	int			ret;
 | 
						|
	char		query[8192];
 | 
						|
	Oid			oid;
 | 
						|
	/*
 | 
						|
	  FILE		*debug;
 | 
						|
	  */
 | 
						|
 | 
						|
	/*
 | 
						|
	  debug = fopen("/dev/xconsole", "w");
 | 
						|
	  fprintf(debug, "FTI: entered function\n");
 | 
						|
	  fflush(debug);
 | 
						|
	  */
 | 
						|
 | 
						|
	if (!CurrentTriggerData)
 | 
						|
		elog(ERROR, "Full Text Indexing: triggers are not initialized");
 | 
						|
	if (TRIGGER_FIRED_FOR_STATEMENT(CurrentTriggerData->tg_event))
 | 
						|
		elog(ERROR, "Full Text Indexing: can't process STATEMENT events");
 | 
						|
	if (TRIGGER_FIRED_BEFORE(CurrentTriggerData->tg_event))
 | 
						|
		elog(ERROR, "Full Text Indexing: must be fired AFTER event");
 | 
						|
 | 
						|
	if (TRIGGER_FIRED_BY_INSERT(CurrentTriggerData->tg_event))
 | 
						|
		isinsert=true;
 | 
						|
	if (TRIGGER_FIRED_BY_UPDATE(CurrentTriggerData->tg_event))
 | 
						|
	{ isdelete=true;isinsert=true;}
 | 
						|
	if (TRIGGER_FIRED_BY_DELETE(CurrentTriggerData->tg_event))
 | 
						|
		isdelete=true;
 | 
						|
 | 
						|
	trigger = CurrentTriggerData->tg_trigger;
 | 
						|
	rel = CurrentTriggerData->tg_relation;
 | 
						|
	relname = SPI_getrelname(rel);
 | 
						|
	rettuple=CurrentTriggerData->tg_trigtuple;
 | 
						|
	if (isdelete&&isinsert) /* is an UPDATE */
 | 
						|
		rettuple=CurrentTriggerData->tg_newtuple;
 | 
						|
 | 
						|
	CurrentTriggerData = NULL; /* invalidate 'normal' calls to this function */
 | 
						|
 | 
						|
	if ((ret = SPI_connect()) <0)
 | 
						|
		elog(ERROR,"Full Text Indexing: SPI_connect failed, returned %d\n",ret);
 | 
						|
		
 | 
						|
	nargs = trigger->tgnargs;
 | 
						|
	if (nargs != 2)
 | 
						|
		elog(ERROR, "Full Text Indexing: trigger can only have 2 arguments");
 | 
						|
		
 | 
						|
	args = trigger->tgargs;
 | 
						|
	indexname = args[0];
 | 
						|
	tupdesc = rel->rd_att;	/* what the tuple looks like (?) */
 | 
						|
 | 
						|
	/* get oid of current tuple, needed by all, so place here */
 | 
						|
	oid = rettuple->t_oid;
 | 
						|
	if (!OidIsValid(oid))
 | 
						|
	    elog(ERROR,"Full Text Indexing: oid of current tuple is NULL");
 | 
						|
 | 
						|
	if (isdelete) {
 | 
						|
		void *pplan;
 | 
						|
		Oid *argtypes;
 | 
						|
		Datum values[1];
 | 
						|
		EPlan *plan;
 | 
						|
 | 
						|
		sprintf(query, "D%s$%s", args[0], args[1]);
 | 
						|
		plan = find_plan(query, &DeletePlans, &nDeletePlans);
 | 
						|
		if (plan->nplans <= 0) {
 | 
						|
			argtypes = (Oid *)palloc(sizeof(Oid));
 | 
						|
 | 
						|
			argtypes[0] = OIDOID;
 | 
						|
 | 
						|
			sprintf(query, "DELETE FROM %s WHERE id = $1", indexname);
 | 
						|
			pplan = SPI_prepare(query, 1, argtypes);
 | 
						|
			if (!pplan)
 | 
						|
				elog(ERROR, "Full Text Indexing: SPI_prepare returned NULL "
 | 
						|
					 "in delete");
 | 
						|
			pplan = SPI_saveplan(pplan);
 | 
						|
			if (pplan == NULL)
 | 
						|
				elog(ERROR, "Full Text Indexing: SPI_saveplan returned NULL "
 | 
						|
					 "in delete");
 | 
						|
 | 
						|
			plan->splan = (void **)malloc(sizeof(void*));
 | 
						|
			*(plan->splan) = pplan;
 | 
						|
			plan->nplans = 1;
 | 
						|
		}
 | 
						|
 | 
						|
		values[0] = oid;
 | 
						|
 | 
						|
		ret = SPI_execp(*(plan->splan), values, NULL, 0);
 | 
						|
		if (ret != SPI_OK_DELETE)
 | 
						|
			elog(ERROR, "Full Text Indexing: error executing plan in delete");
 | 
						|
	}
 | 
						|
	
 | 
						|
	if (isinsert) {
 | 
						|
		char *substring, *column;
 | 
						|
		void *pplan;
 | 
						|
		Oid  *argtypes;
 | 
						|
		Datum values[2];
 | 
						|
		int  colnum;
 | 
						|
		struct varlena *data;
 | 
						|
		EPlan *plan;
 | 
						|
 | 
						|
		sprintf(query, "I%s$%s", args[0], args[1]);
 | 
						|
		plan = find_plan(query, &InsertPlans, &nInsertPlans);
 | 
						|
 | 
						|
		/* no plan yet, so allocate mem for argtypes */
 | 
						|
		if (plan->nplans <= 0) {
 | 
						|
			argtypes = (Oid *)palloc(2*sizeof(Oid));
 | 
						|
 | 
						|
			argtypes[0]	=	VARCHAROID; /*create table t_name
 | 
						|
										        (string varchar, */
 | 
						|
			argtypes[1]	=	OIDOID;     /*       id     oid);    */
 | 
						|
 | 
						|
			/* prepare plan to gain speed */
 | 
						|
			sprintf(query, "INSERT INTO %s (string, id) VALUES ($1, $2)",
 | 
						|
					indexname);
 | 
						|
			pplan = SPI_prepare(query, 2, argtypes);
 | 
						|
			if (!pplan)
 | 
						|
				elog(ERROR, "Full Text Indexing: SPI_prepare returned NULL "
 | 
						|
					 "in insert");
 | 
						|
 | 
						|
			pplan = SPI_saveplan(pplan);
 | 
						|
			if (pplan == NULL)
 | 
						|
				elog(ERROR, "Full Text Indexing: SPI_saveplan returned NULL"
 | 
						|
					 " in insert");
 | 
						|
 | 
						|
			plan->splan = (void **)malloc(sizeof(void*));
 | 
						|
			*(plan->splan) = pplan;
 | 
						|
			plan->nplans = 1;
 | 
						|
		}
 | 
						|
		
 | 
						|
		
 | 
						|
		/* prepare plan for query */
 | 
						|
		colnum=SPI_fnumber(tupdesc, args[1]);
 | 
						|
		if (colnum == SPI_ERROR_NOATTRIBUTE)
 | 
						|
			elog(ERROR, "Full Text Indexing: column '%s' of '%s' not found",
 | 
						|
				 args[1], args[0]);
 | 
						|
		
 | 
						|
		/* Get the char* representation of the column with name args[1] */
 | 
						|
		column = SPI_getvalue(rettuple, tupdesc, colnum);
 | 
						|
		
 | 
						|
		if (column) { /* make sure we don't try to index NULL's */
 | 
						|
			char *buff;
 | 
						|
			char *string = column;
 | 
						|
			
 | 
						|
			while(*string != '\0') { /* placed 'really' inline. */
 | 
						|
				*string = tolower(*string); /* some compilers will choke */
 | 
						|
				string++;					/* on 'inline' keyword */
 | 
						|
			}
 | 
						|
 | 
						|
			data = (struct varlena*)palloc(sizeof(int32)+strlen(column)+1);
 | 
						|
			buff = palloc(strlen(column) + 1);
 | 
						|
			/* saves lots of calls in while-loop and in breakup()*/
 | 
						|
 | 
						|
			new_tuple=true;
 | 
						|
			while ((substring = breakup(column, buff))) {
 | 
						|
				int l;
 | 
						|
 | 
						|
				l = strlen(substring);
 | 
						|
 | 
						|
				data->vl_len = l+sizeof(int32);
 | 
						|
				memcpy(VARDATA(data), substring, l);
 | 
						|
				values[0] = PointerGetDatum(data);
 | 
						|
				values[1] = oid;
 | 
						|
				
 | 
						|
				ret = SPI_execp(*(plan->splan), values, NULL, 0);
 | 
						|
				if (ret != SPI_OK_INSERT)
 | 
						|
					elog(ERROR, "Full Text Indexing: error executing plan "
 | 
						|
						 "in insert");
 | 
						|
			}
 | 
						|
			pfree(buff);
 | 
						|
			pfree(data);
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	SPI_finish();
 | 
						|
	return (rettuple);
 | 
						|
}
 | 
						|
 | 
						|
char *breakup(char *string, char *substring)
 | 
						|
{
 | 
						|
	static char *last_start;
 | 
						|
	static char *cur_pos;
 | 
						|
 | 
						|
	if (new_tuple)
 | 
						|
	{
 | 
						|
		cur_pos=last_start=&string[strlen(string)-1];
 | 
						|
		new_tuple=false; /* don't initialize this next time */
 | 
						|
	}
 | 
						|
 | 
						|
	while (cur_pos > string) /* don't read before start of 'string' */
 | 
						|
	{
 | 
						|
		/* skip pieces at the end of a string that are not
 | 
						|
		   alfa-numeric (ie. 'string$%^&', last_start first points to
 | 
						|
		   '&', and after this to 'g' */
 | 
						|
		if (!isalnum((int)*last_start)) {
 | 
						|
			while (!isalnum((int)*last_start) &&
 | 
						|
				   last_start > string)
 | 
						|
				last_start--;
 | 
						|
			cur_pos=last_start;
 | 
						|
		}
 | 
						|
 | 
						|
		cur_pos--; /* substrings are at minimum 2 characters long */
 | 
						|
 | 
						|
		if (isalnum((int)*cur_pos))
 | 
						|
		{
 | 
						|
			/* Houston, we have a substring! :) */
 | 
						|
			memcpy(substring, cur_pos, last_start - cur_pos + 1);
 | 
						|
			substring[last_start-cur_pos+1]='\0';
 | 
						|
			if (!is_stopword(substring)) return substring;
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			last_start=cur_pos-1;
 | 
						|
			cur_pos = last_start;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return NULL; /* we've processed all of 'string' */
 | 
						|
}
 | 
						|
 | 
						|
/* copied from src/backend/parser/keywords.c and adjusted for our situation*/
 | 
						|
bool
 | 
						|
is_stopword(char *text)
 | 
						|
{
 | 
						|
	char **StopLow;		/* for list of stop-words */
 | 
						|
	char **StopHigh;
 | 
						|
	char **StopMiddle;
 | 
						|
    unsigned int         difference;
 | 
						|
 | 
						|
	StopLow = &StopWords[0];		/* initialize stuff for binary search */
 | 
						|
	StopHigh = endof(StopWords);
 | 
						|
 | 
						|
	if (lengthof(StopWords) == 0)
 | 
						|
		return false;
 | 
						|
		
 | 
						|
    while (StopLow <= StopHigh)
 | 
						|
    {
 | 
						|
        StopMiddle = StopLow + (StopHigh - StopLow) / 2;
 | 
						|
        difference = strcmp(*StopMiddle, text);
 | 
						|
        if (difference == 0)
 | 
						|
            return (true);
 | 
						|
        else if (difference < 0)
 | 
						|
            StopLow = StopMiddle + 1;
 | 
						|
        else
 | 
						|
            StopHigh = StopMiddle - 1;
 | 
						|
    }
 | 
						|
 | 
						|
    return (false);
 | 
						|
}
 | 
						|
 | 
						|
/* for caching of query plans, stolen from contrib/spi/\*.c */
 | 
						|
static EPlan *
 | 
						|
find_plan(char *ident, EPlan ** eplan, int *nplans)
 | 
						|
{
 | 
						|
	EPlan	   *newp;
 | 
						|
	int			i;
 | 
						|
 | 
						|
	if (*nplans > 0)
 | 
						|
	{
 | 
						|
		for (i = 0; i < *nplans; i++)
 | 
						|
		{
 | 
						|
			if (strcmp((*eplan)[i].ident, ident) == 0)
 | 
						|
				break;
 | 
						|
		}
 | 
						|
		if (i != *nplans)
 | 
						|
			return (*eplan + i);
 | 
						|
		*eplan = (EPlan *) realloc(*eplan, (i + 1) * sizeof(EPlan));
 | 
						|
		newp = *eplan + i;
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		newp = *eplan = (EPlan *) malloc(sizeof(EPlan));
 | 
						|
		(*nplans) = i = 0;
 | 
						|
	}
 | 
						|
 | 
						|
	newp->ident = (char *) malloc(strlen(ident) + 1);
 | 
						|
	strcpy(newp->ident, ident);
 | 
						|
	newp->nplans = 0;
 | 
						|
	newp->splan = NULL;
 | 
						|
	(*nplans)++;
 | 
						|
 | 
						|
	return (newp);
 | 
						|
}
 |