mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			353 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			353 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/********************************************************
 | 
						|
 * Interface code to parse an XML document using expat
 | 
						|
 ********************************************************/
 | 
						|
 | 
						|
#include "postgres.h"
 | 
						|
#include "fmgr.h"
 | 
						|
 | 
						|
#include "expat.h"
 | 
						|
#include "pgxml.h"
 | 
						|
 | 
						|
/* Memory management - we make expat use standard pg MM */
 | 
						|
 | 
						|
XML_Memory_Handling_Suite mhs;
 | 
						|
 | 
						|
/* passthrough functions (palloc is a macro) */
 | 
						|
 | 
						|
/*
 * pgxml_palloc - allocation callback installed into the expat memory suite.
 *
 * A real function is needed because the suite stores function pointers and
 * palloc cannot be referenced that way directly.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
 | 
						|
 | 
						|
/*
 * pgxml_repalloc - reallocation callback installed into the expat memory
 * suite.
 *
 * expat expects realloc() semantics from this callback, which includes being
 * handed a NULL pointer to mean "allocate a fresh block".  repalloc() needs
 * an existing chunk, so that case is routed to palloc() instead.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
 | 
						|
 | 
						|
/*
 * pgxml_pfree - release callback installed into the expat memory suite.
 *
 * expat expects free() semantics, i.e. freeing NULL must be a harmless
 * no-op, whereas pfree() requires a valid chunk; so NULL is filtered out
 * here.  The function returns nothing (a void function may not return an
 * expression in C).
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
 | 
						|
 | 
						|
static void
 | 
						|
pgxml_mhs_init()
 | 
						|
{
 | 
						|
	mhs.malloc_fcn = pgxml_palloc;
 | 
						|
	mhs.realloc_fcn = pgxml_repalloc;
 | 
						|
	mhs.free_fcn = pgxml_pfree;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * pgxml_handler_init - placeholder for handler configuration.
 *
 * Intended to install the relevant handlers according to user-supplied
 * settings; how those settings would be provided has not been decided, so
 * for now this does nothing.
 */
static void
pgxml_handler_init()
{
	/* nothing to set up yet */
}
 | 
						|
 | 
						|
/* Returns true if document is well-formed */
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(pgxml_parse);
 | 
						|
 | 
						|
Datum
 | 
						|
pgxml_parse(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	/* called as pgxml_parse(document) */
 | 
						|
	XML_Parser	p;
 | 
						|
	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
 | 
						|
	int32		docsize = VARSIZE(t) - VARHDRSZ;
 | 
						|
 | 
						|
	pgxml_mhs_init();
 | 
						|
 | 
						|
	pgxml_handler_init();
 | 
						|
 | 
						|
	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
 | 
						|
	if (!p)
 | 
						|
	{
 | 
						|
		ereport(ERROR,
 | 
						|
				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
 | 
						|
				 errmsg("could not create expat parser")));
 | 
						|
		PG_RETURN_NULL();		/* seems appropriate if we couldn't parse */
 | 
						|
	}
 | 
						|
 | 
						|
	if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
 | 
						|
	{
 | 
						|
		/*
 | 
						|
		 * elog(WARNING, "Parse error at line %d:%s",
 | 
						|
		 * XML_GetCurrentLineNumber(p),
 | 
						|
		 * XML_ErrorString(XML_GetErrorCode(p)));
 | 
						|
		 */
 | 
						|
		XML_ParserFree(p);
 | 
						|
		PG_RETURN_BOOL(false);
 | 
						|
	}
 | 
						|
 | 
						|
	XML_ParserFree(p);
 | 
						|
	PG_RETURN_BOOL(true);
 | 
						|
}
 | 
						|
 | 
						|
/* XPath handling functions */
 | 
						|
 | 
						|
/* XPath support here is for a very skeletal kind of XPath!
 | 
						|
   It was easy to program though... */
 | 
						|
 | 
						|
/* The first function below is the core routine that builds a result set.
   The functions actually called by the user manipulate that result set
   in various ways.
*/
 | 
						|
 | 
						|
static XPath_Results *
 | 
						|
build_xpath_results(text *doc, text *pathstr)
 | 
						|
{
 | 
						|
	XPath_Results *xpr;
 | 
						|
	char	   *res;
 | 
						|
	pgxml_udata *udata;
 | 
						|
	XML_Parser	p;
 | 
						|
	int32		docsize;
 | 
						|
 | 
						|
	xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
 | 
						|
	memset((void *) xpr, 0, sizeof(XPath_Results));
 | 
						|
	xpr->rescount = 0;
 | 
						|
 | 
						|
	docsize = VARSIZE(doc) - VARHDRSZ;
 | 
						|
 | 
						|
	/* res isn't going to be the real return type, it is just a buffer */
 | 
						|
 | 
						|
	res = (char *) palloc(docsize);
 | 
						|
	memset((void *) res, 0, docsize);
 | 
						|
 | 
						|
	xpr->resbuf = res;
 | 
						|
 | 
						|
	udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
 | 
						|
	memset((void *) udata, 0, sizeof(pgxml_udata));
 | 
						|
 | 
						|
	udata->currentpath[0] = '\0';
 | 
						|
	udata->textgrab = 0;
 | 
						|
 | 
						|
	udata->path = (char *) palloc(VARSIZE(pathstr));
 | 
						|
	memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
 | 
						|
 | 
						|
	udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
 | 
						|
 | 
						|
	udata->resptr = res;
 | 
						|
	udata->reslen = 0;
 | 
						|
 | 
						|
	udata->xpres = xpr;
 | 
						|
 | 
						|
	/* Now fire up the parser */
 | 
						|
	pgxml_mhs_init();
 | 
						|
 | 
						|
	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
 | 
						|
	if (!p)
 | 
						|
	{
 | 
						|
		ereport(ERROR,
 | 
						|
				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
 | 
						|
				 errmsg("could not create expat parser")));
 | 
						|
		pfree(xpr);
 | 
						|
		pfree(udata->path);
 | 
						|
		pfree(udata);
 | 
						|
		pfree(res);
 | 
						|
		return NULL;
 | 
						|
	}
 | 
						|
	XML_SetUserData(p, (void *) udata);
 | 
						|
 | 
						|
	/* Set the handlers */
 | 
						|
 | 
						|
	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
 | 
						|
	XML_SetCharacterDataHandler(p, pgxml_charhandler);
 | 
						|
 | 
						|
	if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
 | 
						|
	{
 | 
						|
		/*
 | 
						|
		 * elog(WARNING, "Parse error at line %d:%s",
 | 
						|
		 * XML_GetCurrentLineNumber(p),
 | 
						|
		 * XML_ErrorString(XML_GetErrorCode(p)));
 | 
						|
		 */
 | 
						|
		XML_ParserFree(p);
 | 
						|
		pfree(xpr);
 | 
						|
		pfree(udata->path);
 | 
						|
		pfree(udata);
 | 
						|
 | 
						|
		return NULL;
 | 
						|
	}
 | 
						|
 | 
						|
	pfree(udata->path);
 | 
						|
	pfree(udata);
 | 
						|
	XML_ParserFree(p);
 | 
						|
	return xpr;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(pgxml_xpath);
 | 
						|
 | 
						|
Datum
 | 
						|
pgxml_xpath(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	/* called as pgxml_xpath(document,pathstr, index) for the moment */
 | 
						|
 | 
						|
	XPath_Results *xpresults;
 | 
						|
	text	   *restext;
 | 
						|
 | 
						|
	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
 | 
						|
	text	   *t2 = PG_GETARG_TEXT_P(1);
 | 
						|
	int32		ind = PG_GETARG_INT32(2) - 1;
 | 
						|
 | 
						|
	xpresults = build_xpath_results(t, t2);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * This needs to be changed depending on the mechanism for returning
 | 
						|
	 * our set of results.
 | 
						|
	 */
 | 
						|
 | 
						|
	if (xpresults == NULL)		/* parse error (not WF or parser failure) */
 | 
						|
		PG_RETURN_NULL();
 | 
						|
 | 
						|
	if (ind >= (xpresults->rescount))
 | 
						|
		PG_RETURN_NULL();
 | 
						|
 | 
						|
	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
 | 
						|
	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
 | 
						|
 | 
						|
	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
 | 
						|
 | 
						|
	pfree(xpresults->resbuf);
 | 
						|
	pfree(xpresults);
 | 
						|
 | 
						|
	PG_RETURN_TEXT_P(restext);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
pgxml_pathcompare(void *userData)
 | 
						|
{
 | 
						|
	char	   *matchpos;
 | 
						|
 | 
						|
	matchpos = strstr(UD->currentpath, UD->path);
 | 
						|
 | 
						|
	if (matchpos == NULL)
 | 
						|
	{							/* Should we have more logic here ? */
 | 
						|
		if (UD->textgrab)
 | 
						|
		{
 | 
						|
			UD->textgrab = 0;
 | 
						|
			pgxml_finalisegrabbedtext(userData);
 | 
						|
		}
 | 
						|
		return;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * OK, we have a match of some sort. Now we need to check that our
 | 
						|
	 * match is anchored to the *end* of the string AND that it is
 | 
						|
	 * immediately preceded by a '/'
 | 
						|
	 */
 | 
						|
 | 
						|
	/*
 | 
						|
	 * This test wouldn't work if strlen (UD->path) overran the length of
 | 
						|
	 * the currentpath, but that's not possible because we got a match!
 | 
						|
	 */
 | 
						|
 | 
						|
	if ((matchpos + strlen(UD->path))[0] == '\0')
 | 
						|
	{
 | 
						|
		if ((UD->path)[0] == '/')
 | 
						|
		{
 | 
						|
			if (matchpos == UD->currentpath)
 | 
						|
				UD->textgrab = 1;
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			if ((matchpos - 1)[0] == '/')
 | 
						|
				UD->textgrab = 1;
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
pgxml_starthandler(void *userData, const XML_Char * name,
 | 
						|
				   const XML_Char ** atts)
 | 
						|
{
 | 
						|
 | 
						|
	char		sepstr[] = "/";
 | 
						|
 | 
						|
	if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
 | 
						|
		elog(WARNING, "path too long");
 | 
						|
	else
 | 
						|
	{
 | 
						|
		strncat(UD->currentpath, sepstr, 1);
 | 
						|
		strcat(UD->currentpath, name);
 | 
						|
	}
 | 
						|
	if (UD->textgrab)
 | 
						|
	{
 | 
						|
		/*
 | 
						|
		 * Depending on user preference, should we "reconstitute" the
 | 
						|
		 * element into the result text?
 | 
						|
		 */
 | 
						|
	}
 | 
						|
	else
 | 
						|
		pgxml_pathcompare(userData);
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
pgxml_endhandler(void *userData, const XML_Char * name)
 | 
						|
{
 | 
						|
	/*
 | 
						|
	 * Start by removing the current element off the end of the
 | 
						|
	 * currentpath
 | 
						|
	 */
 | 
						|
 | 
						|
	char	   *sepptr;
 | 
						|
 | 
						|
	sepptr = strrchr(UD->currentpath, '/');
 | 
						|
	if (sepptr == NULL)
 | 
						|
	{
 | 
						|
		/* internal error */
 | 
						|
		elog(ERROR, "did not find '/'");
 | 
						|
		sepptr = UD->currentpath;
 | 
						|
	}
 | 
						|
	if (strcmp(name, sepptr + 1) != 0)
 | 
						|
	{
 | 
						|
		elog(WARNING, "wanted [%s], got [%s]", sepptr, name);
 | 
						|
		/* unmatched entry, so do nothing */
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		sepptr[0] = '\0';		/* Chop that element off the end */
 | 
						|
	}
 | 
						|
 | 
						|
	if (UD->textgrab)
 | 
						|
		pgxml_pathcompare(userData);
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
pgxml_charhandler(void *userData, const XML_Char * s, int len)
 | 
						|
{
 | 
						|
	if (UD->textgrab)
 | 
						|
	{
 | 
						|
		if (len > 0)
 | 
						|
		{
 | 
						|
			memcpy(UD->resptr, s, len);
 | 
						|
			UD->resptr += len;
 | 
						|
			UD->reslen += len;
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/* Should I be using PG list types here? */
 | 
						|
 | 
						|
static void
 | 
						|
pgxml_finalisegrabbedtext(void *userData)
 | 
						|
{
 | 
						|
	/* In res/reslen, we have a single result. */
 | 
						|
	UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
 | 
						|
	UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
 | 
						|
	UD->reslen = 0;
 | 
						|
	UD->xpres->rescount++;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * This effectively concatenates all the results together but we do
 | 
						|
	 * know where one ends and the next begins
 | 
						|
	 */
 | 
						|
}
 |