mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-29 22:49:41 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			353 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			353 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /********************************************************
 | |
|  * Interface code to parse an XML document using expat
 | |
|  ********************************************************/
 | |
| 
 | |
| #include "postgres.h"
 | |
| #include "fmgr.h"
 | |
| 
 | |
| #include "expat.h"
 | |
| #include "pgxml.h"
 | |
| 
 | |
| /* Memory management - we make expat use standard pg MM */
 | |
| 
 | |
| XML_Memory_Handling_Suite mhs;	/* allocator callbacks handed to XML_ParserCreate_MM(); filled in by pgxml_mhs_init() */
 | |
| 
 | |
| /* passthrough functions (palloc is a macro) */
 | |
| 
 | |
/*
 * Allocation callback installed into the expat memory suite.
 *
 * palloc is a macro, so it cannot be stored in a function pointer
 * directly; this wrapper gives it an addressable function form.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
 | |
| 
 | |
/*
 * Reallocation callback installed into the expat memory suite.
 *
 * The callback sits in a realloc()-shaped slot, so a NULL "ptr" is
 * treated as a request for a fresh allocation instead of being passed on
 * to repalloc, which operates on an existing chunk.
 * NOTE(review): confirm whether the expat version in use ever passes NULL.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
 | |
| 
 | |
/*
 * Release callback installed into the expat memory suite.
 *
 * Behaves like free(): a NULL pointer is silently ignored rather than
 * being handed to pfree.  The function returns nothing, so the call to
 * pfree is a plain statement (not "return pfree(...)").
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
 | |
| 
 | |
| static void
 | |
| pgxml_mhs_init()
 | |
| {
 | |
| 	mhs.malloc_fcn = pgxml_palloc;
 | |
| 	mhs.realloc_fcn = pgxml_repalloc;
 | |
| 	mhs.free_fcn = pgxml_pfree;
 | |
| }
 | |
| 
 | |
/*
 * Placeholder: currently does nothing.
 *
 * This is where the relevant handlers should eventually be set up from
 * user-supplied settings.  How those settings get made is still an open
 * question.
 */
static void
pgxml_handler_init(void)
{
}
 | |
| 
 | |
| /* Returns true if document is well-formed */
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(pgxml_parse);
 | |
| 
 | |
| Datum
 | |
| pgxml_parse(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	/* called as pgxml_parse(document) */
 | |
| 	XML_Parser	p;
 | |
| 	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
 | |
| 	int32		docsize = VARSIZE(t) - VARHDRSZ;
 | |
| 
 | |
| 	pgxml_mhs_init();
 | |
| 
 | |
| 	pgxml_handler_init();
 | |
| 
 | |
| 	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
 | |
| 	if (!p)
 | |
| 	{
 | |
| 		ereport(ERROR,
 | |
| 				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
 | |
| 				 errmsg("could not create expat parser")));
 | |
| 		PG_RETURN_NULL();		/* seems appropriate if we couldn't parse */
 | |
| 	}
 | |
| 
 | |
| 	if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
 | |
| 	{
 | |
| 		/*
 | |
| 		 * elog(WARNING, "Parse error at line %d:%s",
 | |
| 		 * XML_GetCurrentLineNumber(p),
 | |
| 		 * XML_ErrorString(XML_GetErrorCode(p)));
 | |
| 		 */
 | |
| 		XML_ParserFree(p);
 | |
| 		PG_RETURN_BOOL(false);
 | |
| 	}
 | |
| 
 | |
| 	XML_ParserFree(p);
 | |
| 	PG_RETURN_BOOL(true);
 | |
| }
 | |
| 
 | |
| /* XPath handling functions */
 | |
| 
 | |
| /* XPath support here is for a very skeletal kind of XPath!
 | |
|    It was easy to program though... */
 | |
| 
 | |
| /* This first is the core function that builds a result set. The
 | |
|    actual functions called by the user manipulate that result set
 | |
|    in various ways.
 | |
| */
 | |
| 
 | |
| static XPath_Results *
 | |
| build_xpath_results(text *doc, text *pathstr)
 | |
| {
 | |
| 	XPath_Results *xpr;
 | |
| 	char	   *res;
 | |
| 	pgxml_udata *udata;
 | |
| 	XML_Parser	p;
 | |
| 	int32		docsize;
 | |
| 
 | |
| 	xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
 | |
| 	memset((void *) xpr, 0, sizeof(XPath_Results));
 | |
| 	xpr->rescount = 0;
 | |
| 
 | |
| 	docsize = VARSIZE(doc) - VARHDRSZ;
 | |
| 
 | |
| 	/* res isn't going to be the real return type, it is just a buffer */
 | |
| 
 | |
| 	res = (char *) palloc(docsize);
 | |
| 	memset((void *) res, 0, docsize);
 | |
| 
 | |
| 	xpr->resbuf = res;
 | |
| 
 | |
| 	udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
 | |
| 	memset((void *) udata, 0, sizeof(pgxml_udata));
 | |
| 
 | |
| 	udata->currentpath[0] = '\0';
 | |
| 	udata->textgrab = 0;
 | |
| 
 | |
| 	udata->path = (char *) palloc(VARSIZE(pathstr));
 | |
| 	memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
 | |
| 
 | |
| 	udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
 | |
| 
 | |
| 	udata->resptr = res;
 | |
| 	udata->reslen = 0;
 | |
| 
 | |
| 	udata->xpres = xpr;
 | |
| 
 | |
| 	/* Now fire up the parser */
 | |
| 	pgxml_mhs_init();
 | |
| 
 | |
| 	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
 | |
| 	if (!p)
 | |
| 	{
 | |
| 		ereport(ERROR,
 | |
| 				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
 | |
| 				 errmsg("could not create expat parser")));
 | |
| 		pfree(xpr);
 | |
| 		pfree(udata->path);
 | |
| 		pfree(udata);
 | |
| 		pfree(res);
 | |
| 		return NULL;
 | |
| 	}
 | |
| 	XML_SetUserData(p, (void *) udata);
 | |
| 
 | |
| 	/* Set the handlers */
 | |
| 
 | |
| 	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
 | |
| 	XML_SetCharacterDataHandler(p, pgxml_charhandler);
 | |
| 
 | |
| 	if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
 | |
| 	{
 | |
| 		/*
 | |
| 		 * elog(WARNING, "Parse error at line %d:%s",
 | |
| 		 * XML_GetCurrentLineNumber(p),
 | |
| 		 * XML_ErrorString(XML_GetErrorCode(p)));
 | |
| 		 */
 | |
| 		XML_ParserFree(p);
 | |
| 		pfree(xpr);
 | |
| 		pfree(udata->path);
 | |
| 		pfree(udata);
 | |
| 
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	pfree(udata->path);
 | |
| 	pfree(udata);
 | |
| 	XML_ParserFree(p);
 | |
| 	return xpr;
 | |
| }
 | |
| 
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(pgxml_xpath);
 | |
| 
 | |
| Datum
 | |
| pgxml_xpath(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	/* called as pgxml_xpath(document,pathstr, index) for the moment */
 | |
| 
 | |
| 	XPath_Results *xpresults;
 | |
| 	text	   *restext;
 | |
| 
 | |
| 	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
 | |
| 	text	   *t2 = PG_GETARG_TEXT_P(1);
 | |
| 	int32		ind = PG_GETARG_INT32(2) - 1;
 | |
| 
 | |
| 	xpresults = build_xpath_results(t, t2);
 | |
| 
 | |
| 	/*
 | |
| 	 * This needs to be changed depending on the mechanism for returning
 | |
| 	 * our set of results.
 | |
| 	 */
 | |
| 
 | |
| 	if (xpresults == NULL)		/* parse error (not WF or parser failure) */
 | |
| 		PG_RETURN_NULL();
 | |
| 
 | |
| 	if (ind >= (xpresults->rescount))
 | |
| 		PG_RETURN_NULL();
 | |
| 
 | |
| 	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
 | |
| 	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
 | |
| 
 | |
| 	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
 | |
| 
 | |
| 	pfree(xpresults->resbuf);
 | |
| 	pfree(xpresults);
 | |
| 
 | |
| 	PG_RETURN_TEXT_P(restext);
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| pgxml_pathcompare(void *userData)
 | |
| {
 | |
| 	char	   *matchpos;
 | |
| 
 | |
| 	matchpos = strstr(UD->currentpath, UD->path);
 | |
| 
 | |
| 	if (matchpos == NULL)
 | |
| 	{							/* Should we have more logic here ? */
 | |
| 		if (UD->textgrab)
 | |
| 		{
 | |
| 			UD->textgrab = 0;
 | |
| 			pgxml_finalisegrabbedtext(userData);
 | |
| 		}
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * OK, we have a match of some sort. Now we need to check that our
 | |
| 	 * match is anchored to the *end* of the string AND that it is
 | |
| 	 * immediately preceded by a '/'
 | |
| 	 */
 | |
| 
 | |
| 	/*
 | |
| 	 * This test wouldn't work if strlen (UD->path) overran the length of
 | |
| 	 * the currentpath, but that's not possible because we got a match!
 | |
| 	 */
 | |
| 
 | |
| 	if ((matchpos + strlen(UD->path))[0] == '\0')
 | |
| 	{
 | |
| 		if ((UD->path)[0] == '/')
 | |
| 		{
 | |
| 			if (matchpos == UD->currentpath)
 | |
| 				UD->textgrab = 1;
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			if ((matchpos - 1)[0] == '/')
 | |
| 				UD->textgrab = 1;
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void
 | |
| pgxml_starthandler(void *userData, const XML_Char * name,
 | |
| 				   const XML_Char ** atts)
 | |
| {
 | |
| 
 | |
| 	char		sepstr[] = "/";
 | |
| 
 | |
| 	if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
 | |
| 		elog(WARNING, "path too long");
 | |
| 	else
 | |
| 	{
 | |
| 		strncat(UD->currentpath, sepstr, 1);
 | |
| 		strcat(UD->currentpath, name);
 | |
| 	}
 | |
| 	if (UD->textgrab)
 | |
| 	{
 | |
| 		/*
 | |
| 		 * Depending on user preference, should we "reconstitute" the
 | |
| 		 * element into the result text?
 | |
| 		 */
 | |
| 	}
 | |
| 	else
 | |
| 		pgxml_pathcompare(userData);
 | |
| }
 | |
| 
 | |
| static void
 | |
| pgxml_endhandler(void *userData, const XML_Char * name)
 | |
| {
 | |
| 	/*
 | |
| 	 * Start by removing the current element off the end of the
 | |
| 	 * currentpath
 | |
| 	 */
 | |
| 
 | |
| 	char	   *sepptr;
 | |
| 
 | |
| 	sepptr = strrchr(UD->currentpath, '/');
 | |
| 	if (sepptr == NULL)
 | |
| 	{
 | |
| 		/* internal error */
 | |
| 		elog(ERROR, "did not find '/'");
 | |
| 		sepptr = UD->currentpath;
 | |
| 	}
 | |
| 	if (strcmp(name, sepptr + 1) != 0)
 | |
| 	{
 | |
| 		elog(WARNING, "wanted [%s], got [%s]", sepptr, name);
 | |
| 		/* unmatched entry, so do nothing */
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		sepptr[0] = '\0';		/* Chop that element off the end */
 | |
| 	}
 | |
| 
 | |
| 	if (UD->textgrab)
 | |
| 		pgxml_pathcompare(userData);
 | |
| 
 | |
| }
 | |
| 
 | |
| static void
 | |
| pgxml_charhandler(void *userData, const XML_Char * s, int len)
 | |
| {
 | |
| 	if (UD->textgrab)
 | |
| 	{
 | |
| 		if (len > 0)
 | |
| 		{
 | |
| 			memcpy(UD->resptr, s, len);
 | |
| 			UD->resptr += len;
 | |
| 			UD->reslen += len;
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /* Should I be using PG list types here? */
 | |
| 
 | |
| static void
 | |
| pgxml_finalisegrabbedtext(void *userData)
 | |
| {
 | |
| 	/* In res/reslen, we have a single result. */
 | |
| 	UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
 | |
| 	UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
 | |
| 	UD->reslen = 0;
 | |
| 	UD->xpres->rescount++;
 | |
| 
 | |
| 	/*
 | |
| 	 * This effectively concatenates all the results together but we do
 | |
| 	 * know where one ends and the next begins
 | |
| 	 */
 | |
| }
 |