mirror of
https://github.com/postgres/postgres.git
synced 2025-04-29 13:56:47 +03:00
Allow an XML fragment to contain an XML declaration. For that, we need a small
hand-crafted parser for the XML declaration, because libxml doesn't seem to allow this.
This commit is contained in:
parent
324297dfbb
commit
53a11befc8
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.11 2007/01/06 19:18:36 petere Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -489,6 +489,122 @@ xml_init(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SQL/XML allows storing "XML documents" or "XML content". "XML
|
||||||
|
* documents" are specified by the XML specification and are parsed
|
||||||
|
* easily by libxml. "XML content" is specified by SQL/XML as the
|
||||||
|
* production "XMLDecl? content". But libxml can only parse the
|
||||||
|
* "content" part, so we have to parse the XML declaration ourselves
|
||||||
|
* to complete this.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Helpers for parse_xml_decl().
 *
 * CHECK_XML_SPACE(p): if the character at p is not XML white space (as
 * decided by libxml's xmlIsBlank_ch), return XML_ERR_SPACE_REQUIRED from the
 * *enclosing function*.  Does not advance p.
 *
 * SKIP_XML_SPACE(p): advance p past any run of XML white space.  p must be a
 * modifiable lvalue.
 *
 * Both are wrapped in do { ... } while (0) so that they behave as a single
 * statement, e.g. when used as the body of an unbraced if/else.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)
|
||||||
|
|
||||||
|
static int
|
||||||
|
parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
|
||||||
|
{
|
||||||
|
const xmlChar *p;
|
||||||
|
const xmlChar *save_p;
|
||||||
|
|
||||||
|
p = str;
|
||||||
|
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
|
||||||
|
goto finished;
|
||||||
|
|
||||||
|
p += 5;
|
||||||
|
|
||||||
|
/* version */
|
||||||
|
CHECK_XML_SPACE(p);
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"version", 7) != 0)
|
||||||
|
return XML_ERR_VERSION_MISSING;
|
||||||
|
p += 7;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (*p != '=')
|
||||||
|
return XML_ERR_VERSION_MISSING;
|
||||||
|
p += 1;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
|
||||||
|
return XML_ERR_VERSION_MISSING;
|
||||||
|
p += 5;
|
||||||
|
|
||||||
|
/* encoding */
|
||||||
|
save_p = p;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"encoding", 8) == 0)
|
||||||
|
{
|
||||||
|
CHECK_XML_SPACE(save_p);
|
||||||
|
p += 8;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (*p != '=')
|
||||||
|
return XML_ERR_MISSING_ENCODING;
|
||||||
|
p += 1;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
|
||||||
|
if (*p == '\'' || *p == '"')
|
||||||
|
{
|
||||||
|
const xmlChar *q;
|
||||||
|
|
||||||
|
q = xmlStrchr(p + 1, *p);
|
||||||
|
if (!q)
|
||||||
|
return XML_ERR_MISSING_ENCODING;
|
||||||
|
|
||||||
|
*encoding = xmlStrndup(p + 1, q - p - 1);
|
||||||
|
p = q + 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return XML_ERR_MISSING_ENCODING;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p = save_p;
|
||||||
|
*encoding = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* standalone */
|
||||||
|
save_p = p;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"standalone", 10) == 0)
|
||||||
|
{
|
||||||
|
CHECK_XML_SPACE(save_p);
|
||||||
|
p += 10;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (*p != '=')
|
||||||
|
return XML_ERR_STANDALONE_VALUE;
|
||||||
|
p += 1;
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"'yes'", 5) == 0 || xmlStrncmp(p, (xmlChar *)"\"yes\"", 5) == 0)
|
||||||
|
{
|
||||||
|
*standalone = 1;
|
||||||
|
p += 5;
|
||||||
|
}
|
||||||
|
else if (xmlStrncmp(p, (xmlChar *)"'no'", 4) == 0 || xmlStrncmp(p, (xmlChar *)"\"no\"", 4) == 0)
|
||||||
|
{
|
||||||
|
*standalone = 0;
|
||||||
|
p += 4;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return XML_ERR_STANDALONE_VALUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p = save_p;
|
||||||
|
*standalone = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SKIP_XML_SPACE(p);
|
||||||
|
if (xmlStrncmp(p, (xmlChar *)"?>", 2) != 0)
|
||||||
|
return XML_ERR_XMLDECL_NOT_FINISHED;
|
||||||
|
p += 2;
|
||||||
|
|
||||||
|
finished:
|
||||||
|
if (len)
|
||||||
|
*len = (p - str);
|
||||||
|
return XML_ERR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert a C string to XML internal representation
|
* Convert a C string to XML internal representation
|
||||||
*
|
*
|
||||||
@ -536,19 +652,23 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
size_t count;
|
||||||
|
xmlChar *encoding = NULL;
|
||||||
|
int standalone = -1;
|
||||||
|
|
||||||
doc = xmlNewDoc(NULL);
|
doc = xmlNewDoc(NULL);
|
||||||
|
|
||||||
/*
|
res_code = parse_xml_decl(string, &count, &encoding, &standalone);
|
||||||
* FIXME: An XMLDecl is supposed to be accepted before the
|
|
||||||
* content, but libxml doesn't allow this. Parse that
|
|
||||||
* ourselves?
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
|
/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
|
||||||
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL);
|
if (res_code == 0)
|
||||||
|
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
|
||||||
if (res_code != 0)
|
if (res_code != 0)
|
||||||
xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
|
xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
|
||||||
"invalid XML content", res_code);
|
"invalid XML content", res_code);
|
||||||
|
|
||||||
|
doc->encoding = encoding;
|
||||||
|
doc->standalone = standalone;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO encoding issues
|
/* TODO encoding issues
|
||||||
|
Loading…
x
Reference in New Issue
Block a user