mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-30 22:43:14 +03:00
- parser.c parserInternals.c encoding.c: Since Notepad on Win2k
outputs a BOM in UTF-8, an erratum has been issued to avoid the problem; that was the most reasonable solution. Add support for a leading UTF-8 BOM in entities. Daniel
This commit is contained in:
@ -1,3 +1,10 @@
|
|||||||
|
Wed Jun 20 19:37:25 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
|
||||||
|
|
||||||
|
* parser.c parserInternals.c encoding.c: Since Notepad on Win2k
|
||||||
|
outputs a BOM in UTF8, an erratum has been issued to avoid the
|
||||||
|
problem, that was the most reasonable solution... Add support
|
||||||
|
for a leading UTF8 BOM in entities.
|
||||||
|
|
||||||
Wed Jun 20 15:38:59 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
|
Wed Jun 20 15:38:59 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
|
||||||
|
|
||||||
* valid.c: fixed a bug found when post validating an entity ref
|
* valid.c: fixed a bug found when post validating an entity ref
|
||||||
|
@ -1131,6 +1131,15 @@ xmlDetectCharEncoding(const unsigned char* in, int len)
|
|||||||
(in[2] == 0x78) && (in[3] == 0x6D))
|
(in[2] == 0x78) && (in[3] == 0x6D))
|
||||||
return(XML_CHAR_ENCODING_UTF8);
|
return(XML_CHAR_ENCODING_UTF8);
|
||||||
}
|
}
|
||||||
|
if (len >= 3) {
|
||||||
|
/*
|
||||||
|
* Errata on XML-1.0 June 20 2001
|
||||||
|
* We now allow a UTF-8 encoded BOM
|
||||||
|
*/
|
||||||
|
if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
|
||||||
|
(in[2] == 0xBF))
|
||||||
|
return(XML_CHAR_ENCODING_UTF8);
|
||||||
|
}
|
||||||
if (len >= 2) {
|
if (len >= 2) {
|
||||||
if ((in[0] == 0xFE) && (in[1] == 0xFF))
|
if ((in[0] == 0xFE) && (in[1] == 0xFF))
|
||||||
return(XML_CHAR_ENCODING_UTF16BE);
|
return(XML_CHAR_ENCODING_UTF16BE);
|
||||||
|
73
parser.c
73
parser.c
@ -768,6 +768,9 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
|||||||
} else {
|
} else {
|
||||||
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
|
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
|
||||||
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
|
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
|
||||||
|
xmlChar start[4];
|
||||||
|
xmlCharEncoding enc;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* handle the extra spaces added before and after
|
* handle the extra spaces added before and after
|
||||||
* c.f. http://www.w3.org/TR/REC-xml#as-PE
|
* c.f. http://www.w3.org/TR/REC-xml#as-PE
|
||||||
@ -775,6 +778,22 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
|||||||
*/
|
*/
|
||||||
input = xmlNewEntityInputStream(ctxt, entity);
|
input = xmlNewEntityInputStream(ctxt, entity);
|
||||||
xmlPushInput(ctxt, input);
|
xmlPushInput(ctxt, input);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the 4 first bytes and decode the charset
|
||||||
|
* if enc != XML_CHAR_ENCODING_NONE
|
||||||
|
* plug some encoding conversion routines.
|
||||||
|
*/
|
||||||
|
GROW
|
||||||
|
start[0] = RAW;
|
||||||
|
start[1] = NXT(1);
|
||||||
|
start[2] = NXT(2);
|
||||||
|
start[3] = NXT(3);
|
||||||
|
enc = xmlDetectCharEncoding(start, 4);
|
||||||
|
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||||
|
xmlSwitchEncoding(ctxt, enc);
|
||||||
|
}
|
||||||
|
|
||||||
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
|
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
|
||||||
(RAW == '<') && (NXT(1) == '?') &&
|
(RAW == '<') && (NXT(1) == '?') &&
|
||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
@ -8585,6 +8604,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
|||||||
xmlDtdPtr ret = NULL;
|
xmlDtdPtr ret = NULL;
|
||||||
xmlParserCtxtPtr ctxt;
|
xmlParserCtxtPtr ctxt;
|
||||||
xmlParserInputPtr pinput = NULL;
|
xmlParserInputPtr pinput = NULL;
|
||||||
|
xmlChar start[4];
|
||||||
|
|
||||||
if (input == NULL)
|
if (input == NULL)
|
||||||
return(NULL);
|
return(NULL);
|
||||||
@ -8634,6 +8654,23 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
|||||||
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
|
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
|
||||||
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
|
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
|
||||||
BAD_CAST "none", BAD_CAST "none");
|
BAD_CAST "none", BAD_CAST "none");
|
||||||
|
|
||||||
|
if (enc == XML_CHAR_ENCODING_NONE) {
|
||||||
|
/*
|
||||||
|
* Get the 4 first bytes and decode the charset
|
||||||
|
* if enc != XML_CHAR_ENCODING_NONE
|
||||||
|
* plug some encoding conversion routines.
|
||||||
|
*/
|
||||||
|
start[0] = RAW;
|
||||||
|
start[1] = NXT(1);
|
||||||
|
start[2] = NXT(2);
|
||||||
|
start[3] = NXT(3);
|
||||||
|
enc = xmlDetectCharEncoding(start, 4);
|
||||||
|
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||||
|
xmlSwitchEncoding(ctxt, enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
|
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
|
||||||
|
|
||||||
if (ctxt->myDoc != NULL) {
|
if (ctxt->myDoc != NULL) {
|
||||||
@ -8785,6 +8822,8 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
|
|||||||
xmlDocPtr newDoc;
|
xmlDocPtr newDoc;
|
||||||
xmlSAXHandlerPtr oldsax = NULL;
|
xmlSAXHandlerPtr oldsax = NULL;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
xmlChar start[4];
|
||||||
|
xmlCharEncoding enc;
|
||||||
|
|
||||||
if (ctx->depth > 40) {
|
if (ctx->depth > 40) {
|
||||||
return(XML_ERR_ENTITY_LOOP);
|
return(XML_ERR_ENTITY_LOOP);
|
||||||
@ -8832,10 +8871,24 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
|
|||||||
newDoc->children->doc = ctx->myDoc;
|
newDoc->children->doc = ctx->myDoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the 4 first bytes and decode the charset
|
||||||
|
* if enc != XML_CHAR_ENCODING_NONE
|
||||||
|
* plug some encoding conversion routines.
|
||||||
|
*/
|
||||||
|
GROW
|
||||||
|
start[0] = RAW;
|
||||||
|
start[1] = NXT(1);
|
||||||
|
start[2] = NXT(2);
|
||||||
|
start[3] = NXT(3);
|
||||||
|
enc = xmlDetectCharEncoding(start, 4);
|
||||||
|
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||||
|
xmlSwitchEncoding(ctxt, enc);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parse a possible text declaration first
|
* Parse a possible text declaration first
|
||||||
*/
|
*/
|
||||||
GROW;
|
|
||||||
if ((RAW == '<') && (NXT(1) == '?') &&
|
if ((RAW == '<') && (NXT(1) == '?') &&
|
||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||||
@ -8946,6 +8999,8 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
|
|||||||
xmlDocPtr newDoc;
|
xmlDocPtr newDoc;
|
||||||
xmlSAXHandlerPtr oldsax = NULL;
|
xmlSAXHandlerPtr oldsax = NULL;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
xmlChar start[4];
|
||||||
|
xmlCharEncoding enc;
|
||||||
|
|
||||||
if (depth > 40) {
|
if (depth > 40) {
|
||||||
return(XML_ERR_ENTITY_LOOP);
|
return(XML_ERR_ENTITY_LOOP);
|
||||||
@ -9015,10 +9070,24 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
|
|||||||
newDoc->children->doc = doc;
|
newDoc->children->doc = doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the 4 first bytes and decode the charset
|
||||||
|
* if enc != XML_CHAR_ENCODING_NONE
|
||||||
|
* plug some encoding conversion routines.
|
||||||
|
*/
|
||||||
|
GROW;
|
||||||
|
start[0] = RAW;
|
||||||
|
start[1] = NXT(1);
|
||||||
|
start[2] = NXT(2);
|
||||||
|
start[3] = NXT(3);
|
||||||
|
enc = xmlDetectCharEncoding(start, 4);
|
||||||
|
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||||
|
xmlSwitchEncoding(ctxt, enc);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parse a possible text declaration first
|
* Parse a possible text declaration first
|
||||||
*/
|
*/
|
||||||
GROW;
|
|
||||||
if ((RAW == '<') && (NXT(1) == '?') &&
|
if ((RAW == '<') && (NXT(1) == '?') &&
|
||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||||
|
@ -1569,6 +1569,17 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
|||||||
case XML_CHAR_ENCODING_UTF8:
|
case XML_CHAR_ENCODING_UTF8:
|
||||||
/* default encoding, no conversion should be needed */
|
/* default encoding, no conversion should be needed */
|
||||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Errata on XML-1.0 June 20 2001
|
||||||
|
* Specific handling of the Byte Order Mark for
|
||||||
|
* UTF-8
|
||||||
|
*/
|
||||||
|
if ((ctxt->input->cur[0] == 0xEF) &&
|
||||||
|
(ctxt->input->cur[1] == 0xBB) &&
|
||||||
|
(ctxt->input->cur[2] == 0xBF)) {
|
||||||
|
ctxt->input->cur += 3;
|
||||||
|
}
|
||||||
return(0);
|
return(0);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@ -1739,6 +1750,18 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
|||||||
(ctxt->input->cur[1] == 0xFF)) {
|
(ctxt->input->cur[1] == 0xFF)) {
|
||||||
ctxt->input->cur += 2;
|
ctxt->input->cur += 2;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* Errata on XML-1.0 June 20 2001
|
||||||
|
* Specific handling of the Byte Order Mark for
|
||||||
|
* UTF-8
|
||||||
|
*/
|
||||||
|
if ((handler->name != NULL) &&
|
||||||
|
(!strcmp(handler->name, "UTF-8")) &&
|
||||||
|
(ctxt->input->cur[0] == 0xEF) &&
|
||||||
|
(ctxt->input->cur[1] == 0xBB) &&
|
||||||
|
(ctxt->input->cur[2] == 0xBF)) {
|
||||||
|
ctxt->input->cur += 3;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Shrink the current input buffer.
|
* Shrink the current input buffer.
|
||||||
|
Reference in New Issue
Block a user