mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set when xmlSwitchEncoding is called. The parser can use the flag to reliably detect whether an encoding was already set via user override, BOM or other auto-detection. In this case, the encoding declaration won't be used to switch the encoding. Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding and ctxt->input->buf->encoder was used. Introduce private helper functions to switch encodings used by both the XML and HTML parser: - xmlDetectEncoding which skips over the BOM, allowing to remove the BOM checks from other encoding functions. - xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns about encoding mismatches. If users override the encoding, store the declared instead of the actual encoding in xmlDoc. In this case, the actual encoding is known and the raw value from the doc is more useful. Also use the input flags to store the ISO-8859-1 fallback state. Restrict the fallback to cases where no encoding was specified. (The fallback is only useful in recovery mode and these days broken UTF-8 is probably more likely than ISO-8859-1, so it might eventually be removed completely.) The 'charset' member of xmlParserCtxt is now unused. The 'encoding' member of xmlParserInput is now unused. The 'standalone' member of xmlParserInput is renamed to 'flags'. A new parser state XML_PARSER_XML_DECL is added for the push parser.
This commit is contained in:
337
parser.c
337
parser.c
@@ -281,7 +281,7 @@ xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
||||
*
|
||||
* Handle a warning.
|
||||
*/
|
||||
static void LIBXML_ATTR_FORMAT(3,0)
|
||||
void LIBXML_ATTR_FORMAT(3,0)
|
||||
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
||||
const char *msg, const xmlChar *str1, const xmlChar *str2)
|
||||
{
|
||||
@@ -2313,6 +2313,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
|
||||
return;
|
||||
case XML_PARSER_PROLOG:
|
||||
case XML_PARSER_START:
|
||||
case XML_PARSER_XML_DECL:
|
||||
case XML_PARSER_MISC:
|
||||
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
|
||||
return;
|
||||
@@ -6682,7 +6683,6 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
|
||||
void
|
||||
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
||||
xmlChar *version;
|
||||
const xmlChar *encoding;
|
||||
int oldstate;
|
||||
|
||||
/*
|
||||
@@ -6721,7 +6721,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
||||
/*
|
||||
* We must have the encoding declaration
|
||||
*/
|
||||
encoding = xmlParseEncodingDecl(ctxt);
|
||||
xmlParseEncodingDecl(ctxt);
|
||||
if (ctxt->instate == XML_PARSER_EOF)
|
||||
return;
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
@@ -6731,10 +6731,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->instate = oldstate;
|
||||
return;
|
||||
}
|
||||
if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
|
||||
xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
|
||||
"Missing encoding in text declaration\n");
|
||||
}
|
||||
|
||||
SKIP_BLANKS;
|
||||
if ((RAW == '?') && (NXT(1) == '>')) {
|
||||
@@ -6773,21 +6769,8 @@ void
|
||||
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
|
||||
const xmlChar *SystemID) {
|
||||
xmlDetectSAX2(ctxt);
|
||||
GROW;
|
||||
|
||||
if ((ctxt->encoding == NULL) &&
|
||||
(ctxt->input->end - ctxt->input->cur >= 4)) {
|
||||
xmlChar start[4];
|
||||
xmlCharEncoding enc;
|
||||
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(start, 4);
|
||||
if (enc != XML_CHAR_ENCODING_NONE)
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
|
||||
xmlParseTextDecl(ctxt);
|
||||
@@ -7727,8 +7710,6 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
|
||||
"Internal: %%%s; is not a parameter entity\n",
|
||||
name, NULL);
|
||||
} else {
|
||||
xmlChar start[4];
|
||||
xmlCharEncoding enc;
|
||||
unsigned long parentConsumed;
|
||||
xmlEntityPtr oldEnt;
|
||||
|
||||
@@ -7769,28 +7750,7 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
|
||||
input->parentConsumed = parentConsumed;
|
||||
|
||||
if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
|
||||
/*
|
||||
* Get the 4 first bytes and decode the charset
|
||||
* if enc != XML_CHAR_ENCODING_NONE
|
||||
* plug some encoding conversion routines.
|
||||
* Note that, since we may have some non-UTF8
|
||||
* encoding (like UTF16, bug 135229), the 'length'
|
||||
* is not known, but we can calculate based upon
|
||||
* the amount of data in the buffer.
|
||||
*/
|
||||
GROW
|
||||
if (ctxt->instate == XML_PARSER_EOF)
|
||||
return;
|
||||
if ((ctxt->input->end - ctxt->input->cur)>=4) {
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(start, 4);
|
||||
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
}
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
|
||||
(IS_BLANK_CH(NXT(5)))) {
|
||||
@@ -10094,101 +10054,45 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
|
||||
xmlChar *encoding = NULL;
|
||||
|
||||
SKIP_BLANKS;
|
||||
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
|
||||
SKIP(8);
|
||||
SKIP_BLANKS;
|
||||
if (RAW != '=') {
|
||||
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
|
||||
return(NULL);
|
||||
}
|
||||
NEXT;
|
||||
SKIP_BLANKS;
|
||||
if (RAW == '"') {
|
||||
NEXT;
|
||||
encoding = xmlParseEncName(ctxt);
|
||||
if (RAW != '"') {
|
||||
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
|
||||
xmlFree((xmlChar *) encoding);
|
||||
return(NULL);
|
||||
} else
|
||||
NEXT;
|
||||
} else if (RAW == '\''){
|
||||
NEXT;
|
||||
encoding = xmlParseEncName(ctxt);
|
||||
if (RAW != '\'') {
|
||||
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
|
||||
xmlFree((xmlChar *) encoding);
|
||||
return(NULL);
|
||||
} else
|
||||
NEXT;
|
||||
} else {
|
||||
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
|
||||
}
|
||||
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
|
||||
return(NULL);
|
||||
|
||||
/*
|
||||
* Non standard parsing, allowing the user to ignore encoding
|
||||
*/
|
||||
if (ctxt->options & XML_PARSE_IGNORE_ENC) {
|
||||
xmlFree((xmlChar *) encoding);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF-16 encoding switch has already taken place at this stage,
|
||||
* more over the little-endian/big-endian selection is already done
|
||||
*/
|
||||
if ((encoding != NULL) &&
|
||||
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
|
||||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
|
||||
/*
|
||||
* If no encoding was passed to the parser, that we are
|
||||
* using UTF-16 and no decoder is present i.e. the
|
||||
* document is apparently UTF-8 compatible, then raise an
|
||||
* encoding mismatch fatal error
|
||||
*/
|
||||
if ((ctxt->encoding == NULL) &&
|
||||
(ctxt->input->buf != NULL) &&
|
||||
(ctxt->input->buf->encoder == NULL)) {
|
||||
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
|
||||
"Document labelled UTF-16 but has UTF-8 content\n");
|
||||
}
|
||||
if (ctxt->encoding != NULL)
|
||||
xmlFree((xmlChar *) ctxt->encoding);
|
||||
ctxt->encoding = encoding;
|
||||
}
|
||||
/*
|
||||
* UTF-8 encoding is handled natively
|
||||
*/
|
||||
else if ((encoding != NULL) &&
|
||||
((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
|
||||
(!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
|
||||
/* TODO: Check for encoding mismatch. */
|
||||
if (ctxt->encoding != NULL)
|
||||
xmlFree((xmlChar *) ctxt->encoding);
|
||||
ctxt->encoding = encoding;
|
||||
}
|
||||
else if (encoding != NULL) {
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
if (ctxt->input->encoding != NULL)
|
||||
xmlFree((xmlChar *) ctxt->input->encoding);
|
||||
ctxt->input->encoding = encoding;
|
||||
|
||||
handler = xmlFindCharEncodingHandler((const char *) encoding);
|
||||
if (handler != NULL) {
|
||||
if (xmlSwitchToEncoding(ctxt, handler) < 0) {
|
||||
/* failed to convert */
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
return(NULL);
|
||||
}
|
||||
} else {
|
||||
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"Unsupported encoding %s\n", encoding);
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
SKIP(8);
|
||||
SKIP_BLANKS;
|
||||
if (RAW != '=') {
|
||||
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
|
||||
return(NULL);
|
||||
}
|
||||
return(encoding);
|
||||
NEXT;
|
||||
SKIP_BLANKS;
|
||||
if (RAW == '"') {
|
||||
NEXT;
|
||||
encoding = xmlParseEncName(ctxt);
|
||||
if (RAW != '"') {
|
||||
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
|
||||
xmlFree((xmlChar *) encoding);
|
||||
return(NULL);
|
||||
} else
|
||||
NEXT;
|
||||
} else if (RAW == '\''){
|
||||
NEXT;
|
||||
encoding = xmlParseEncName(ctxt);
|
||||
if (RAW != '\'') {
|
||||
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
|
||||
xmlFree((xmlChar *) encoding);
|
||||
return(NULL);
|
||||
} else
|
||||
NEXT;
|
||||
} else {
|
||||
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
|
||||
}
|
||||
|
||||
if (encoding == NULL)
|
||||
return(NULL);
|
||||
|
||||
xmlSetDeclaredEncoding(ctxt, encoding);
|
||||
|
||||
return(ctxt->encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -10365,7 +10269,7 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
|
||||
/*
|
||||
* We may have the standalone status.
|
||||
*/
|
||||
if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
|
||||
if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
|
||||
if ((RAW == '?') && (NXT(1) == '>')) {
|
||||
SKIP(2);
|
||||
return;
|
||||
@@ -10443,9 +10347,6 @@ xmlParseMisc(xmlParserCtxtPtr ctxt) {
|
||||
|
||||
int
|
||||
xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
||||
xmlChar start[4];
|
||||
xmlCharEncoding enc;
|
||||
|
||||
xmlInitParser();
|
||||
|
||||
if ((ctxt == NULL) || (ctxt->input == NULL))
|
||||
@@ -10466,23 +10367,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
||||
if (ctxt->instate == XML_PARSER_EOF)
|
||||
return(-1);
|
||||
|
||||
if ((ctxt->encoding == NULL) &&
|
||||
((ctxt->input->end - ctxt->input->cur) >= 4)) {
|
||||
/*
|
||||
* Get the 4 first bytes and decode the charset
|
||||
* if enc != XML_CHAR_ENCODING_NONE
|
||||
* plug some encoding conversion routines.
|
||||
*/
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(&start[0], 4);
|
||||
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
}
|
||||
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
if (CUR == 0) {
|
||||
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
|
||||
@@ -10626,38 +10511,18 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
||||
|
||||
int
|
||||
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
|
||||
xmlChar start[4];
|
||||
xmlCharEncoding enc;
|
||||
|
||||
if ((ctxt == NULL) || (ctxt->input == NULL))
|
||||
return(-1);
|
||||
|
||||
xmlDetectSAX2(ctxt);
|
||||
|
||||
GROW;
|
||||
|
||||
/*
|
||||
* SAX: beginning of the document processing.
|
||||
*/
|
||||
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
|
||||
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
|
||||
|
||||
/*
|
||||
* Get the 4 first bytes and decode the charset
|
||||
* if enc != XML_CHAR_ENCODING_NONE
|
||||
* plug some encoding conversion routines.
|
||||
*/
|
||||
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(start, 4);
|
||||
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
}
|
||||
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
if (CUR == 0) {
|
||||
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
|
||||
@@ -11076,6 +10941,9 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
case XML_PARSER_START:
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"PP: try START\n"); break;
|
||||
case XML_PARSER_XML_DECL:
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"PP: try XML_DECL\n"); break;
|
||||
case XML_PARSER_MISC:
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"PP: try MISC\n");break;
|
||||
@@ -11164,39 +11032,25 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
*/
|
||||
goto done;
|
||||
case XML_PARSER_START:
|
||||
if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
|
||||
xmlChar start[4];
|
||||
xmlCharEncoding enc;
|
||||
/*
|
||||
* Very first chars read from the document flow.
|
||||
*/
|
||||
if (avail < 4)
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Very first chars read from the document flow.
|
||||
*/
|
||||
if (avail < 4)
|
||||
goto done;
|
||||
/*
|
||||
* We need more bytes to detect EBCDIC code pages.
|
||||
* See xmlDetectEBCDIC.
|
||||
*/
|
||||
if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
|
||||
(!terminate) && (avail < 200))
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Get the 4 first bytes and decode the charset
|
||||
* if enc != XML_CHAR_ENCODING_NONE
|
||||
* plug some encoding conversion routines,
|
||||
* else xmlSwitchEncoding will set to (default)
|
||||
* UTF8.
|
||||
*/
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(start, 4);
|
||||
/*
|
||||
* We need more bytes to detect EBCDIC code pages.
|
||||
* See xmlDetectEBCDIC.
|
||||
*/
|
||||
if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
|
||||
(!terminate) && (avail < 200))
|
||||
goto done;
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
break;
|
||||
}
|
||||
xmlDetectEncoding(ctxt);
|
||||
ctxt->instate = XML_PARSER_XML_DECL;
|
||||
break;
|
||||
|
||||
case XML_PARSER_XML_DECL:
|
||||
if (avail < 2)
|
||||
goto done;
|
||||
cur = ctxt->input->cur[0];
|
||||
@@ -11242,9 +11096,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
xmlHaltParser(ctxt);
|
||||
return(0);
|
||||
}
|
||||
if ((ctxt->encoding == NULL) &&
|
||||
(ctxt->input->encoding != NULL))
|
||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
|
||||
(!ctxt->disableSAX))
|
||||
ctxt->sax->startDocument(ctxt->userData);
|
||||
@@ -11978,13 +11829,6 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
|
||||
xmlBufResetInput(inputStream->buf->buffer, inputStream);
|
||||
inputPush(ctxt, inputStream);
|
||||
|
||||
/*
|
||||
* If the caller didn't provide an initial 'chunk' for determining
|
||||
* the encoding, we set the context to XML_CHAR_ENCODING_NONE so
|
||||
* that it can be automatically determined later
|
||||
*/
|
||||
ctxt->charset = XML_CHAR_ENCODING_NONE;
|
||||
|
||||
if ((size != 0) && (chunk != NULL) &&
|
||||
(ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
|
||||
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
|
||||
@@ -12092,7 +11936,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
||||
xmlDtdPtr ret = NULL;
|
||||
xmlParserCtxtPtr ctxt;
|
||||
xmlParserInputPtr pinput = NULL;
|
||||
xmlChar start[4];
|
||||
|
||||
if (input == NULL)
|
||||
return(NULL);
|
||||
@@ -12150,22 +11993,7 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
|
||||
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
|
||||
BAD_CAST "none", BAD_CAST "none");
|
||||
|
||||
if ((enc == XML_CHAR_ENCODING_NONE) &&
|
||||
((ctxt->input->end - ctxt->input->cur) >= 4)) {
|
||||
/*
|
||||
* Get the 4 first bytes and decode the charset
|
||||
* if enc != XML_CHAR_ENCODING_NONE
|
||||
* plug some encoding conversion routines.
|
||||
*/
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(start, 4);
|
||||
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
}
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
|
||||
|
||||
@@ -12213,7 +12041,6 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
|
||||
xmlDtdPtr ret = NULL;
|
||||
xmlParserCtxtPtr ctxt;
|
||||
xmlParserInputPtr input = NULL;
|
||||
xmlCharEncoding enc;
|
||||
xmlChar* systemIdCanonic;
|
||||
|
||||
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
|
||||
@@ -12258,10 +12085,8 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
|
||||
xmlFree(systemIdCanonic);
|
||||
return(NULL);
|
||||
}
|
||||
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
|
||||
enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
if (input->filename == NULL)
|
||||
input->filename = (char *) systemIdCanonic;
|
||||
@@ -12399,8 +12224,6 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
|
||||
xmlDocPtr newDoc;
|
||||
xmlNodePtr newRoot;
|
||||
xmlParserErrors ret = XML_ERR_OK;
|
||||
xmlChar start[4];
|
||||
xmlCharEncoding enc;
|
||||
|
||||
if (((depth > 40) &&
|
||||
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
|
||||
@@ -12461,22 +12284,7 @@ xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
|
||||
newRoot->doc = doc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the 4 first bytes and decode the charset
|
||||
* if enc != XML_CHAR_ENCODING_NONE
|
||||
* plug some encoding conversion routines.
|
||||
*/
|
||||
GROW;
|
||||
if ((ctxt->input->end - ctxt->input->cur) >= 4) {
|
||||
start[0] = RAW;
|
||||
start[1] = NXT(1);
|
||||
start[2] = NXT(2);
|
||||
start[3] = NXT(3);
|
||||
enc = xmlDetectCharEncoding(start, 4);
|
||||
if (enc != XML_CHAR_ENCODING_NONE) {
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
}
|
||||
}
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
/*
|
||||
* Parse a possible text declaration first
|
||||
@@ -12963,10 +12771,6 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
|
||||
if (doc->encoding != NULL) {
|
||||
xmlCharEncodingHandlerPtr hdlr;
|
||||
|
||||
if (ctxt->encoding != NULL)
|
||||
xmlFree((xmlChar *) ctxt->encoding);
|
||||
ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
|
||||
|
||||
hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
|
||||
if (hdlr != NULL) {
|
||||
xmlSwitchToEncoding(ctxt, hdlr);
|
||||
@@ -14273,7 +14077,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
|
||||
ctxt->inSubset = 0;
|
||||
ctxt->errNo = XML_ERR_OK;
|
||||
ctxt->depth = 0;
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
ctxt->catalogs = NULL;
|
||||
ctxt->sizeentities = 0;
|
||||
ctxt->sizeentcopy = 0;
|
||||
@@ -14374,10 +14177,6 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
|
||||
if (encoding != NULL) {
|
||||
xmlCharEncodingHandlerPtr hdlr;
|
||||
|
||||
if (ctxt->encoding != NULL)
|
||||
xmlFree((xmlChar *) ctxt->encoding);
|
||||
ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
|
||||
|
||||
hdlr = xmlFindCharEncodingHandler(encoding);
|
||||
if (hdlr != NULL) {
|
||||
xmlSwitchToEncoding(ctxt, hdlr);
|
||||
|
||||
Reference in New Issue
Block a user