From bd9d5e39ec31e935bb79308c0c83f9d03ef4b6f7 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Wed, 9 Jul 2025 13:10:31 +0200 Subject: [PATCH] parser: Fix handling of invalid char refs in recovery mode Revert to the old behavior which handles invalid char refs more gracefully. Probably regressed with 37c6618b (version 2.13.0). --- parser.c | 7 +++---- testparser.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/parser.c b/parser.c index 67a5be8c..f3cb9afa 100644 --- a/parser.c +++ b/parser.c @@ -2628,14 +2628,13 @@ xmlParseCharRef(xmlParserCtxt *ctxt) { xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, "xmlParseCharRef: character reference out of bounds\n", val); - } else if (IS_CHAR(val)) { - return(val); - } else { + val = 0xFFFD; + } else if (!IS_CHAR(val)) { xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, "xmlParseCharRef: invalid xmlChar value %d\n", val); } - return(0); + return(val); } /** diff --git a/testparser.c b/testparser.c index 4a9568da..aa8c9243 100644 --- a/testparser.c +++ b/testparser.c @@ -183,6 +183,24 @@ testUndeclEntInContent(void) { return err; } +static int +testInvalidCharRecovery(void) { + const char *xml = "<doc>&#x10;</doc>"; + xmlDoc *doc; + int err = 0; + + doc = xmlReadDoc(BAD_CAST xml, NULL, NULL, XML_PARSE_RECOVER); + + if (strcmp((char *) doc->children->children->content, "\x10") != 0) { + fprintf(stderr, "Failed to recover from invalid char ref\n"); + err = 1; + } + + xmlFreeDoc(doc); + + return err; +} + #ifdef LIBXML_VALID_ENABLED static void testSwitchDtdExtSubset(void *vctxt, const xmlChar *name ATTRIBUTE_UNUSED, @@ -1409,6 +1427,7 @@ main(void) { err |= testNodeGetContent(); err |= testCFileIO(); err |= testUndeclEntInContent(); + err |= testInvalidCharRecovery(); #ifdef LIBXML_VALID_ENABLED err |= testSwitchDtd(); #endif