From 43b511fa714df875dc4f40d108061eede0d4d76b Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 26 Nov 2023 14:31:39 +0100 Subject: [PATCH] parser: Make CRLF increment line number Partial revert of cb927e85 fixing CRLFs not incrementing the line number. This requires reworking xmlParseQNameHashed. The original implementation prompted the change to xmlCurrentChar, which really shouldn't modify the 'cur' pointer as a side effect. But the NEXTL macro relies on this behavior. Ultimately, we should reintroduce the change to xmlCurrentChar and fix the NEXTL macro. This will lead to single CRs incrementing the line number as well, which seems more consistent. Fixes #628. --- parser.c | 15 +++++++++------ parserInternals.c | 9 ++++++++- result/errors/name3.xml.ent | 3 +++ result/errors/name3.xml.err | 3 +++ result/errors/name3.xml.str | 4 ++++ test/errors/name3.xml | 1 + 6 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 result/errors/name3.xml.ent create mode 100644 result/errors/name3.xml.err create mode 100644 result/errors/name3.xml.str create mode 100644 test/errors/name3.xml diff --git a/parser.c b/parser.c index 6b0eb421..4843a728 100644 --- a/parser.c +++ b/parser.c @@ -8949,7 +8949,7 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { static xmlHashedString xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) { xmlHashedString l, p; - int start; + int start, isNCName = 0; l.name = NULL; p.name = NULL; @@ -8960,10 +8960,13 @@ xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) { start = CUR_PTR - BASE_PTR; l = xmlParseNCName(ctxt); - if ((l.name != NULL) && (CUR == ':')) { - NEXT; - p = l; - l = xmlParseNCName(ctxt); + if (l.name != NULL) { + isNCName = 1; + if (CUR == ':') { + NEXT; + p = l; + l = xmlParseNCName(ctxt); + } } if ((l.name == NULL) || (CUR == ':')) { xmlChar *tmp; @@ -8972,7 +8975,7 @@ xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) { p.name = NULL; if (ctxt->instate == XML_PARSER_EOF) 
return(l); - if ((CUR != ':') && (CUR_PTR <= BASE_PTR + start)) + if ((isNCName == 0) && (CUR != ':')) return(l); tmp = xmlParseNmtoken(ctxt); if (tmp != NULL) diff --git a/parserInternals.c b/parserInternals.c index 29deebe4..1eff6c71 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -849,7 +849,14 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { * the single character #xA. */ if (c == '\r') { - *len = ((cur[1] == '\n') ? 2 : 1); + /* + * TODO: This function shouldn't change the 'cur' pointer + * as side effect, but the NEXTL macro in parser.c relies + * on this behavior when incrementing line numbers. + */ + if (cur[1] == '\n') + ctxt->input->cur++; + *len = 1; c = '\n'; } else if (c == 0) { if (ctxt->input->cur >= ctxt->input->end) { diff --git a/result/errors/name3.xml.ent b/result/errors/name3.xml.ent new file mode 100644 index 00000000..3925f9da --- /dev/null +++ b/result/errors/name3.xml.ent @@ -0,0 +1,3 @@ +./test/errors/name3.xml:1: parser error : StartTag: invalid element name +<.name/> + ^ diff --git a/result/errors/name3.xml.err b/result/errors/name3.xml.err new file mode 100644 index 00000000..3925f9da --- /dev/null +++ b/result/errors/name3.xml.err @@ -0,0 +1,3 @@ +./test/errors/name3.xml:1: parser error : StartTag: invalid element name +<.name/> + ^ diff --git a/result/errors/name3.xml.str b/result/errors/name3.xml.str new file mode 100644 index 00000000..460b31a3 --- /dev/null +++ b/result/errors/name3.xml.str @@ -0,0 +1,4 @@ +./test/errors/name3.xml:1: parser error : StartTag: invalid element name +<.name/> + ^ +./test/errors/name3.xml : failed to parse diff --git a/test/errors/name3.xml b/test/errors/name3.xml new file mode 100644 index 00000000..2f5f96e7 --- /dev/null +++ b/test/errors/name3.xml @@ -0,0 +1 @@ +<.name/>