Add xmlCtxtGetInputWindow and fix xmlCtxtGetInputPosition byte position.

  * include/libxml/parser.h: document xmlParserInput.cur as deprecated in
    favour of xmlCtxtGetInputWindow and declare the new function.
  * parserInternals.c: compute the reported byte position from
    input->cur rather than input->end, and add xmlCtxtGetInputWindow,
    which validates its arguments, resolves a (possibly negative) input
    index and forwards to xmlParserInputGetWindow.
  * testparser.c: add testCtxtInputGetters, which checks position and
    window for the outermost (index 0) and innermost (index -1) inputs
    from inside an error handler.

NOTE(review): indentation of unchanged context lines was reconstructed;
verify against the tree or apply with whitespace tolerance
(e.g. "git apply --ignore-whitespace").

diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index bc8f5f69..b6d6fcf6 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -111,7 +111,11 @@ struct _xmlParserInput {
     const char *directory XML_DEPRECATED_MEMBER;
     /* Base of the array to parse */
     const xmlChar *base;
-    /* Current char being parsed */
+    /**
+     * @deprecated Use #xmlCtxtGetInputWindow
+     *
+     * Current char being parsed
+     */
     const xmlChar *cur;
     /* end of the array to parse */
     const xmlChar *end;
@@ -1949,6 +1953,12 @@ XMLPUBFUN int
 					 int *line,
 					 int *col,
 					 unsigned long *bytePos);
+XMLPUBFUN int
+		xmlCtxtGetInputWindow	(xmlParserCtxt *ctxt,
+					 int inputIndex,
+					 const xmlChar **startOut,
+					 int *sizeInOut,
+					 int *offsetOut);
 XMLPUBFUN void
 		xmlCtxtSetErrorHandler	(xmlParserCtxt *ctxt,
 					 xmlStructuredErrorFunc handler,
diff --git a/parserInternals.c b/parserInternals.c
index 03502500..e68af7a6 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -3436,13 +3436,60 @@ xmlCtxtGetInputPosition(xmlParserCtxt *ctxt, int inputIndex,
         unsigned long consumed;
 
         consumed = input->consumed;
-        xmlSaturatedAddSizeT(&consumed, input->end - input->base);
+        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
         *utf8BytePos = consumed;
     }
 
     return 0;
 }
 
+/**
+ * Return window into input data.
+ *
+ * Should only be used by error handlers or SAX callbacks.
+ * The returned pointer is only valid until the callback returns.
+ *
+ * Because of entities, there can be multiple inputs. Non-negative
+ * values of `inputIndex` (0, 1, 2, ...) select inputs starting
+ * from the outermost input. Negative values (-1, -2, ...) select
+ * inputs starting from the innermost input.
+ *
+ * @since 2.15.0
+ *
+ * @param ctxt  parser context
+ * @param inputIndex  input index
+ * @param startOut  start of window (output)
+ * @param sizeInOut  maximum size of window (in)
+ *                   actual size of window (out)
+ * @param offsetOut  offset of current position inside
+ *                   window (out)
+ * @returns 0 on success, -1 if arguments are invalid
+ */
+int
+xmlCtxtGetInputWindow(xmlParserCtxt *ctxt, int inputIndex,
+                      const xmlChar **startOut,
+                      int *sizeInOut, int *offsetOut) {
+    xmlParserInput *input;
+
+    if (ctxt == NULL || startOut == NULL || sizeInOut == NULL ||
+        offsetOut == NULL)
+        return -1;
+
+    if (inputIndex < 0) {
+        inputIndex += ctxt->inputNr;
+        if (inputIndex < 0)
+            return -1;
+    }
+    if (inputIndex >= ctxt->inputNr)
+        return -1;
+
+    input = ctxt->inputTab[inputIndex];
+
+    xmlParserInputGetWindow(input, startOut, sizeInOut, offsetOut);
+
+    return 0;
+}
+
 /************************************************************************
  *									*
  *		Handling of node information				*
diff --git a/testparser.c b/testparser.c
index aa8c9243..a926611d 100644
--- a/testparser.c
+++ b/testparser.c
@@ -201,6 +201,75 @@ testInvalidCharRecovery(void) {
     return err;
 }
 
+static void
+testCtxtInputGetterError(void *errCtxt, const xmlError *error) {
+    int *err = errCtxt;
+    xmlParserCtxt *ctxt = error->ctxt;
+    const char *filename;
+    int line, col;
+    unsigned long bytePos;
+    const xmlChar *start;
+    int size, offset;
+
+    xmlCtxtGetInputPosition(ctxt, 0, &filename, &line, &col, &bytePos);
+
+    if (strcmp(filename, "test.xml") != 0 ||
+        line != 4 || col != 11 || bytePos != 62) {
+        fprintf(stderr, "unexpected position: %s %d %d %lu\n",
+                filename, line, col, bytePos);
+        *err = 1;
+    }
+
+    size = 80;
+    xmlCtxtGetInputWindow(ctxt, 0, &start, &size, &offset);
+
+    if (strncmp((char *) start, "<doc>&ent;", 10) != 0 ||
+        size != 16 || offset != 10) {
+        fprintf(stderr, "unexpected window: %.10s %d %d\n",
+                start, size, offset);
+        *err = 1;
+    }
+
+    xmlCtxtGetInputPosition(ctxt, -1, &filename, &line, &col, &bytePos);
+
+    if (filename != NULL ||
+        line != 1 || col != 11 || bytePos != 10) {
+        fprintf(stderr, "unexpected position: %s %d %d %lu\n",
+                filename, line, col, bytePos);
+        *err = 1;
+    }
+
+    size = 80;
+    xmlCtxtGetInputWindow(ctxt, -1, &start, &size, &offset);
+
+    if (strncmp((char *) start, "xxx &fail;", 10) != 0 ||
+        size != 14 || offset != 10) {
+        fprintf(stderr, "unexpected window: %.10s %d %d\n",
+                start, size, offset);
+        *err = 1;
+    }
+}
+
+static int
+testCtxtInputGetters(void) {
+    const char *xml =
+        "<!DOCTYPE doc [\n"
+        "  <!ENTITY ent \"xxx &fail; yyy\">\n"
+        "]>\n"
+        "<doc>&ent;</doc>\n";
+    xmlParserCtxt *ctxt;
+    xmlDoc *doc;
+    int err = 0;
+
+    ctxt = xmlNewParserCtxt();
+    xmlCtxtSetErrorHandler(ctxt, testCtxtInputGetterError, &err);
+    doc = xmlCtxtReadDoc(ctxt, BAD_CAST xml, "test.xml", NULL, 0);
+    xmlFreeDoc(doc);
+    xmlFreeParserCtxt(ctxt);
+
+    return err;
+}
+
 #ifdef LIBXML_VALID_ENABLED
 static void
 testSwitchDtdExtSubset(void *vctxt, const xmlChar *name ATTRIBUTE_UNUSED,
@@ -1428,6 +1497,7 @@ main(void) {
     err |= testCFileIO();
     err |= testUndeclEntInContent();
     err |= testInvalidCharRecovery();
+    err |= testCtxtInputGetters();
 #ifdef LIBXML_VALID_ENABLED
     err |= testSwitchDtd();
 #endif