From 7e0bbbc143e02ec5f4f90ee1cfe8b1578d3f4af6 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Wed, 27 Dec 2023 18:33:30 +0100 Subject: [PATCH] parser: New input API Provide a new set of functions to create xmlParserInputs. These can be used for the document entity or from external entity loaders. - Don't require xmlParserInputBuffer. - All functions take a base URI. - All functions take an encoding as string. - xmlNewInputURL also takes a public ID. - xmlNewInputMemory takes a size_t. - Optimization hints for memory buffers. Improve documentation. Only call xmlInitParser before allocating a new parser context. Call xmlCtxtUseOptions as early as possible. --- HTMLparser.c | 728 ++++++++++++++----------------- include/libxml/HTMLparser.h | 2 + include/libxml/parser.h | 42 ++ include/private/io.h | 6 +- include/private/parser.h | 4 + parser.c | 827 +++++++++++++++--------------------- parserInternals.c | 444 +++++++++++++++---- xmlIO.c | 110 ++--- xmllint.c | 2 - 9 files changed, 1151 insertions(+), 1014 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 15cf91bb..b9a257bd 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2243,45 +2243,6 @@ htmlEncodeEntities(unsigned char* out, int *outlen, return(0); } -/************************************************************************ - * * - * Commodity functions to handle streams * - * * - ************************************************************************/ - -#ifdef LIBXML_PUSH_ENABLED -/** - * htmlNewInputStream: - * @ctxt: an HTML parser context - * - * Create a new input stream structure - * Returns the new input stream or NULL - */ -static htmlParserInputPtr -htmlNewInputStream(htmlParserCtxtPtr ctxt) { - htmlParserInputPtr input; - - input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput)); - if (input == NULL) { - htmlErrMemory(ctxt); - return(NULL); - } - memset(input, 0, sizeof(htmlParserInput)); - input->filename = NULL; - input->base = NULL; - input->cur = NULL; - input->buf = NULL; - 
input->line = 1; - input->col = 1; - input->buf = NULL; - input->free = NULL; - input->version = NULL; - input->consumed = 0; - return(input); -} -#endif - - /************************************************************************ * * * Commodity functions, cleanup needed ? * @@ -3272,12 +3233,11 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) { } /** - * xmlParsePI: - * @ctxt: an XML parser context + * htmlParsePI: + * @ctxt: an HTML parser context * - * parse an XML Processing Instruction. - * - * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' + * Parse an XML Processing Instruction. HTML5 doesn't allow processing + * instructions, so this will be removed at some point. */ static void htmlParsePI(htmlParserCtxtPtr ctxt) { @@ -3388,9 +3348,7 @@ done: * htmlParseComment: * @ctxt: an HTML parser context * - * Parse an XML (SGML) comment - * - * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' + * Parse an HTML comment */ static void htmlParseComment(htmlParserCtxtPtr ctxt) { @@ -4781,19 +4739,17 @@ __htmlParseContent(void *ctxt) { * htmlParseDocument: * @ctxt: an HTML parser context * - * parse an HTML document (and build a tree if using the standard SAX - * interface). + * Parse an HTML document and invoke the SAX handlers. This is useful + * if you're only interested in custom SAX callbacks. If you want a + * document tree, use htmlCtxtParseDocument. * - * Returns 0, -1 in case of error. the parser context is augmented - * as a result of the parsing. + * Returns 0, -1 in case of error. */ int htmlParseDocument(htmlParserCtxtPtr ctxt) { xmlDtdPtr dtd; - xmlInitParser(); - if ((ctxt == NULL) || (ctxt->input == NULL)) return(-1); @@ -5007,7 +4963,19 @@ htmlFreeParserCtxt(htmlParserCtxtPtr ctxt) /** * htmlNewParserCtxt: * - * Allocate and initialize a new parser context. + * Allocate and initialize a new HTML parser context. + * + * This can be used to parse HTML documents into DOM trees with + * functions like htmlCtxtReadFile or htmlCtxtReadMemory.
+ * + * See htmlCtxtUseOptions for parser options. + * + * See xmlCtxtSetErrorHandler for advanced error handling. + * + * See xmlNewInputURL, xmlNewInputMemory, xmlNewInputIO and similar + * functions for advanced input control. + * + * See htmlNewSAXParserCtxt for custom SAX parsers. * * Returns the htmlParserCtxtPtr or NULL in case of allocation error */ @@ -5023,8 +4991,14 @@ htmlNewParserCtxt(void) * @sax: SAX handler * @userData: user data * - * Allocate and initialize a new SAX parser context. If userData is NULL, - * the parser context will be passed as user data. + * Allocate and initialize a new HTML SAX parser context. If userData + * is NULL, the parser context will be passed as user data. + * + * Available since 2.11.0. If you want to support older versions, + * it's best to invoke htmlNewParserCtxt and set ctxt->sax with + * struct assignment. + * + * Also see htmlNewParserCtxt. * * Returns the htmlParserCtxtPtr or NULL in case of allocation error */ @@ -5034,6 +5008,8 @@ htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData) { xmlParserCtxtPtr ctxt; + xmlInitParser(); + ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); if (ctxt == NULL) return(NULL); @@ -5045,68 +5021,65 @@ htmlNewSAXParserCtxt(const htmlSAXHandler *sax, void *userData) return(ctxt); } -/** - * htmlCreateMemoryParserCtxt: - * @buffer: a pointer to a char array - * @size: the size of the array - * - * Create a parser context for an HTML in-memory document. The input buffer - * must not contain a terminating null byte.
- * - * Returns the new parser context or NULL - */ -htmlParserCtxtPtr -htmlCreateMemoryParserCtxt(const char *buffer, int size) { +static htmlParserCtxtPtr +htmlCreateMemoryParserCtxtInternal(const char *url, + const char *buffer, size_t size, + const char *encoding) { xmlParserCtxtPtr ctxt; xmlParserInputPtr input; - xmlParserInputBufferPtr buf; if (buffer == NULL) return(NULL); - if (size <= 0) - return(NULL); ctxt = htmlNewParserCtxt(); if (ctxt == NULL) return(NULL); - buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); - if (buf == NULL) { + input = xmlNewInputMemory(ctxt, url, buffer, size, encoding, 0); + if (input == NULL) { xmlFreeParserCtxt(ctxt); return(NULL); } - input = xmlNewInputStream(ctxt); - if (input == NULL) { - xmlFreeParserInputBuffer(buf); - xmlFreeParserCtxt(ctxt); - return(NULL); - } - - input->filename = NULL; - input->buf = buf; - xmlBufResetInput(buf->buffer, input); - inputPush(ctxt, input); + return(ctxt); } +/** + * htmlCreateMemoryParserCtxt: + * @buffer: a pointer to a char array + * @size: the size of the array + * + * DEPRECATED: Use htmlNewParserCtxt and htmlCtxtReadMemory. + * + * Create a parser context for an HTML in-memory document. The input + * buffer must not contain any terminating null bytes. + * + * Returns the new parser context or NULL + */ +htmlParserCtxtPtr +htmlCreateMemoryParserCtxt(const char *buffer, int size) { + if (size <= 0) + return(NULL); + + return(htmlCreateMemoryParserCtxtInternal(NULL, buffer, size, NULL)); +} + /** * htmlCreateDocParserCtxt: * @str: a pointer to an array of xmlChar - * @encoding: optional encoding + * @encoding: encoding (optional) * * Create a parser context for a null-terminated string. * - * A non-NULL encoding overrides encoding declarations in the document. - * * Returns the new parser context or NULL if a memory allocation failed. 
*/ static htmlParserCtxtPtr -htmlCreateDocParserCtxt(const xmlChar *str, const char *encoding) { +htmlCreateDocParserCtxt(const xmlChar *str, const char *url, + const char *encoding) { xmlParserCtxtPtr ctxt; xmlParserInputPtr input; - xmlParserInputBufferPtr buf; if (str == NULL) return(NULL); @@ -5115,28 +5088,14 @@ htmlCreateDocParserCtxt(const xmlChar *str, const char *encoding) { if (ctxt == NULL) return(NULL); - buf = xmlParserInputBufferCreateString(str); - if (buf == NULL) { - xmlFreeParserCtxt(ctxt); - return(NULL); - } - - input = xmlNewInputStream(ctxt); + input = xmlNewInputString(ctxt, url, (const char *) str, encoding, 0); if (input == NULL) { - xmlFreeParserInputBuffer(buf); xmlFreeParserCtxt(ctxt); return(NULL); } - input->filename = NULL; - input->buf = buf; - xmlBufResetInput(buf->buffer, input); - inputPush(ctxt, input); - if (encoding != NULL) - xmlSwitchEncodingName(ctxt, encoding); - return(ctxt); } @@ -5784,13 +5743,23 @@ done: /** * htmlParseChunk: * @ctxt: an HTML parser context - * @chunk: an char array - * @size: the size in byte of the chunk + * @chunk: chunk of memory + * @size: size of chunk in bytes * @terminate: last chunk indicator * - * Parse a Chunk of memory + * Parse a chunk of memory in push parser mode. * - * Returns zero if no error, the xmlParserErrors otherwise. + * Assumes that the parser context was initialized with + * htmlCreatePushParserCtxt. + * + * The last chunk, which will often be empty, must be marked with + * the @terminate flag. With the default SAX callbacks, the resulting + * document will be available in ctxt->myDoc. This pointer will not + * be freed by the library. + * + * If the document isn't well-formed, ctxt->myDoc is set to NULL. + * + * Returns an xmlParserErrors code (0 on success). 
*/ int htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, @@ -5832,72 +5801,37 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, /** * htmlCreatePushParserCtxt: - * @sax: a SAX handler - * @user_data: The user data returned on SAX callbacks - * @chunk: a pointer to an array of chars + * @sax: a SAX handler (optional) + * @user_data: The user data returned on SAX callbacks (optional) + * @chunk: a pointer to an array of chars (optional) * @size: number of chars in the array - * @filename: an optional file name or URI - * @enc: an optional encoding + * @filename: only used for error reporting (optional) + * @enc: encoding (deprecated, pass XML_CHAR_ENCODING_NONE) * - * Create a parser context for using the HTML parser in push mode - * The value of @filename is used for fetching external entities - * and error/warning reports. + * Create a parser context for using the HTML parser in push mode. * - * Returns the new parser context or NULL + * Returns the new parser context or NULL if a memory allocation + * failed. 
*/ htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename, xmlCharEncoding enc) { htmlParserCtxtPtr ctxt; - htmlParserInputPtr inputStream; - xmlParserInputBufferPtr buf; - - xmlInitParser(); - - buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); - if (buf == NULL) return(NULL); + htmlParserInputPtr input; + const char *encoding; ctxt = htmlNewSAXParserCtxt(sax, user_data); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(buf); + if (ctxt == NULL) + return(NULL); + + encoding = xmlGetCharEncodingName(enc); + input = xmlNewInputPush(ctxt, filename, chunk, size, encoding); + if (input == NULL) { + htmlFreeParserCtxt(ctxt); return(NULL); } - inputStream = htmlNewInputStream(ctxt); - if (inputStream == NULL) { - xmlFreeParserCtxt(ctxt); - xmlFreeParserInputBuffer(buf); - return(NULL); - } - - inputStream->flags |= XML_INPUT_PROGRESSIVE; - - if (filename == NULL) - inputStream->filename = NULL; - else - inputStream->filename = (char *) - xmlCanonicPath((const xmlChar *) filename); - inputStream->buf = buf; - xmlBufResetInput(buf->buffer, inputStream); - - inputPush(ctxt, inputStream); - - if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL)) { - size_t pos = ctxt->input->cur - ctxt->input->base; - int res; - - res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); - if (res < 0) { - htmlParseErr(ctxt, ctxt->input->buf->error, - "xmlParserInputBufferPush failed\n", NULL, NULL); - xmlHaltParser(ctxt); - } - } - - if (enc != XML_CHAR_ENCODING_NONE) - xmlSwitchEncoding(ctxt, enc); + inputPush(ctxt, input); return(ctxt); } @@ -5926,25 +5860,20 @@ htmlSAXParseDoc(const xmlChar *cur, const char *encoding, htmlDocPtr ret; htmlParserCtxtPtr ctxt; - xmlInitParser(); + if (cur == NULL) + return(NULL); - if (cur == NULL) return(NULL); + ctxt = htmlCreateDocParserCtxt(cur, NULL, encoding); + 
if (ctxt == NULL) + return(NULL); - - ctxt = htmlCreateDocParserCtxt(cur, encoding); - if (ctxt == NULL) return(NULL); if (sax != NULL) { - if (ctxt->sax != NULL) xmlFree (ctxt->sax); - ctxt->sax = sax; + *ctxt->sax = *sax; ctxt->userData = userData; } htmlParseDocument(ctxt); ret = ctxt->myDoc; - if (sax != NULL) { - ctxt->sax = NULL; - ctxt->userData = NULL; - } htmlFreeParserCtxt(ctxt); return(ret); @@ -5953,9 +5882,13 @@ htmlSAXParseDoc(const xmlChar *cur, const char *encoding, /** * htmlParseDoc: * @cur: a pointer to an array of xmlChar - * @encoding: a free form C string describing the HTML document encoding, or NULL + * @encoding: the encoding (optional) * - * parse an HTML in-memory document and build a tree. + * DEPRECATED: Use htmlReadDoc. + * + * Parse an HTML in-memory document and build a tree. + * + * This function uses deprecated global parser options. * * Returns the resulting document tree */ @@ -5971,6 +5904,8 @@ htmlParseDoc(const xmlChar *cur, const char *encoding) { * @filename: the filename * @encoding: optional encoding * + * DEPRECATED: Use htmlNewParserCtxt and htmlCtxtReadFile. + * * Create a parser context to read from a file. * * A non-NULL encoding overrides encoding declarations in the document. 
@@ -5984,8 +5919,7 @@ htmlParserCtxtPtr htmlCreateFileParserCtxt(const char *filename, const char *encoding) { htmlParserCtxtPtr ctxt; - htmlParserInputPtr inputStream; - char *canonicFilename; + htmlParserInputPtr input; if (filename == NULL) return(NULL); @@ -5994,23 +5928,13 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) if (ctxt == NULL) { return(NULL); } - canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename); - if (canonicFilename == NULL) { + + input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0); + if (input == NULL) { xmlFreeParserCtxt(ctxt); return(NULL); } - - inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt); - xmlFree(canonicFilename); - if (inputStream == NULL) { - xmlFreeParserCtxt(ctxt); - return(NULL); - } - - inputPush(ctxt, inputStream); - - if (encoding != NULL) - xmlSwitchEncodingName(ctxt, encoding); + inputPush(ctxt, input); return(ctxt); } @@ -6018,7 +5942,7 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) /** * htmlSAXParseFile: * @filename: the filename - * @encoding: a free form C string describing the HTML document encoding, or NULL + * @encoding: encoding (optional) * @sax: the SAX handler block * @userData: if using SAX, this pointer will be provided on callbacks. * @@ -6040,8 +5964,6 @@ htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr s htmlParserCtxtPtr ctxt; htmlSAXHandlerPtr oldsax = NULL; - xmlInitParser(); - ctxt = htmlCreateFileParserCtxt(filename, encoding); if (ctxt == NULL) return(NULL); if (sax != NULL) { @@ -6065,10 +5987,11 @@ htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr s /** * htmlParseFile: * @filename: the filename - * @encoding: a free form C string describing the HTML document encoding, or NULL + * @encoding: encoding (optional) * - * parse an HTML file and build a tree. 
Automatic support for ZLIB/Compress - * compressed document is provided by default if found at compile-time. + * Parse an HTML file and build a tree. + * + * See xmlNewInputURL for details. * * Returns the resulting document tree */ @@ -6239,7 +6162,6 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt) if (ctxt == NULL) return; - xmlInitParser(); dict = ctxt->dict; while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ @@ -6389,262 +6311,293 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) } /** - * htmlDoRead: + * htmlCtxtParseDocument: * @ctxt: an HTML parser context - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) - * @reuse: keep the context for reuse * - * Common front-end for the htmlRead functions + * Parse an HTML document and return the resulting document tree. * * Returns the resulting document tree or NULL */ -static htmlDocPtr -htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding, - int options, int reuse) +htmlDocPtr +htmlCtxtParseDocument(htmlParserCtxtPtr ctxt) { htmlDocPtr ret; - htmlCtxtUseOptions(ctxt, options); ctxt->html = 1; - if (encoding != NULL) - xmlSwitchEncodingName(ctxt, encoding); - if ((URL != NULL) && (ctxt->input != NULL) && - (ctxt->input->filename == NULL)) - ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); htmlParseDocument(ctxt); - if (ctxt->errNo == XML_ERR_NO_MEMORY) { + + if (ctxt->errNo != XML_ERR_NO_MEMORY) { + ret = ctxt->myDoc; + } else { + ret = NULL; xmlFreeDoc(ctxt->myDoc); - ctxt->myDoc = NULL; } - ret = ctxt->myDoc; ctxt->myDoc = NULL; - if (!reuse) { - if ((ctxt->dictNames) && - (ret != NULL) && - (ret->dict == ctxt->dict)) - ctxt->dict = NULL; - xmlFreeParserCtxt(ctxt); - } - return (ret); + + return(ret); } /** * htmlReadDoc: - * @cur: a pointer to a zero terminated string - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL - * @options: 
a combination of htmlParserOption(s) + * @str: a pointer to a zero terminated string + * @url: only used for error reporting (optional) + * @encoding: the document encoding (optional) + * @options: a combination of htmlParserOptions * - * parse an XML in-memory document and build a tree. + * Convenience function to parse an HTML document from a zero-terminated + * string. * - * Returns the resulting document tree + * See htmlCtxtReadDoc for details. + * + * Returns the resulting document tree. */ htmlDocPtr -htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) +htmlReadDoc(const xmlChar *str, const char *url, const char *encoding, + int options) { htmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + htmlDocPtr doc; - if (cur == NULL) - return (NULL); - - xmlInitParser(); - ctxt = htmlCreateDocParserCtxt(cur, NULL); + ctxt = htmlNewParserCtxt(); if (ctxt == NULL) - return (NULL); - return (htmlDoRead(ctxt, URL, encoding, options, 0)); + return(NULL); + + htmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputString(ctxt, url, (const char *) str, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) { + htmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, input); + + doc = htmlCtxtParseDocument(ctxt); + + htmlFreeParserCtxt(ctxt); + return(doc); } /** * htmlReadFile: * @filename: a file or URL - * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) + * @encoding: the document encoding (optional) + * @options: a combination of htmlParserOptions * - * parse an XML file from the filesystem or the network. + * Convenience function to parse an HTML file from the filesystem, + * the network or a global user-defined resource loader. * - * Returns the resulting document tree + * See htmlCtxtReadFile for details. + * + * Returns the resulting document tree.
*/ htmlDocPtr htmlReadFile(const char *filename, const char *encoding, int options) { htmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + htmlDocPtr doc; - xmlInitParser(); - ctxt = htmlCreateFileParserCtxt(filename, encoding); + ctxt = htmlNewParserCtxt(); if (ctxt == NULL) - return (NULL); - return (htmlDoRead(ctxt, NULL, NULL, options, 0)); + return(NULL); + + htmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0); + if (input == NULL) { + htmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, input); + + doc = htmlCtxtParseDocument(ctxt); + + htmlFreeParserCtxt(ctxt); + return(doc); } /** * htmlReadMemory: * @buffer: a pointer to a char array * @size: the size of the array - * @URL: the base URL to use for the document + * @url: only used for error reporting (optional) * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * - * Parse an HTML in-memory document and build a tree. The input buffer must - * not contain a terminating null byte. + * Convenience function to parse an HTML document from memory. + * The input buffer must not contain any terminating null bytes. + * + * See htmlCtxtReadMemory for details. 
* * Returns the resulting document tree */ htmlDocPtr -htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) +htmlReadMemory(const char *buffer, int size, const char *url, + const char *encoding, int options) { htmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + htmlDocPtr doc; - xmlInitParser(); - ctxt = htmlCreateMemoryParserCtxt(buffer, size); + if (size < 0) + return(NULL); + + ctxt = htmlNewParserCtxt(); if (ctxt == NULL) - return (NULL); - return (htmlDoRead(ctxt, URL, encoding, options, 0)); + return(NULL); + + htmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputMemory(ctxt, url, buffer, size, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) { + htmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, input); + + doc = htmlCtxtParseDocument(ctxt); + + htmlFreeParserCtxt(ctxt); + return(doc); } /** * htmlReadFd: * @fd: an open file descriptor - * @URL: the base URL to use for the document + * @url: only used for error reporting (optional) * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) + * @options: a combination of htmlParserOptions + * + * Convenience function to parse an HTML document from a + * file descriptor. * - * parse an HTML from a file descriptor and build a tree. * NOTE that the file descriptor will not be closed when the - * reader is closed or reset. + * context is freed or reset. + * + * See htmlCtxtReadFd for details. 
* * Returns the resulting document tree */ htmlDocPtr -htmlReadFd(int fd, const char *URL, const char *encoding, int options) +htmlReadFd(int fd, const char *url, const char *encoding, int options) { htmlParserCtxtPtr ctxt; - xmlParserInputBufferPtr input; - htmlParserInputPtr stream; + xmlParserInputPtr input; + htmlDocPtr doc; - if (fd < 0) - return (NULL); - - xmlInitParser(); - input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); - if (input == NULL) - return (NULL); - input->closecallback = NULL; ctxt = htmlNewParserCtxt(); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); + if (ctxt == NULL) + return(NULL); + + htmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputFd(ctxt, url, fd, encoding, 0); + if (input == NULL) { htmlFreeParserCtxt(ctxt); - return (NULL); + return(NULL); } - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, URL, encoding, options, 0)); + input->buf->closecallback = NULL; + inputPush(ctxt, input); + + doc = htmlCtxtParseDocument(ctxt); + + htmlFreeParserCtxt(ctxt); + return(doc); } /** * htmlReadIO: * @ioread: an I/O read function - * @ioclose: an I/O close function + * @ioclose: an I/O close function (optional) * @ioctx: an I/O handler - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @url: only used for error reporting (optional) + * @encoding: the document encoding (optional) * @options: a combination of htmlParserOption(s) * - * parse an HTML document from I/O functions and source and build a tree. + * Convenience function to parse an HTML document from I/O functions + * and context. + * + * See htmlCtxtReadIO for details. 
* * Returns the resulting document tree */ htmlDocPtr htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, - void *ioctx, const char *URL, const char *encoding, int options) + void *ioctx, const char *url, const char *encoding, int options) { htmlParserCtxtPtr ctxt; - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; + htmlDocPtr doc; - if (ioread == NULL) + ctxt = htmlNewParserCtxt(); + if (ctxt == NULL) return (NULL); - xmlInitParser(); - input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, - XML_CHAR_ENCODING_NONE); + htmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputIO(ctxt, url, ioread, ioclose, ioctx, encoding, 0); if (input == NULL) { if (ioclose != NULL) ioclose(ioctx); - return (NULL); + htmlFreeParserCtxt(ctxt); + return(NULL); } - ctxt = htmlNewParserCtxt(); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - xmlFreeParserCtxt(ctxt); - return (NULL); - } - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, URL, encoding, options, 0)); + inputPush(ctxt, input); + + doc = htmlCtxtParseDocument(ctxt); + + htmlFreeParserCtxt(ctxt); + return(doc); } /** * htmlCtxtReadDoc: * @ctxt: an HTML parser context * @str: a pointer to a zero terminated string - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) + * @URL: only used for error reporting (optional) + * @encoding: the document encoding (optional) + * @options: a combination of htmlParserOptions * - * parse an XML in-memory document and build a tree. - * This reuses the existing @ctxt parser context + * Parse an HTML in-memory document and build a tree. + * + * See htmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ htmlDocPtr htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar *str, - const char *URL, const char *encoding, int options) + const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; if (ctxt == NULL) return (NULL); - if (str == NULL) - return (NULL); - xmlInitParser(); htmlCtxtReset(ctxt); + htmlCtxtUseOptions(ctxt, options); - input = xmlParserInputBufferCreateString(str); - if (input == NULL) { + input = xmlNewInputString(ctxt, URL, (const char *) str, encoding, 0); + if (input == NULL) return(NULL); - } + inputPush(ctxt, input); - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return(NULL); - } - - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, URL, encoding, options, 1)); + return(htmlCtxtParseDocument(ctxt)); } /** * htmlCtxtReadFile: * @ctxt: an HTML parser context * @filename: a file or URL - * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) + * @encoding: the document encoding (optional) + * @options: a combination of htmlParserOptions * - * parse an XML file from the filesystem or the network. - * This reuses the existing @ctxt parser context + * Parse an HTML file from the filesystem, the network or a + * user-defined resource loader. + * + * See xmlNewInputURL and htmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ @@ -6652,22 +6605,20 @@ htmlDocPtr htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, const char *encoding, int options) { - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (filename == NULL) - return (NULL); if (ctxt == NULL) return (NULL); - xmlInitParser(); htmlCtxtReset(ctxt); + htmlCtxtUseOptions(ctxt, options); - stream = xmlLoadExternalEntity(filename, NULL, ctxt); - if (stream == NULL) { + input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0); + if (input == NULL) return (NULL); - } - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, NULL, encoding, options, 1)); + inputPush(ctxt, input); + + return(htmlCtxtParseDocument(ctxt)); } /** @@ -6675,13 +6626,14 @@ htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, * @ctxt: an HTML parser context * @buffer: a pointer to a char array * @size: the size of the array - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) + * @URL: only used for error reporting (optional) + * @encoding: the document encoding (optional) + * @options: a combination of htmlParserOptions * * Parse an HTML in-memory document and build a tree. The input buffer must - * not contain a terminating null byte. - * This reuses the existing @ctxt parser context + * not contain any terminating null bytes. + * + * See htmlCtxtUseOptions for details.
* * Returns the resulting document tree */ @@ -6689,44 +6641,37 @@ htmlDocPtr htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (ctxt == NULL) + if ((ctxt == NULL) || (size < 0)) return (NULL); - if (buffer == NULL) - return (NULL); - xmlInitParser(); htmlCtxtReset(ctxt); + htmlCtxtUseOptions(ctxt, options); - input = xmlParserInputBufferCreateStatic(buffer, size, - XML_CHAR_ENCODING_NONE); - if (input == NULL) { - htmlErrMemory(ctxt); + input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) return(NULL); - } + inputPush(ctxt, input); - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return(NULL); - } - - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, URL, encoding, options, 1)); + return(htmlCtxtParseDocument(ctxt)); } /** * htmlCtxtReadFd: * @ctxt: an HTML parser context * @fd: an open file descriptor - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL - * @options: a combination of htmlParserOption(s) + * @URL: only used for error reporting (optional) + * @encoding: the document encoding (optional) + * @options: a combination of htmlParserOptions * - * parse an XML from a file descriptor and build a tree. - * This reuses the existing @ctxt parser context + * Parse an HTML document from a file descriptor and build a tree. + * + * See htmlCtxtUseOptions for details. + * + * NOTE that the file descriptor will not be closed when the + * context is freed or reset.
* * Returns the resulting document tree */ @@ -6734,29 +6679,21 @@ htmlDocPtr htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (fd < 0) - return (NULL); if (ctxt == NULL) - return (NULL); - xmlInitParser(); + return(NULL); htmlCtxtReset(ctxt); + htmlCtxtUseOptions(ctxt, options); - - input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + input = xmlNewInputFd(ctxt, URL, fd, encoding, 0); if (input == NULL) return (NULL); - input->closecallback = NULL; - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, URL, encoding, options, 1)); + input->buf->closecallback = NULL; + inputPush(ctxt, input); + + return(htmlCtxtParseDocument(ctxt)); } /** @@ -6769,8 +6706,9 @@ htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, * @encoding: the document encoding, or NULL * @options: a combination of htmlParserOption(s) * - * parse an HTML document from I/O functions and source and build a tree. - * This reuses the existing @ctxt parser context + * Parse an HTML document from I/O functions and source and build a tree. + * + * See xmlNewInputIO and htmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ @@ -6780,31 +6718,23 @@ htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (ioread == NULL) - return (NULL); if (ctxt == NULL) return (NULL); - xmlInitParser(); htmlCtxtReset(ctxt); + htmlCtxtUseOptions(ctxt, options); - input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, - XML_CHAR_ENCODING_NONE); + input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0); if (input == NULL) { if (ioclose != NULL) ioclose(ioctx); return (NULL); } - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - inputPush(ctxt, stream); - return (htmlDoRead(ctxt, URL, encoding, options, 1)); + inputPush(ctxt, input); + + return(htmlCtxtParseDocument(ctxt)); } #endif /* LIBXML_HTML_ENABLED */ diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index 0e97425a..47c7598d 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -246,6 +246,8 @@ XMLPUBFUN htmlDocPtr const char *URL, const char *encoding, int options); +XMLPUBFUN htmlDocPtr + htmlCtxtParseDocument (htmlParserCtxtPtr ctxt); XMLPUBFUN htmlDocPtr htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, const xmlChar *cur, diff --git a/include/libxml/parser.h b/include/libxml/parser.h index eeac3bfe..e271733f 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -1309,6 +1309,8 @@ XMLPUBFUN xmlDocPtr const char *URL, const char *encoding, int options); +XMLPUBFUN xmlDocPtr + xmlCtxtParseDocument (xmlParserCtxtPtr ctxt); XMLPUBFUN xmlDocPtr xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, const xmlChar *cur, @@ -1342,6 +1344,46 @@ XMLPUBFUN xmlDocPtr const char *encoding, int options); +/** + * New input API (2.9.13) + */ + +#define XML_INPUT_BUF_STATIC (1u << 1) +#define 
XML_INPUT_BUF_ZERO_TERMINATED (1u << 2) + +XMLPUBFUN xmlParserInputPtr + xmlNewInputURL (xmlParserCtxtPtr ctxt, + const char *url, + const char *publicId, + const char *encoding, + int flags); +XMLPUBFUN xmlParserInputPtr + xmlNewInputMemory (xmlParserCtxtPtr ctxt, + const char *filename, + const void *mem, size_t size, + const char *encoding, + int flags); +XMLPUBFUN xmlParserInputPtr + xmlNewInputString (xmlParserCtxtPtr ctxt, + const char *filename, + const char *str, + const char *encoding, + int flags); +XMLPUBFUN xmlParserInputPtr + xmlNewInputFd (xmlParserCtxtPtr ctxt, + const char *filename, + int fd, + const char *encoding, + int flags); +XMLPUBFUN xmlParserInputPtr + xmlNewInputIO (xmlParserCtxtPtr ctxt, + const char *url, + xmlInputReadCallback ioRead, + xmlInputCloseCallback ioClose, + void *ioCtxt, + const char *encoding, + int flags); + /* * Library wide options */ diff --git a/include/private/io.h b/include/private/io.h index 375052aa..01c2cedf 100644 --- a/include/private/io.h +++ b/include/private/io.h @@ -17,8 +17,12 @@ xmlNoNetExists(const char *filename); XML_HIDDEN int xmlParserInputBufferCreateFilenameSafe(const char *URI, xmlCharEncoding enc, xmlParserInputBufferPtr *out); + XML_HIDDEN xmlParserInputBufferPtr -xmlParserInputBufferCreateString(const xmlChar *str); +xmlNewInputBufferString(const char *str, int flags); +XML_HIDDEN xmlParserInputBufferPtr +xmlNewInputBufferMemory(const void *mem, size_t size, int flags, + xmlCharEncoding enc); #ifdef LIBXML_OUTPUT_ENABLED XML_HIDDEN xmlOutputBufferPtr diff --git a/include/private/parser.h b/include/private/parser.h index 43f11892..10e65101 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -83,4 +83,8 @@ xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix, XML_HIDDEN void * xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix); +XML_HIDDEN xmlParserInputPtr +xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url, + const char *chunk, int size, const 
char *encoding); + #endif /* XML_PARSER_H_PRIVATE__ */ diff --git a/parser.c b/parser.c index 2dae4e96..466af837 100644 --- a/parser.c +++ b/parser.c @@ -199,10 +199,6 @@ static const char* const xmlW3CPIs[] = { static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str); -static int -xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, - const char *encoding); - static xmlParserErrors xmlCtxtParseEntity(xmlParserCtxtPtr oldctxt, xmlEntityPtr ent); @@ -2315,8 +2311,11 @@ xmlPopInput(xmlParserCtxtPtr ctxt) { * @ctxt: an XML parser context * @input: an XML parser input fragment (entity, XML fragment ...). * - * xmlPushInput: switch to a new input stream which is stacked on top - * of the previous one(s). + * Push an input stream onto the stack. + * + * This makes the parser use an input returned from advanced functions + * like xmlNewInputURL or xmlNewInputMemory. + * * Returns -1 in case of error or the index in the input stack */ int @@ -10461,21 +10460,15 @@ xmlFinishDocument(xmlParserCtxtPtr ctxt) { * xmlParseDocument: * @ctxt: an XML parser context * - * parse an XML document (and build a tree if using the standard SAX - * interface). + * Parse an XML document and invoke the SAX handlers. This is useful + * if you're only interested in custom SAX callbacks. If you want a + * document tree, use xmlCtxtParseDocument. * - * [1] document ::= prolog element Misc* - * - * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? - * - * Returns 0, -1 in case of error. the parser context is augmented - * as a result of the parsing. + * Returns 0, -1 in case of error. 
*/ int xmlParseDocument(xmlParserCtxtPtr ctxt) { - xmlInitParser(); - if ((ctxt == NULL) || (ctxt->input == NULL)) return(-1); @@ -11480,25 +11473,36 @@ encoding_error: /** * xmlParseChunk: * @ctxt: an XML parser context - * @chunk: an char array - * @size: the size in byte of the chunk + * @chunk: chunk of memory + * @size: size of chunk in bytes * @terminate: last chunk indicator * - * Parse a Chunk of memory + * Parse a chunk of memory in push parser mode. * - * Returns zero if no error, the xmlParserErrors otherwise. + * Assumes that the parser context was initialized with + * xmlCreatePushParserCtxt. + * + * The last chunk, which will often be empty, must be marked with + * the @terminate flag. With the default SAX callbacks, the resulting + * document will be available in ctxt->myDoc. This pointer will not + * be freed by the library. + * + * If the document isn't well-formed, ctxt->myDoc is set to NULL. + * The push parser doesn't support recovery mode. + * + * Returns an xmlParserErrors code (0 on success). 
*/ int xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { int end_in_lf = 0; - if (ctxt == NULL) - return(XML_ERR_INTERNAL_ERROR); + if ((ctxt == NULL) || (size < 0)) + return(XML_ERR_ARGUMENT); if (ctxt->disableSAX != 0) return(ctxt->errNo); if (ctxt->input == NULL) - return(-1); + return(XML_ERR_INTERNAL_ERROR); ctxt->input->flags |= XML_INPUT_PROGRESSIVE; if (ctxt->instate == XML_PARSER_START) @@ -11592,77 +11596,41 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, /** * xmlCreatePushParserCtxt: - * @sax: a SAX handler - * @user_data: The user data returned on SAX callbacks - * @chunk: a pointer to an array of chars - * @size: number of chars in the array - * @filename: an optional file name or URI + * @sax: a SAX handler (optional) + * @user_data: user data for SAX callbacks (optional) + * @chunk: initial chunk (optional, deprecated) + * @size: size of initial chunk in bytes + * @filename: file name or URI (optional) * * Create a parser context for using the XML parser in push mode. - * If @buffer and @size are non-NULL, the data is used to detect - * the encoding. The remaining characters will be parsed so they - * don't need to be fed in again through xmlParseChunk. - * To allow content encoding detection, @size should be >= 4 - * The value of @filename is used for fetching external entities - * and error/warning reports. + * See xmlParseChunk. * - * Returns the new parser context or NULL + * Passing an initial chunk is useless and deprecated. + * + * @filename is used as base URI to fetch external entities and for + * error reports. + * + * Returns the new parser context or NULL in case of error. 
*/ xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, const char *chunk, int size, const char *filename) { xmlParserCtxtPtr ctxt; - xmlParserInputPtr inputStream; - xmlParserInputBufferPtr buf; - - buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); - if (buf == NULL) return(NULL); + xmlParserInputPtr input; ctxt = xmlNewSAXParserCtxt(sax, user_data); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(buf); + if (ctxt == NULL) return(NULL); - } + ctxt->dictNames = 1; - inputStream = xmlNewInputStream(ctxt); - if (inputStream == NULL) { + input = xmlNewInputPush(ctxt, filename, chunk, size, NULL); + if (input == NULL) { xmlFreeParserCtxt(ctxt); - xmlFreeParserInputBuffer(buf); return(NULL); } - - inputStream->flags |= XML_INPUT_PROGRESSIVE; - - if (filename == NULL) - inputStream->filename = NULL; - else { - inputStream->filename = (char *) - xmlCanonicPath((const xmlChar *) filename); - if (inputStream->filename == NULL) { - xmlFreeInputStream(inputStream); - xmlFreeParserCtxt(ctxt); - xmlFreeParserInputBuffer(buf); - return(NULL); - } - } - inputStream->buf = buf; - xmlBufResetInput(inputStream->buf->buffer, inputStream); - inputPush(ctxt, inputStream); - - if ((size != 0) && (chunk != NULL) && - (ctxt->input != NULL) && (ctxt->input->buf != NULL)) { - size_t pos = ctxt->input->cur - ctxt->input->base; - int res; - - res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); - if (res < 0) { - xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL); - xmlHaltParser(ctxt); - } - } + inputPush(ctxt, input); return(ctxt); } @@ -11685,12 +11653,14 @@ xmlStopParser(xmlParserCtxtPtr ctxt) { /** * xmlCreateIOParserCtxt: - * @sax: a SAX handler - * @user_data: The user data returned on SAX callbacks + * @sax: a SAX handler (optional) + * @user_data: user data for SAX callbacks (optional) * @ioread: an I/O read function - * @ioclose: an I/O close function + * @ioclose: an I/O 
close function (optional) * @ioctx: an I/O handler - * @enc: the charset encoding if known + * @enc: the charset encoding if known (deprecated) + * + * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadIO. * * Create a parser context for using the XML parser with an existing * I/O stream @@ -11699,41 +11669,26 @@ xmlStopParser(xmlParserCtxtPtr ctxt) { */ xmlParserCtxtPtr xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data, - xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, - void *ioctx, xmlCharEncoding enc) { + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, xmlCharEncoding enc) { xmlParserCtxtPtr ctxt; - xmlParserInputPtr inputStream; - xmlParserInputBufferPtr buf; + xmlParserInputPtr input; + const char *encoding; - if (ioread == NULL) return(NULL); + ctxt = xmlNewSAXParserCtxt(sax, user_data); + if (ctxt == NULL) + return(NULL); - buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, - XML_CHAR_ENCODING_NONE); - if (buf == NULL) { + encoding = xmlGetCharEncodingName(enc); + input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0); + if (input == NULL) { + xmlFreeParserCtxt(ctxt); if (ioclose != NULL) ioclose(ioctx); return (NULL); } - - ctxt = xmlNewSAXParserCtxt(sax, user_data); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(buf); - return(NULL); - } - - inputStream = xmlNewIOInputStream(ctxt, buf, enc); - if (inputStream == NULL) { - xmlFreeParserCtxt(ctxt); - return(NULL); - } - inputPush(ctxt, inputStream); - - if (enc != XML_CHAR_ENCODING_NONE) { - if (xmlSwitchEncoding(ctxt, enc) < 0) { - xmlFreeParserCtxt(ctxt); - return(NULL); - } - } + inputPush(ctxt, input); return(ctxt); } @@ -12409,7 +12364,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, if (doc->encoding != NULL) xmlSwitchEncodingName(ctxt, (const char *) doc->encoding); - xmlCtxtUseOptionsInternal(ctxt, options, NULL); + xmlCtxtUseOptions(ctxt, options); xmlDetectSAX2(ctxt); ctxt->myDoc = doc; /* parsing 
in context, i.e. as within existing content */ @@ -12519,19 +12474,18 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, * @user_data: The user data returned on SAX callbacks (possibly NULL) * @depth: Used for loop detection, use 0 * @string: the input string in UTF8 or ISO-Latin (zero terminated) - * @lst: the return value for the set of parsed nodes + * @list: the return value for the set of parsed nodes * @recover: return nodes even if the data is broken (use 0) * - * * Parse a well-balanced chunk of an XML document - * called by the parser + * * The allowed sequence for the Well Balanced Chunk is the one defined by * the content production in the XML grammar: * * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* * - * Returns 0 if the chunk is well balanced, -1 in case of args problem and - * the parser error code otherwise + * Returns 0 if the chunk is well balanced, or the parser error code + * otherwise. * * In case recover is set to 1, the nodelist will not be empty even if * the parsed chunk is not well balanced, assuming the parsing succeeded to @@ -12548,10 +12502,13 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, if (list != NULL) *list = NULL; - ctxt = xmlNewSAXParserCtxt(sax, user_data); - if ((ctxt == NULL) || (string == NULL)) + if (string == NULL) return(XML_ERR_ARGUMENT); + ctxt = xmlNewSAXParserCtxt(sax, user_data); + if (ctxt == NULL) + return(XML_ERR_NO_MEMORY); + xmlDetectSAX2(ctxt); ctxt->depth = depth; @@ -12598,23 +12555,24 @@ xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) { return(NULL); } if (sax != NULL) { - if (ctxt->sax != NULL) - xmlFree(ctxt->sax); - ctxt->sax = sax; + if (sax->initialized == XML_SAX2_MAGIC) { + *ctxt->sax = *sax; + } else { + memset(ctxt->sax, 0, sizeof(*ctxt->sax)); + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); + } ctxt->userData = NULL; } xmlParseExtParsedEnt(ctxt); - if (ctxt->wellFormed) + if (ctxt->wellFormed) { ret
= ctxt->myDoc; - else { + } else { ret = NULL; xmlFreeDoc(ctxt->myDoc); - ctxt->myDoc = NULL; } - if (sax != NULL) - ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); return(ret); @@ -12645,6 +12603,8 @@ xmlParseEntity(const char *filename) { * @ID: the entity PUBLIC ID * @base: a possible base for the target URI * + * DEPRECATED: Use xmlNewInputURL. + * * Create a parser context for an external entity * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. @@ -12655,7 +12615,7 @@ xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, const xmlChar *base) { xmlParserCtxtPtr ctxt; - xmlParserInputPtr inputStream; + xmlParserInputPtr input; xmlChar *uri = NULL; ctxt = xmlNewParserCtxt(); @@ -12669,11 +12629,11 @@ xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID, URL = uri; } - inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); - if (inputStream == NULL) + input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt); + if (input == NULL) goto error; - if (inputPush(ctxt, inputStream) < 0) + if (inputPush(ctxt, input) < 0) goto error; xmlFree(uri); @@ -12696,6 +12656,8 @@ error: * @filename: the filename or URL * @options: a combination of xmlParserOption * + * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile. + * * Create a parser context for a file or URL content. 
* Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time and for file accesses @@ -12706,23 +12668,21 @@ xmlParserCtxtPtr xmlCreateURLParserCtxt(const char *filename, int options) { xmlParserCtxtPtr ctxt; - xmlParserInputPtr inputStream; + xmlParserInputPtr input; ctxt = xmlNewParserCtxt(); if (ctxt == NULL) return(NULL); - if (options) - xmlCtxtUseOptionsInternal(ctxt, options, NULL); + xmlCtxtUseOptions(ctxt, options); ctxt->linenumbers = 1; - inputStream = xmlLoadExternalEntity(filename, NULL, ctxt); - if (inputStream == NULL) { + input = xmlLoadExternalEntity(filename, NULL, ctxt); + if (input == NULL) { xmlFreeParserCtxt(ctxt); return(NULL); } - - inputPush(ctxt, inputStream); + inputPush(ctxt, input); return(ctxt); } @@ -12731,6 +12691,8 @@ xmlCreateURLParserCtxt(const char *filename, int options) * xmlCreateFileParserCtxt: * @filename: the filename * + * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile. + * * Create a parser context for a file content. * Automatic support for ZLIB/Compress compressed document is provided * by default if found at compile-time. 
@@ -12771,16 +12733,17 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, xmlDocPtr ret; xmlParserCtxtPtr ctxt; - xmlInitParser(); - ctxt = xmlCreateFileParserCtxt(filename); if (ctxt == NULL) { return(NULL); } if (sax != NULL) { - if (ctxt->sax != NULL) - xmlFree(ctxt->sax); - ctxt->sax = sax; + if (sax->initialized == XML_SAX2_MAGIC) { + *ctxt->sax = *sax; + } else { + memset(ctxt->sax, 0, sizeof(*ctxt->sax)); + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); + } } xmlDetectSAX2(ctxt); if (data!=NULL) { @@ -12789,24 +12752,8 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, ctxt->recovery = recovery; - xmlParseDocument(ctxt); + ret = xmlCtxtParseDocument(ctxt); - if ((ctxt->wellFormed) || recovery) { - ret = ctxt->myDoc; - if ((ret != NULL) && (ctxt->input->buf != NULL)) { - if (ctxt->input->buf->compressed > 0) - ret->compression = 9; - else - ret->compression = ctxt->input->buf->compressed; - } - } - else { - ret = NULL; - xmlFreeDoc(ctxt->myDoc); - ctxt->myDoc = NULL; - } - if (sax != NULL) - ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); return(ret); @@ -12914,15 +12861,9 @@ xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer, xmlClearParserCtxt(ctxt); - input = xmlNewInputStream(ctxt); + input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0); if (input == NULL) return; - - if (filename != NULL) - input->filename = (char *) xmlCanonicPath((const xmlChar *)filename); - input->base = buffer; - input->cur = buffer; - input->end = &buffer[xmlStrlen(buffer)]; inputPush(ctxt, input); } @@ -12948,9 +12889,12 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, ctxt = xmlCreateFileParserCtxt(filename); if (ctxt == NULL) return -1; if (sax != NULL) { - if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) - xmlFree(ctxt->sax); - ctxt->sax = sax; + if (sax->initialized == XML_SAX2_MAGIC) { + *ctxt->sax = *sax; + } else { + memset(ctxt->sax, 0, sizeof(*ctxt->sax)); + 
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); + } ctxt->userData = user_data; } xmlDetectSAX2(ctxt); @@ -12995,35 +12939,21 @@ xmlParserCtxtPtr xmlCreateMemoryParserCtxt(const char *buffer, int size) { xmlParserCtxtPtr ctxt; xmlParserInputPtr input; - xmlParserInputBufferPtr buf; - if (buffer == NULL) - return(NULL); - if (size <= 0) + if (size < 0) return(NULL); ctxt = xmlNewParserCtxt(); if (ctxt == NULL) return(NULL); - buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); - if (buf == NULL) { - xmlFreeParserCtxt(ctxt); - return(NULL); - } - - input = xmlNewInputStream(ctxt); + input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0); if (input == NULL) { - xmlFreeParserInputBuffer(buf); xmlFreeParserCtxt(ctxt); return(NULL); } - - input->filename = NULL; - input->buf = buf; - xmlBufResetInput(input->buf->buffer, input); - inputPush(ctxt, input); + return(ctxt); } @@ -13055,14 +12985,15 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, xmlDocPtr ret; xmlParserCtxtPtr ctxt; - xmlInitParser(); - ctxt = xmlCreateMemoryParserCtxt(buffer, size); if (ctxt == NULL) return(NULL); if (sax != NULL) { - if (ctxt->sax != NULL) - xmlFree(ctxt->sax); - ctxt->sax = sax; + if (sax->initialized == XML_SAX2_MAGIC) { + *ctxt->sax = *sax; + } else { + memset(ctxt->sax, 0, sizeof(*ctxt->sax)); + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); + } } xmlDetectSAX2(ctxt); if (data!=NULL) { @@ -13071,16 +13002,8 @@ xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer, ctxt->recovery = recovery; - xmlParseDocument(ctxt); + ret = xmlCtxtParseDocument(ctxt); - if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc; - else { - ret = NULL; - xmlFreeDoc(ctxt->myDoc); - ctxt->myDoc = NULL; - } - if (sax != NULL) - ctxt->sax = NULL; xmlFreeParserCtxt(ctxt); return(ret); @@ -13160,14 +13083,15 @@ int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data, int ret = 0; xmlParserCtxtPtr ctxt; - xmlInitParser(); - ctxt = 
xmlCreateMemoryParserCtxt(buffer, size); if (ctxt == NULL) return -1; if (sax != NULL) { - if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) - xmlFree(ctxt->sax); - ctxt->sax = sax; + if (sax->initialized == XML_SAX2_MAGIC) { + *ctxt->sax = *sax; + } else { + memset(ctxt->sax, 0, sizeof(*ctxt->sax)); + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1)); + } ctxt->userData = user_data; } xmlDetectSAX2(ctxt); @@ -13204,33 +13128,18 @@ xmlParserCtxtPtr xmlCreateDocParserCtxt(const xmlChar *str) { xmlParserCtxtPtr ctxt; xmlParserInputPtr input; - xmlParserInputBufferPtr buf; - - if (str == NULL) - return(NULL); ctxt = xmlNewParserCtxt(); if (ctxt == NULL) return(NULL); - buf = xmlParserInputBufferCreateString(str); - if (buf == NULL) { - xmlFreeParserCtxt(ctxt); - return(NULL); - } - - input = xmlNewInputStream(ctxt); + input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0); if (input == NULL) { - xmlFreeParserInputBuffer(buf); xmlFreeParserCtxt(ctxt); return(NULL); } - - input->filename = NULL; - input->buf = buf; - xmlBufResetInput(input->buf->buffer, input); - inputPush(ctxt, input); + return(ctxt); } @@ -13433,81 +13342,38 @@ int xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, int size, const char *filename, const char *encoding) { - xmlParserInputPtr inputStream; - xmlParserInputBufferPtr buf; + xmlParserInputPtr input; if (ctxt == NULL) return(1); - buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); - if (buf == NULL) - return(1); - - if (ctxt == NULL) { - xmlFreeParserInputBuffer(buf); - return(1); - } - xmlCtxtReset(ctxt); - inputStream = xmlNewInputStream(ctxt); - if (inputStream == NULL) { - xmlFreeParserInputBuffer(buf); + input = xmlNewInputPush(ctxt, filename, chunk, size, encoding); + if (input == NULL) return(1); - } - - if (filename == NULL) - inputStream->filename = NULL; - else - inputStream->filename = (char *) - xmlCanonicPath((const xmlChar *) filename); - inputStream->buf = buf; - xmlBufResetInput(buf->buffer, 
inputStream); - - inputPush(ctxt, inputStream); - - if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL)) { - size_t pos = ctxt->input->cur - ctxt->input->base; - int res; - - res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); - if (res < 0) { - xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL); - xmlHaltParser(ctxt); - return(1); - } - } - - if (encoding != NULL) - xmlSwitchEncodingName(ctxt, encoding); + inputPush(ctxt, input); return(0); } /** - * xmlCtxtUseOptionsInternal: + * xmlCtxtUseOptions: * @ctxt: an XML parser context * @options: a combination of xmlParserOption - * @encoding: the user provided encoding to use * * Applies the options to the parser context * * Returns 0 in case of success, the set of unknown or unimplemented options * in case of error. */ -static int -xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding) +int +xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) { if (ctxt == NULL) return(-1); - if (encoding != NULL) { - if (ctxt->encoding != NULL) - xmlFree((xmlChar *) ctxt->encoding); - ctxt->encoding = xmlStrdup((const xmlChar *) encoding); - } + if (options & XML_PARSE_RECOVER) { ctxt->recovery = 1; options -= XML_PARSE_RECOVER; @@ -13622,22 +13488,6 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi return (options); } -/** - * xmlCtxtUseOptions: - * @ctxt: an XML parser context - * @options: a combination of xmlParserOption - * - * Applies the options to the parser context - * - * Returns 0 in case of success, the set of unknown or unimplemented options - * in case of error. 
- */ -int -xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) -{ - return(xmlCtxtUseOptionsInternal(ctxt, options, NULL)); -} - /** * xmlCtxtSetMaxAmplification: * @ctxt: an XML parser context @@ -13658,87 +13508,84 @@ xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl) } /** - * xmlDoRead: + * xmlCtxtParseDocument: * @ctxt: an XML parser context - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL - * @options: a combination of xmlParserOption - * @reuse: keep the context for reuse * - * Common front-end for the xmlRead functions + * Parse an XML document and return the resulting document tree. * * Returns the resulting document tree or NULL */ -static xmlDocPtr -xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding, - int options, int reuse) +xmlDocPtr +xmlCtxtParseDocument(xmlParserCtxtPtr ctxt) { xmlDocPtr ret = NULL; - xmlCtxtUseOptionsInternal(ctxt, options, encoding); - if (encoding != NULL) - xmlSwitchEncodingName(ctxt, encoding); - if ((URL != NULL) && (ctxt->input != NULL) && - (ctxt->input->filename == NULL)) { - ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL); - if (ctxt->input->filename == NULL) { - xmlErrMemory(ctxt); - goto error; - } - } xmlParseDocument(ctxt); + if ((ctxt->wellFormed) || ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) { ret = ctxt->myDoc; } else { ret = NULL; - if (ctxt->myDoc != NULL) { - xmlFreeDoc(ctxt->myDoc); - } + xmlFreeDoc(ctxt->myDoc); } ctxt->myDoc = NULL; -error: - if (!reuse) { - xmlFreeParserCtxt(ctxt); - } - - return (ret); + return(ret); } /** * xmlReadDoc: * @cur: a pointer to a zero terminated string - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @URL: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML in-memory document and build a tree. 
+ * Convenience function to parse an XML document from a + * zero-terminated string. + * + * See xmlCtxtReadDoc for details. * * Returns the resulting document tree */ xmlDocPtr -xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options) +xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding, + int options) { xmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + xmlDocPtr doc; - if (cur == NULL) - return (NULL); - xmlInitParser(); - - ctxt = xmlCreateDocParserCtxt(cur); + ctxt = xmlNewParserCtxt(); if (ctxt == NULL) - return (NULL); - return (xmlDoRead(ctxt, URL, encoding, options, 0)); + return(NULL); + + xmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, input); + + doc = xmlCtxtParseDocument(ctxt); + + xmlFreeParserCtxt(ctxt); + return(doc); } /** * xmlReadFile: * @filename: a file or URL - * @encoding: the document encoding, or NULL + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML file from the filesystem or the network. + * Convenience function to parse an XML file from the filesystem, + * the network or a global user-defined resource loader. + * + * See xmlCtxtReadFile for details.
* * Returns the resulting document tree */ @@ -13746,49 +13593,87 @@ xmlDocPtr xmlReadFile(const char *filename, const char *encoding, int options) { xmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + xmlDocPtr doc; - xmlInitParser(); - ctxt = xmlCreateURLParserCtxt(filename, options); + ctxt = xmlNewParserCtxt(); if (ctxt == NULL) - return (NULL); - return (xmlDoRead(ctxt, NULL, encoding, options, 0)); + return(NULL); + + xmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0); + if (input == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, input); + + doc = xmlCtxtParseDocument(ctxt); + + xmlFreeParserCtxt(ctxt); + return(doc); } /** * xmlReadMemory: * @buffer: a pointer to a char array * @size: the size of the array - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @url: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * * Parse an XML in-memory document and build a tree. The input buffer must * not contain a terminating null byte. * + * See xmlCtxtReadMemory for details. 
+ * * Returns the resulting document tree */ xmlDocPtr -xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options) +xmlReadMemory(const char *buffer, int size, const char *url, + const char *encoding, int options) { xmlParserCtxtPtr ctxt; + xmlParserInputPtr input; + xmlDocPtr doc; - xmlInitParser(); - ctxt = xmlCreateMemoryParserCtxt(buffer, size); + if (size < 0) + return(NULL); + + ctxt = xmlNewParserCtxt(); if (ctxt == NULL) - return (NULL); - return (xmlDoRead(ctxt, URL, encoding, options, 0)); + return(NULL); + + xmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputMemory(ctxt, url, buffer, size, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + inputPush(ctxt, input); + + doc = xmlCtxtParseDocument(ctxt); + + xmlFreeParserCtxt(ctxt); + return(doc); } /** * xmlReadFd: * @fd: an open file descriptor - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @URL: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML from a file descriptor and build a tree. + * Parse an XML document from a file descriptor and build a tree. + * + * See xmlCtxtReadFd for details. + * * NOTE that the file descriptor will not be closed when the - * reader is closed or reset. + * context is freed or reset.
* * Returns the resulting document tree */ @@ -13796,42 +13681,41 @@ xmlDocPtr xmlReadFd(int fd, const char *URL, const char *encoding, int options) { xmlParserCtxtPtr ctxt; - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; + xmlDocPtr doc; - if (fd < 0) - return (NULL); - xmlInitParser(); - - input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); - if (input == NULL) - return (NULL); - input->closecallback = NULL; ctxt = xmlNewParserCtxt(); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); + if (ctxt == NULL) + return(NULL); + + xmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputFd(ctxt, URL, fd, encoding, 0); + if (input == NULL) { xmlFreeParserCtxt(ctxt); - return (NULL); + return(NULL); } - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, URL, encoding, options, 0)); + input->buf->closecallback = NULL; + inputPush(ctxt, input); + + doc = xmlCtxtParseDocument(ctxt); + + xmlFreeParserCtxt(ctxt); + return(doc); } /** * xmlReadIO: * @ioread: an I/O read function - * @ioclose: an I/O close function + * @ioclose: an I/O close function (optional) * @ioctx: an I/O handler - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @URL: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML document from I/O functions and source and build a tree. + * Parse an XML document from I/O functions and context and build a tree. + * + * See xmlCtxtReadIO for details. 
* * Returns the resulting document tree */ @@ -13840,45 +13724,44 @@ xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, void *ioctx, const char *URL, const char *encoding, int options) { xmlParserCtxtPtr ctxt; - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; + xmlDocPtr doc; - if (ioread == NULL) - return (NULL); - xmlInitParser(); + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + return(NULL); - input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, - XML_CHAR_ENCODING_NONE); + xmlCtxtUseOptions(ctxt, options); + + input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0); if (input == NULL) { + xmlFreeParserCtxt(ctxt); if (ioclose != NULL) ioclose(ioctx); return (NULL); } - ctxt = xmlNewParserCtxt(); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - xmlFreeParserCtxt(ctxt); - return (NULL); - } - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, URL, encoding, options, 0)); + inputPush(ctxt, input); + + doc = xmlCtxtParseDocument(ctxt); + + xmlFreeParserCtxt(ctxt); + return(doc); } /** * xmlCtxtReadDoc: * @ctxt: an XML parser context * @str: a pointer to a zero terminated string - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @URL: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML in-memory document and build a tree. - * This reuses the existing @ctxt parser context + * Parse an XML in-memory document and build a tree. + * + * @URL is used as base to resolve external entities and for error + * reporting. + * + * See xmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ @@ -13886,41 +13769,34 @@ xmlDocPtr xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; if (ctxt == NULL) - return (NULL); - if (str == NULL) - return (NULL); - xmlInitParser(); + return(NULL); xmlCtxtReset(ctxt); + xmlCtxtUseOptions(ctxt, options); - input = xmlParserInputBufferCreateString(str); - if (input == NULL) { + input = xmlNewInputString(ctxt, URL, (const char *) str, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) return(NULL); - } + inputPush(ctxt, input); - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return(NULL); - } - - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, URL, encoding, options, 1)); + return(xmlCtxtParseDocument(ctxt)); } /** * xmlCtxtReadFile: * @ctxt: an XML parser context * @filename: a file or URL - * @encoding: the document encoding, or NULL + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML file from the filesystem or the network. - * This reuses the existing @ctxt parser context + * Parse an XML file from the filesystem, the network or a user-defined + * resource loader. + * + * See xmlNewInputURL and xmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ @@ -13928,22 +13804,20 @@ xmlDocPtr xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, const char *encoding, int options) { - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (filename == NULL) - return (NULL); if (ctxt == NULL) - return (NULL); - xmlInitParser(); + return(NULL); xmlCtxtReset(ctxt); + xmlCtxtUseOptions(ctxt, options); - stream = xmlLoadExternalEntity(filename, NULL, ctxt); - if (stream == NULL) { - return (NULL); - } - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, NULL, encoding, options, 1)); + input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0); + if (input == NULL) + return(NULL); + inputPush(ctxt, input); + + return(xmlCtxtParseDocument(ctxt)); } /** @@ -13951,13 +13825,17 @@ xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename, * @ctxt: an XML parser context * @buffer: a pointer to a char array * @size: the size of the array - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @URL: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * * Parse an XML in-memory document and build a tree. The input buffer must * not contain a terminating null byte. - * This reuses the existing @ctxt parser context + * + * @URL is used as base to resolve external entities and for error + * reporting. + * + * See xmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ @@ -13965,46 +13843,40 @@ xmlDocPtr xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (ctxt == NULL) - return (NULL); - if (buffer == NULL) - return (NULL); - xmlInitParser(); + if ((ctxt == NULL) || (size < 0)) + return(NULL); xmlCtxtReset(ctxt); + xmlCtxtUseOptions(ctxt, options); - input = xmlParserInputBufferCreateStatic(buffer, size, - XML_CHAR_ENCODING_NONE); - if (input == NULL) { - xmlErrMemory(ctxt); + input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding, + XML_INPUT_BUF_STATIC); + if (input == NULL) return(NULL); - } + inputPush(ctxt, input); - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return(NULL); - } - - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, URL, encoding, options, 1)); + return(xmlCtxtParseDocument(ctxt)); } /** * xmlCtxtReadFd: * @ctxt: an XML parser context * @fd: an open file descriptor - * @URL: the base URL to use for the document - * @encoding: the document encoding, or NULL + * @URL: base URL (optional) + * @encoding: the document encoding (optional) * @options: a combination of xmlParserOption * - * parse an XML from a file descriptor and build a tree. - * This reuses the existing @ctxt parser context + * Parse an XML document from a file descriptor and build a tree. + * * NOTE that the file descriptor will not be closed when the - * reader is closed or reset. + * context is freed or reset. + * + * @URL is used as base to resolve external entities and for error + * reporting. + * + * See xmlCtxtUseOptions for details. 
* * Returns the resulting document tree */ @@ -14012,29 +13884,21 @@ xmlDocPtr xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (fd < 0) - return (NULL); if (ctxt == NULL) - return (NULL); - xmlInitParser(); + return(NULL); xmlCtxtReset(ctxt); + xmlCtxtUseOptions(ctxt, options); - - input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + input = xmlNewInputFd(ctxt, URL, fd, encoding, 0); if (input == NULL) return (NULL); - input->closecallback = NULL; - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, URL, encoding, options, 1)); + input->buf->closecallback = NULL; + inputPush(ctxt, input); + + return(xmlCtxtParseDocument(ctxt)); } /** @@ -14050,6 +13914,11 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, * parse an XML document from I/O functions and source and build a tree. * This reuses the existing @ctxt parser context * + * @URL is used as base to resolve external entities and for error + * reporting. + * + * See xmlCtxtUseOptions for details. 
+ * * Returns the resulting document tree */ xmlDocPtr @@ -14058,30 +13927,22 @@ xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread, const char *URL, const char *encoding, int options) { - xmlParserInputBufferPtr input; - xmlParserInputPtr stream; + xmlParserInputPtr input; - if (ioread == NULL) - return (NULL); if (ctxt == NULL) - return (NULL); - xmlInitParser(); + return(NULL); xmlCtxtReset(ctxt); + xmlCtxtUseOptions(ctxt, options); - input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, - XML_CHAR_ENCODING_NONE); + input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0); if (input == NULL) { if (ioclose != NULL) ioclose(ioctx); return (NULL); } - stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); - if (stream == NULL) { - xmlFreeParserInputBuffer(input); - return (NULL); - } - inputPush(ctxt, stream); - return (xmlDoRead(ctxt, URL, encoding, options, 1)); + inputPush(ctxt, input); + + return(xmlCtxtParseDocument(ctxt)); } diff --git a/parserInternals.c b/parserInternals.c index 0a915c32..cc37770d 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -309,23 +309,6 @@ xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain, va_end(ap); } -/** - * xmlErrInternal: - * @ctxt: an XML parser context - * @msg: the error message - * @str: error information - * - * Handle an internal error - */ -static void LIBXML_ATTR_FORMAT(2,0) -xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) -{ - if (ctxt == NULL) - return; - xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, - XML_ERR_FATAL, str, NULL, NULL, 0, msg, str); -} - /** * xmlFatalErr: * @ctxt: an XML parser context @@ -1084,6 +1067,32 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) return(ret); } +/** + * xmlSwitchInputEncodingName: + * @ctxt: the parser context, only for error reporting + * @input: the input stream + * @encoding: the encoding name + * + * Returns 0 in case of success, -1 otherwise
+ */ +static int +xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, + const char *encoding) { + xmlCharEncodingHandlerPtr handler; + int res; + + if (encoding == NULL) + return(-1); + + res = xmlOpenCharEncodingHandler(encoding, &handler); + if (res != 0) { + xmlFatalErr(ctxt, res, encoding); + return(-1); + } + + return(xmlSwitchInputEncoding(ctxt, input, handler)); +} + /** * xmlSwitchEncodingName: * @ctxt: the parser context @@ -1101,24 +1110,12 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) */ int xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) { - xmlCharEncodingHandlerPtr handler; - int res; - - if (encoding == NULL) - return(-1); - - res = xmlOpenCharEncodingHandler(encoding, &handler); - if (res != 0) { - xmlFatalErr(ctxt, res, encoding); - return(-1); - } - - return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler)); + return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding)); } /** * xmlSwitchInputEncoding: - * @ctxt: the parser context + * @ctxt: the parser context, only for error reporting * @input: the input stream * @handler: the encoding handler * @@ -1467,36 +1464,341 @@ xmlNewInputStream(xmlParserCtxtPtr ctxt) { return(input); } +/** + * xmlNewInputURL: + * @ctxt: parser context + * @url: filename or URL + * @publicId: public ID from doctype (optional) + * @encoding: character encoding (optional) + * @flags: unused, pass 0 + * + * Creates a new parser input from the filesystem, the network or + * a user-defined resource loader. + * + * @url is a filename or URL. If it contains the substring "://", + * it is assumed to be a Legacy Extended IRI. Otherwise, it is + * treated as a filesystem path. + * + * @publicId is an optional XML public ID, typically from a doctype + * declaration. It is used for catalog lookups. + * + * If @encoding is specified, it will override any encodings found + * in XML declarations, text declarations, BOMs, etc. Pass NULL + * for auto-detection.
+ * + * The following resource loaders will be called if they were + * registered (in order of precedence): + * + * - the global external entity loader set with + * xmlSetExternalEntityLoader + * - the per-thread xmlParserInputBufferCreateFilenameFunc set with + * xmlParserInputBufferCreateFilenameDefault + * - the default loader which will return + * - the result from a matching global input callback set with + * xmlRegisterInputCallbacks + * - a HTTP resource if support is compiled in. + * - a file opened from the filesystem, with automatic detection + * of compressed files if support is compiled in. + * + * The returned input should be pushed onto the input stack with + * xmlPushInput. + * + * This function should not be invoked from user-defined resource + * loaders to avoid infinite loops. + * + * Returns a new parser input. + */ +xmlParserInputPtr +xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId, + const char *encoding, int flags ATTRIBUTE_UNUSED) { + xmlParserInputPtr input; + + if ((ctxt == NULL) || (url == NULL)) + return(NULL); + + input = xmlLoadExternalEntity(url, publicId, ctxt); + if (input == NULL) + return(NULL); + + if (encoding != NULL) + xmlSwitchInputEncodingName(ctxt, input, encoding); + + return(input); +} + +/** + * xmlNewInputInternal: + * @ctxt: parser context + * @buf: parser input buffer + * @filename: filename or URL + * @encoding: character encoding (optional) + * + * Internal helper function. + * + * Returns a new parser input.
+ */ +static xmlParserInputPtr +xmlNewInputInternal(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf, + const char *filename, const char *encoding) { + xmlParserInputPtr input; + + input = xmlNewInputStream(ctxt); + if (input == NULL) { + xmlFreeParserInputBuffer(buf); + return(NULL); + } + + input->buf = buf; + xmlBufResetInput(input->buf->buffer, input); + + if (filename != NULL) { + input->filename = xmlMemStrdup(filename); + if (input->filename == NULL) { + xmlCtxtErrMemory(ctxt); + xmlFreeInputStream(input); + return(NULL); + } + } + + if (encoding != NULL) { + if (xmlSwitchInputEncodingName(ctxt, input, encoding) < 0) { + xmlFreeInputStream(input); + return(NULL); + } + } + + return(input); +} + +/** + * xmlNewInputMemory: + * @ctxt: parser context + * @url: base URL (optional) + * @mem: pointer to char array + * @size: size of array + * @encoding: character encoding (optional) + * @flags: optimization hints + * + * Creates a new parser input to read from a memory area. + * + * @url is used as base to resolve external entities and for + * error reporting. + * + * If the XML_INPUT_BUF_STATIC flag is set, the memory area must + * stay unchanged until parsing has finished. This can avoid + * temporary copies. + * + * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory + * area must contain a zero byte after the buffer at position @size. + * This can avoid temporary copies. + * + * Returns a new parser input. 
+ */ +xmlParserInputPtr +xmlNewInputMemory(xmlParserCtxtPtr ctxt, const char *url, + const void *mem, size_t size, + const char *encoding, int flags) { + xmlParserInputBufferPtr buf; + + if ((ctxt == NULL) || (mem == NULL)) + return(NULL); + + buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE); + if (buf == NULL) { + xmlCtxtErrMemory(ctxt); + return(NULL); + } + + return(xmlNewInputInternal(ctxt, buf, url, encoding)); +} + +/** + * xmlNewInputString: + * @ctxt: parser context + * @url: base URL (optional) + * @str: zero-terminated string + * @encoding: character encoding (optional) + * @flags: optimization hints + * + * Creates a new parser input to read from a zero-terminated string. + * + * @url is used as base to resolve external entities and for + * error reporting. + * + * If the XML_INPUT_BUF_STATIC flag is set, the string must + * stay unchanged until parsing has finished. This can avoid + * temporary copies. + * + * Returns a new parser input. + */ +xmlParserInputPtr +xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url, + const char *str, const char *encoding, int flags) { + xmlParserInputBufferPtr buf; + + if ((ctxt == NULL) || (str == NULL)) + return(NULL); + + buf = xmlNewInputBufferString(str, flags); + if (buf == NULL) { + xmlCtxtErrMemory(ctxt); + return(NULL); + } + + return(xmlNewInputInternal(ctxt, buf, url, encoding)); +} + +/** + * xmlNewInputFd: + * @ctxt: parser context + * @url: base URL (optional) + * @fd: file descriptor + * @encoding: character encoding (optional) + * @flags: unused, pass 0 + * + * Creates a new parser input to read from a file descriptor. + * + * @url is used as base to resolve external entities and for + * error reporting. + * + * @fd is closed after parsing has finished. + * + * Returns a new parser input.
+ */ +xmlParserInputPtr +xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url, + int fd, const char *encoding, int flags ATTRIBUTE_UNUSED) { + xmlParserInputBufferPtr buf; + + if ((ctxt == NULL) || (fd < 0)) + return(NULL); + + buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + if (buf == NULL) { + xmlCtxtErrMemory(ctxt); + return(NULL); + } + + return(xmlNewInputInternal(ctxt, buf, url, encoding)); +} + +/** + * xmlNewInputIO: + * @ctxt: parser context + * @url: base URL (optional) + * @ioRead: read callback + * @ioClose: close callback (optional) + * @ioCtxt: IO context + * @encoding: character encoding (optional) + * @flags: unused, pass 0 + * + * Creates a new parser input to read from input callbacks and + * context. + * + * @url is used as base to resolve external entities and for + * error reporting. + * + * @ioRead is called to read new data into a provided buffer. + * It must return the number of bytes written into the buffer + * or a negative xmlParserErrors code on failure. + * + * @ioClose is called after parsing has finished. + * + * @ioCtxt is an opaque pointer passed to the callbacks. + * + * Returns a new parser input. + */ +xmlParserInputPtr +xmlNewInputIO(xmlParserCtxtPtr ctxt, const char *url, + xmlInputReadCallback ioRead, xmlInputCloseCallback ioClose, + void *ioCtxt, + const char *encoding, int flags ATTRIBUTE_UNUSED) { + xmlParserInputBufferPtr buf; + + if ((ctxt == NULL) || (ioRead == NULL)) + return(NULL); + + buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); + if (buf == NULL) { + xmlCtxtErrMemory(ctxt); + return(NULL); + } + + buf->context = ioCtxt; + buf->readcallback = ioRead; + buf->closecallback = ioClose; + + return(xmlNewInputInternal(ctxt, buf, url, encoding)); +} + +/** + * xmlNewInputPush: + * @ctxt: parser context + * @url: base URL (optional) + * @chunk: pointer to char array + * @size: size of array + * @encoding: character encoding (optional) + * + * Creates a new parser input for a push parser.
+ * + * Returns a new parser input. + */ +xmlParserInputPtr +xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url, + const char *chunk, int size, const char *encoding) { + xmlParserInputBufferPtr buf; + xmlParserInputPtr input; + + buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); + if (buf == NULL) { + xmlCtxtErrMemory(ctxt); + return(NULL); + } + + input = xmlNewInputInternal(ctxt, buf, url, encoding); + if (input == NULL) + return(NULL); + + input->flags |= XML_INPUT_PROGRESSIVE; + + if ((size > 0) && (chunk != NULL)) { + int res; + + res = xmlParserInputBufferPush(input->buf, size, chunk); + xmlBufResetInput(input->buf->buffer, input); + if (res < 0) { + xmlCtxtErrIO(ctxt, input->buf->error, NULL); + xmlFreeInputStream(input); + return(NULL); + } + } + + return(input); +} + /** * xmlNewIOInputStream: * @ctxt: an XML parser context * @input: an I/O Input * @enc: the charset encoding if known * + * DEPRECATED: Use xmlNewInputURL, xmlNewInputMemory, etc. + * * Create a new input stream structure encapsulating the @input into * a stream suitable for the parser. 
* * Returns the new input stream or NULL */ xmlParserInputPtr -xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, +xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf, xmlCharEncoding enc) { - xmlParserInputPtr inputStream; + const char *encoding; - if (input == NULL) return(NULL); - inputStream = xmlNewInputStream(ctxt); - if (inputStream == NULL) { - return(NULL); - } - inputStream->filename = NULL; - inputStream->buf = input; - xmlBufResetInput(inputStream->buf->buffer, inputStream); + if (buf == NULL) + return(NULL); - if (enc != XML_CHAR_ENCODING_NONE) { - xmlSwitchEncoding(ctxt, enc); - } - - return(inputStream); + encoding = xmlGetCharEncodingName(enc); + return(xmlNewInputInternal(ctxt, buf, NULL, encoding)); } /** @@ -1518,14 +1820,20 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) { return(NULL); if (ent->content != NULL) { - input = xmlNewStringInputStream(ctxt, ent->content); + input = xmlNewInputString(ctxt, NULL, (const char *) ent->content, + NULL, XML_INPUT_BUF_STATIC); } else if (ent->URI != NULL) { input = xmlLoadExternalEntity((char *) ent->URI, (char *) ent->ExternalID, ctxt); } else { - input = xmlNewStringInputStream(ctxt, ""); + input = xmlNewInputMemory(ctxt, NULL, "", 0, NULL, + XML_INPUT_BUF_STATIC | + XML_INPUT_BUF_ZERO_TERMINATED); } + if (input == NULL) + return(NULL); + input->entity = ent; return(input); @@ -1536,35 +1844,18 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) { * @ctxt: an XML parser context * @buffer: an memory buffer * + * DEPRECATED: Use xmlNewInputString. + * * Create a new input stream based on a memory buffer. 
+ * * Returns the new input stream */ xmlParserInputPtr xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { - xmlParserInputPtr input; - xmlParserInputBufferPtr buf; - - if (buffer == NULL) { - xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", - NULL); - return(NULL); - } - buf = xmlParserInputBufferCreateString(buffer); - if (buf == NULL) { - xmlCtxtErrMemory(ctxt); - return(NULL); - } - input = xmlNewInputStream(ctxt); - if (input == NULL) { - xmlCtxtErrMemory(ctxt); - xmlFreeParserInputBuffer(buf); - return(NULL); - } - input->buf = buf; - xmlBufResetInput(input->buf->buffer, input); - return(input); + return(xmlNewInputString(ctxt, NULL, (const char *) buffer, NULL, 0)); } + /**************************************************************** * * * External entities loading * @@ -1716,6 +2007,8 @@ xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) { * @ctxt: an XML parser context * @filename: the filename to use as entity * + * DEPRECATED: Use xmlNewInputURL. + * * Create a new input stream based on a file or an URL. * * Returns the new input stream or NULL in case of error @@ -1891,8 +2184,7 @@ xmlGetExternalEntityLoader(void) { * @ID: the Public ID for the entity to load * @ctxt: the context in which the entity is called or NULL * - * Load an external entity, note that the use of this function for - * unparsed entities may generate problems + * DEPRECATED: Use xmlNewInputURL. * * Returns the xmlParserInputPtr or NULL */ @@ -1942,8 +2234,6 @@ xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax, if (ctxt == NULL) return(-1); - xmlInitParser(); - if (ctxt->dict == NULL) ctxt->dict = xmlDictCreate(); if (ctxt->dict == NULL) @@ -2210,6 +2500,10 @@ xmlNewParserCtxt(void) * Allocate and initialize a new SAX parser context. If userData is NULL, * the parser context will be passed as user data. * + * Available since 2.11.0. 
If you want to support older versions, + * it's best to invoke xmlNewParserCtxt and set ctxt->sax with + * struct assignment. + * * Returns the xmlParserCtxtPtr or NULL if memory allocation failed. */ @@ -2218,6 +2512,8 @@ xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData) { xmlParserCtxtPtr ctxt; + xmlInitParser(); + ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); if (ctxt == NULL) return(NULL); diff --git a/xmlIO.c b/xmlIO.c index 2a89f449..8bd79c5e 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -1913,12 +1913,55 @@ static int xmlMemClose(void *vctxt) { xmlMemIOCtxt *ctxt = vctxt; - if (ctxt->mem != 0) + if (ctxt->mem != NULL) xmlFree(ctxt->mem); xmlFree(ctxt); return(0); } +xmlParserInputBufferPtr +xmlNewInputBufferMemory(const void *mem, size_t size, int flags, + xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + xmlMemIOCtxt *ctxt; + char *copy = NULL; + + if ((flags & XML_INPUT_BUF_STATIC) == 0) { + if (size + 1 == 0) + return(NULL); + copy = xmlMalloc(size + 1); + if (copy == NULL) + return(NULL); + memcpy(copy, mem, size); + copy[size] = 0; + + mem = copy; + } + + ret = xmlAllocParserInputBuffer(enc); + if (ret == NULL) { + xmlFree(copy); + return(NULL); + } + + ctxt = xmlMalloc(sizeof(*ctxt)); + if (ctxt == NULL) { + xmlFreeParserInputBuffer(ret); + xmlFree(copy); + return(NULL); + } + + ctxt->mem = copy; + ctxt->cur = mem; + ctxt->size = size; + + ret->context = ctxt; + ret->readcallback = xmlMemRead; + ret->closecallback = xmlMemClose; + + return(ret); +} + /** * xmlParserInputBufferCreateMem: * @mem: the memory input @@ -1940,27 +1983,10 @@ xmlMemClose(void *vctxt) { */ xmlParserInputBufferPtr xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc) { - xmlParserInputBufferPtr buf; - xmlMemIOCtxt *ctxt; - char *copy; - - if ((size < 0) || (mem == NULL)) + if ((mem == NULL) || (size < 0)) return(NULL); - copy = (char *) xmlStrndup((const xmlChar *) mem, size); - if (copy == NULL) - return(NULL); - - buf =
xmlParserInputBufferCreateStatic(copy, size, enc); - if (buf == NULL) { - xmlFree(copy); - return(NULL); - } - - ctxt = buf->context; - ctxt->mem = copy; - - return(buf); + return(xmlNewInputBufferMemory(mem, size, 0, enc)); } /** @@ -1984,40 +2010,20 @@ xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc) { xmlParserInputBufferPtr xmlParserInputBufferCreateStatic(const char *mem, int size, xmlCharEncoding enc) { - xmlParserInputBufferPtr ret; - xmlMemIOCtxt *ctxt; - - if ((size < 0) || (mem == NULL)) + if ((mem == NULL) || (size < 0)) return(NULL); - ret = xmlAllocParserInputBuffer(enc); - if (ret == NULL) - return(NULL); - - ctxt = xmlMalloc(sizeof(*ctxt)); - if (ctxt == NULL) { - xmlFreeParserInputBuffer(ret); - return(NULL); - } - ctxt->mem = NULL; - ctxt->cur = mem; - ctxt->size = size; - - ret->context = ctxt; - ret->readcallback = xmlMemRead; - ret->closecallback = xmlMemClose; - - return(ret); + return(xmlNewInputBufferMemory(mem, size, XML_INPUT_BUF_STATIC, enc)); } typedef struct { - const xmlChar *str; + const char *str; } xmlStringIOCtxt; static int xmlStringRead(void *vctxt, char *buf, int size) { xmlStringIOCtxt *ctxt = vctxt; - const xmlChar *zero; + const char *zero; size_t len; zero = memchr(ctxt->str, 0, size); @@ -2035,21 +2041,14 @@ xmlStringClose(void *vctxt) { return(0); } -/** - * xmlParserInputBufferCreateString: - * @str: a null-terminated string - * - * Create a buffered parser input for the progressive parsing for the input - * from a null-terminated C string. 
- * - * Returns the new parser input or NULL - */ xmlParserInputBufferPtr -xmlParserInputBufferCreateString(const xmlChar *str) { +xmlNewInputBufferString(const char *str, int flags) { xmlParserInputBufferPtr ret; xmlStringIOCtxt *ctxt; - if (str == NULL) return(NULL); + if ((flags & XML_INPUT_BUF_STATIC) == 0) + return(xmlNewInputBufferMemory(str, strlen(str), flags, + XML_CHAR_ENCODING_NONE)); ret = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); if (ret == NULL) @@ -2060,6 +2059,7 @@ xmlParserInputBufferCreateString(const xmlChar *str) { xmlFreeParserInputBuffer(ret); return(NULL); } + ctxt->str = str; ret->context = ctxt; diff --git a/xmllint.c b/xmllint.c index 0a1baa67..bc8c2851 100644 --- a/xmllint.c +++ b/xmllint.c @@ -1776,7 +1776,6 @@ static void streamFile(char *filename) { int fd = -1; struct stat info; const char *base = NULL; - xmlParserInputBufferPtr input = NULL; if (memory) { if (stat(filename, &info) < 0) @@ -1928,7 +1927,6 @@ static void streamFile(char *filename) { #endif #ifdef HAVE_MMAP if (memory) { - xmlFreeParserInputBuffer(input); munmap((char *) base, info.st_size); close(fd); }