diff --git a/ChangeLog b/ChangeLog index 85a5a9de..455328f2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Sun Sep 17 17:58:37 CEST 2000 Daniel Veillard + + * SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c: + removed a few warnings in pedantic mode ... + * parserInternals.c parser.c: moved encoding switching function + to parserInternals.c + * configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3 + Sat Sep 16 20:12:41 CEST 2000 Daniel Veillard * HTMLparser.c parser.c: set ctxt->errNo before calling the diff --git a/SAX.c b/SAX.c index 3d1475bd..b63ed2df 100644 --- a/SAX.c +++ b/SAX.c @@ -312,7 +312,7 @@ resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; xmlParserInputPtr ret; - char *URI; + xmlChar *URI; const char *base = NULL; if (ctxt->input != NULL) @@ -320,7 +320,7 @@ resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId) if (base == NULL) base = ctxt->directory; - URI = xmlBuildURI(systemId, base); + URI = xmlBuildURI(systemId, (const xmlChar *) base); #ifdef DEBUG_SAX fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId); @@ -423,7 +423,7 @@ entityDecl(void *ctx, const xmlChar *name, int type, ctxt->sax->warning(ctxt, "Entity(%s) already defined in the internal subset\n", name); if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { - char *URI; + xmlChar *URI; const char *base = NULL; if (ctxt->input != NULL) @@ -431,7 +431,7 @@ entityDecl(void *ctx, const xmlChar *name, int type, if (base == NULL) base = ctxt->directory; - URI = xmlBuildURI(systemId, base); + URI = xmlBuildURI(systemId, (const xmlChar *) base); ent->URI = URI; } } else if (ctxt->inSubset == 2) { @@ -442,7 +442,7 @@ entityDecl(void *ctx, const xmlChar *name, int type, ctxt->sax->warning(ctxt, "Entity(%s) already defined in the external subset\n", name); if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { - char *URI; + xmlChar *URI; const char *base = NULL; if (ctxt->input != NULL) @@ -450,7 +450,7 @@ entityDecl(void *ctx, const xmlChar *name, int type, if (base == NULL) base = ctxt->directory; - URI = xmlBuildURI(systemId, base); + URI = xmlBuildURI(systemId, (const xmlChar *) base); ent->URI = URI; } } else { diff --git a/config.h.in b/config.h.in index 7a7587a9..01bb4756 100644 --- a/config.h.in +++ b/config.h.in @@ -27,6 +27,9 @@ /* Define if you have the fpclass function. */ #undef HAVE_FPCLASS +/* Define if you have the iconv function. */ +#undef HAVE_ICONV + /* Define if you have the isnand function. */ #undef HAVE_ISNAND @@ -96,6 +99,9 @@ /* Define if you have the header file. */ #undef HAVE_STDLIB_H +/* Define if you have the header file. */ +#undef HAVE_STRING_H + /* Define if you have the header file. */ #undef HAVE_SYS_DIR_H @@ -144,3 +150,6 @@ /* Version number of package */ #undef VERSION +/* Define if compiler has function prototypes */ +#undef PROTOTYPES + diff --git a/configure.in b/configure.in index 5b7c675a..61977a63 100644 --- a/configure.in +++ b/configure.in @@ -5,7 +5,7 @@ AM_CONFIG_HEADER(config.h) LIBXML_MAJOR_VERSION=2 LIBXML_MINOR_VERSION=2 -LIBXML_MICRO_VERSION=2 +LIBXML_MICRO_VERSION=3 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION @@ -243,6 +243,7 @@ if test "$with_iconv" = "no" ; then echo Disabling ICONV support WITH_ICONV=0 else + AC_CHECK_FUNCS(iconv) if test "$have_iconv" != "" ; then echo Iconv support not found WITH_ICONV=0 diff --git a/debugXML.c b/debugXML.c index e0c41f9f..9ee32989 100644 --- a/debugXML.c +++ b/debugXML.c @@ -917,6 +917,9 @@ static int xmlLsCountNode(xmlNodePtr node) { break; case XML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif list = ((xmlDocPtr) node)->children; break; case XML_ATTRIBUTE_NODE: diff --git a/doc/Makefile.am b/doc/Makefile.am index e83c1a5e..dcb86ac8 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -51,6 +51,6 @@ install-data-local: -(cd $(DESTDIR); gtkdoc-fixxref --module=$(DOC_MODULE) --html-dir=$(HTML_DIR)) dist-hook: - (cd $(srcdir) ; tar cvf - xml.html FAQ.html encoding.html structure.gif DOM.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -) + (cd $(srcdir) ; tar cvf - *.html *.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -) .PHONY : html sgml templates scan diff --git a/libxml.spec.in b/libxml.spec.in index b28e1e74..82753d88 100644 --- a/libxml.spec.in +++ b/libxml.spec.in @@ -92,8 +92,8 @@ rm -rf $RPM_BUILD_ROOT %defattr(-, root, root) %doc AUTHORS ChangeLog NEWS README COPYING COPYING.LIB TODO -%doc /usr/man/man1/xmllint.1 -%doc /usr/man/man4/libxml.4 +%doc /usr/man/man1/xmllint.1* +%doc /usr/man/man4/libxml.4* %{prefix}/lib/lib*.so.* %{prefix}/bin/xmllint diff --git a/parser.c b/parser.c index 4b435104..6f97fb07 100644 --- a/parser.c +++ b/parser.c @@ -899,311 +899,6 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, } -/************************************************************************ - * * - * Commodity functions to handle encodings * - * * - ************************************************************************/ - -/** - * xmlSwitchEncoding: - * @ctxt: the parser context - * @enc: the encoding value (number) - * - * change the input functions when discovering the character encoding - * of a given entity. - * - * Returns 0 in case of success, -1 otherwise - */ -int -xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) -{ - xmlCharEncodingHandlerPtr handler; - - switch (enc) { - case XML_CHAR_ENCODING_ERROR: - ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "encoding unknown\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - break; - case XML_CHAR_ENCODING_NONE: - /* let's assume it's UTF-8 without the XML decl */ - ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(0); - case XML_CHAR_ENCODING_UTF8: - /* default encoding, no conversion should be needed */ - ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(0); - default: - break; - } - handler = xmlGetCharEncodingHandler(enc); - if (handler == NULL) { - /* - * Default handlers. - */ - switch (enc) { - case XML_CHAR_ENCODING_ERROR: - ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "encoding unknown\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - ctxt->charset = XML_CHAR_ENCODING_UTF8; - break; - case XML_CHAR_ENCODING_NONE: - /* let's assume it's UTF-8 without the XML decl */ - ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(0); - case XML_CHAR_ENCODING_UTF8: - case XML_CHAR_ENCODING_ASCII: - /* default encoding, no conversion should be needed */ - ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(0); - case XML_CHAR_ENCODING_UTF16LE: - break; - case XML_CHAR_ENCODING_UTF16BE: - break; - case XML_CHAR_ENCODING_UCS4LE: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding USC4 little endian not supported\n"); - break; - case XML_CHAR_ENCODING_UCS4BE: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding USC4 big endian not supported\n"); - break; - case XML_CHAR_ENCODING_EBCDIC: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding EBCDIC not supported\n"); - break; - case XML_CHAR_ENCODING_UCS4_2143: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding UCS4 2143 not supported\n"); - break; - case XML_CHAR_ENCODING_UCS4_3412: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding UCS4 3412 not supported\n"); - break; - case XML_CHAR_ENCODING_UCS2: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding UCS2 not supported\n"); - break; - case XML_CHAR_ENCODING_8859_1: - case XML_CHAR_ENCODING_8859_2: - case XML_CHAR_ENCODING_8859_3: - case XML_CHAR_ENCODING_8859_4: - case XML_CHAR_ENCODING_8859_5: - case XML_CHAR_ENCODING_8859_6: - case XML_CHAR_ENCODING_8859_7: - case XML_CHAR_ENCODING_8859_8: - case XML_CHAR_ENCODING_8859_9: - /* - * We used to keep the internal content in the - * document encoding however this turns being unmaintainable - * So xmlGetCharEncodingHandler() will return non-null - * values for this now. - */ - if ((ctxt->inputNr == 1) && - (ctxt->encoding == NULL) && - (ctxt->input->encoding != NULL)) { - ctxt->encoding = xmlStrdup(ctxt->input->encoding); - } - ctxt->charset = enc; - return(0); - case XML_CHAR_ENCODING_2022_JP: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding ISO-2022-JPnot supported\n"); - break; - case XML_CHAR_ENCODING_SHIFT_JIS: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding Shift_JIS not supported\n"); - break; - case XML_CHAR_ENCODING_EUC_JP: - ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "char encoding EUC-JPnot supported\n"); - break; - } - } - if (handler == NULL) - return(-1); - ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(xmlSwitchToEncoding(ctxt, handler)); -} - -/** - * xmlSwitchToEncoding: - * @ctxt: the parser context - * @handler: the encoding handler - * - * change the input functions when discovering the character encoding - * of a given entity. - * - * Returns 0 in case of success, -1 otherwise - */ -int -xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) -{ - int nbchars; - - if (handler != NULL) { - if (ctxt->input != NULL) { - if (ctxt->input->buf != NULL) { - if (ctxt->input->buf->encoder != NULL) { - if (ctxt->input->buf->encoder == handler) - return(0); - /* - * Note: this is a bit dangerous, but that's what it - * takes to use nearly compatible signature for different - * encodings. - */ - xmlCharEncCloseFunc(ctxt->input->buf->encoder); - ctxt->input->buf->encoder = handler; - return(0); - } - ctxt->input->buf->encoder = handler; - - /* - * Is there already some content down the pipe to convert ? - */ - if ((ctxt->input->buf->buffer != NULL) && - (ctxt->input->buf->buffer->use > 0)) { - int processed; - - /* - * Specific handling of the Byte Order Mark for - * UTF-16 - */ - if ((handler->name != NULL) && - (!strcmp(handler->name, "UTF-16LE")) && - (ctxt->input->cur[0] == 0xFF) && - (ctxt->input->cur[1] == 0xFE)) { - ctxt->input->cur += 2; - } - if ((handler->name != NULL) && - (!strcmp(handler->name, "UTF-16BE")) && - (ctxt->input->cur[0] == 0xFE) && - (ctxt->input->cur[1] == 0xFF)) { - ctxt->input->cur += 2; - } - - /* - * Shring the current input buffer. - * Move it as the raw buffer and create a new input buffer - */ - processed = ctxt->input->cur - ctxt->input->base; - xmlBufferShrink(ctxt->input->buf->buffer, processed); - ctxt->input->buf->raw = ctxt->input->buf->buffer; - ctxt->input->buf->buffer = xmlBufferCreate(); - - if (ctxt->html) { - /* - * converst as much as possbile of the buffer - */ - nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, - ctxt->input->buf->buffer, - ctxt->input->buf->raw); - } else { - /* - * convert just enough to get - * '' - * parsed with the autodetected encoding - * into the parser reading buffer. - */ - nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder, - ctxt->input->buf->buffer, - ctxt->input->buf->raw); - } - if (nbchars < 0) { - fprintf(stderr, "xmlSwitchToEncoding: encoder error\n"); - return(-1); - } - ctxt->input->base = - ctxt->input->cur = ctxt->input->buf->buffer->content; - - } - return(0); - } else { - if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) { - /* - * When parsing a static memory array one must know the - * size to be able to convert the buffer. - */ - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "xmlSwitchEncoding : no input\n"); - return(-1); - } else { - int processed; - - /* - * Shring the current input buffer. - * Move it as the raw buffer and create a new input buffer - */ - processed = ctxt->input->cur - ctxt->input->base; - - ctxt->input->buf->raw = xmlBufferCreate(); - xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur, - ctxt->input->length - processed); - ctxt->input->buf->buffer = xmlBufferCreate(); - - /* - * convert as much as possible of the raw input - * to the parser reading buffer. - */ - nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, - ctxt->input->buf->buffer, - ctxt->input->buf->raw); - if (nbchars < 0) { - fprintf(stderr, "xmlSwitchToEncoding: encoder error\n"); - return(-1); - } - - /* - * Conversion succeeded, get rid of the old buffer - */ - if ((ctxt->input->free != NULL) && - (ctxt->input->base != NULL)) - ctxt->input->free((xmlChar *) ctxt->input->base); - ctxt->input->base = - ctxt->input->cur = ctxt->input->buf->buffer->content; - } - } - } else { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "xmlSwitchEncoding : no input\n"); - return(-1); - } - /* - * The parsing is now done in UTF8 natively - */ - ctxt->charset = XML_CHAR_ENCODING_UTF8; - } else - return(-1); - return(0); - -} - /************************************************************************ * * * Commodity functions to handle xmlChars * diff --git a/parserInternals.c b/parserInternals.c index c20b90ad..de757aa4 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1494,6 +1494,311 @@ xmlCopyChar(int len, xmlChar *out, int val) { return(1); } +/************************************************************************ + * * + * Commodity functions to switch encodings * + * * + ************************************************************************/ + +/** + * xmlSwitchEncoding: + * @ctxt: the parser context + * @enc: the encoding value (number) + * + * change the input functions when discovering the character encoding + * of a given entity. + * + * Returns 0 in case of success, -1 otherwise + */ +int +xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) +{ + xmlCharEncodingHandlerPtr handler; + + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "encoding unknown\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + break; + case XML_CHAR_ENCODING_NONE: + /* let's assume it's UTF-8 without the XML decl */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + case XML_CHAR_ENCODING_UTF8: + /* default encoding, no conversion should be needed */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + default: + break; + } + handler = xmlGetCharEncodingHandler(enc); + if (handler == NULL) { + /* + * Default handlers. + */ + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "encoding unknown\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + ctxt->charset = XML_CHAR_ENCODING_UTF8; + break; + case XML_CHAR_ENCODING_NONE: + /* let's assume it's UTF-8 without the XML decl */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + case XML_CHAR_ENCODING_UTF8: + case XML_CHAR_ENCODING_ASCII: + /* default encoding, no conversion should be needed */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(0); + case XML_CHAR_ENCODING_UTF16LE: + break; + case XML_CHAR_ENCODING_UTF16BE: + break; + case XML_CHAR_ENCODING_UCS4LE: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding USC4 little endian not supported\n"); + break; + case XML_CHAR_ENCODING_UCS4BE: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding USC4 big endian not supported\n"); + break; + case XML_CHAR_ENCODING_EBCDIC: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding EBCDIC not supported\n"); + break; + case XML_CHAR_ENCODING_UCS4_2143: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding UCS4 2143 not supported\n"); + break; + case XML_CHAR_ENCODING_UCS4_3412: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding UCS4 3412 not supported\n"); + break; + case XML_CHAR_ENCODING_UCS2: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding UCS2 not supported\n"); + break; + case XML_CHAR_ENCODING_8859_1: + case XML_CHAR_ENCODING_8859_2: + case XML_CHAR_ENCODING_8859_3: + case XML_CHAR_ENCODING_8859_4: + case XML_CHAR_ENCODING_8859_5: + case XML_CHAR_ENCODING_8859_6: + case XML_CHAR_ENCODING_8859_7: + case XML_CHAR_ENCODING_8859_8: + case XML_CHAR_ENCODING_8859_9: + /* + * We used to keep the internal content in the + * document encoding however this turns being unmaintainable + * So xmlGetCharEncodingHandler() will return non-null + * values for this now. + */ + if ((ctxt->inputNr == 1) && + (ctxt->encoding == NULL) && + (ctxt->input->encoding != NULL)) { + ctxt->encoding = xmlStrdup(ctxt->input->encoding); + } + ctxt->charset = enc; + return(0); + case XML_CHAR_ENCODING_2022_JP: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding ISO-2022-JPnot supported\n"); + break; + case XML_CHAR_ENCODING_SHIFT_JIS: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding Shift_JIS not supported\n"); + break; + case XML_CHAR_ENCODING_EUC_JP: + ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "char encoding EUC-JPnot supported\n"); + break; + } + } + if (handler == NULL) + return(-1); + ctxt->charset = XML_CHAR_ENCODING_UTF8; + return(xmlSwitchToEncoding(ctxt, handler)); +} + +/** + * xmlSwitchToEncoding: + * @ctxt: the parser context + * @handler: the encoding handler + * + * change the input functions when discovering the character encoding + * of a given entity. + * + * Returns 0 in case of success, -1 otherwise + */ +int +xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) +{ + int nbchars; + + if (handler != NULL) { + if (ctxt->input != NULL) { + if (ctxt->input->buf != NULL) { + if (ctxt->input->buf->encoder != NULL) { + if (ctxt->input->buf->encoder == handler) + return(0); + /* + * Note: this is a bit dangerous, but that's what it + * takes to use nearly compatible signature for different + * encodings. + */ + xmlCharEncCloseFunc(ctxt->input->buf->encoder); + ctxt->input->buf->encoder = handler; + return(0); + } + ctxt->input->buf->encoder = handler; + + /* + * Is there already some content down the pipe to convert ? + */ + if ((ctxt->input->buf->buffer != NULL) && + (ctxt->input->buf->buffer->use > 0)) { + int processed; + + /* + * Specific handling of the Byte Order Mark for + * UTF-16 + */ + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-16LE")) && + (ctxt->input->cur[0] == 0xFF) && + (ctxt->input->cur[1] == 0xFE)) { + ctxt->input->cur += 2; + } + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-16BE")) && + (ctxt->input->cur[0] == 0xFE) && + (ctxt->input->cur[1] == 0xFF)) { + ctxt->input->cur += 2; + } + + /* + * Shring the current input buffer. + * Move it as the raw buffer and create a new input buffer + */ + processed = ctxt->input->cur - ctxt->input->base; + xmlBufferShrink(ctxt->input->buf->buffer, processed); + ctxt->input->buf->raw = ctxt->input->buf->buffer; + ctxt->input->buf->buffer = xmlBufferCreate(); + + if (ctxt->html) { + /* + * converst as much as possbile of the buffer + */ + nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + } else { + /* + * convert just enough to get + * '' + * parsed with the autodetected encoding + * into the parser reading buffer. + */ + nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + } + if (nbchars < 0) { + fprintf(stderr, "xmlSwitchToEncoding: encoder error\n"); + return(-1); + } + ctxt->input->base = + ctxt->input->cur = ctxt->input->buf->buffer->content; + + } + return(0); + } else { + if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) { + /* + * When parsing a static memory array one must know the + * size to be able to convert the buffer. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSwitchEncoding : no input\n"); + return(-1); + } else { + int processed; + + /* + * Shring the current input buffer. + * Move it as the raw buffer and create a new input buffer + */ + processed = ctxt->input->cur - ctxt->input->base; + + ctxt->input->buf->raw = xmlBufferCreate(); + xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur, + ctxt->input->length - processed); + ctxt->input->buf->buffer = xmlBufferCreate(); + + /* + * convert as much as possible of the raw input + * to the parser reading buffer. + */ + nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, + ctxt->input->buf->buffer, + ctxt->input->buf->raw); + if (nbchars < 0) { + fprintf(stderr, "xmlSwitchToEncoding: encoder error\n"); + return(-1); + } + + /* + * Conversion succeeded, get rid of the old buffer + */ + if ((ctxt->input->free != NULL) && + (ctxt->input->base != NULL)) + ctxt->input->free((xmlChar *) ctxt->input->base); + ctxt->input->base = + ctxt->input->cur = ctxt->input->buf->buffer->content; + } + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSwitchEncoding : no input\n"); + return(-1); + } + /* + * The parsing is now done in UTF8 natively + */ + ctxt->charset = XML_CHAR_ENCODING_UTF8; + } else + return(-1); + return(0); + +} + /************************************************************************ * * * Commodity functions to handle entities processing * @@ -1705,7 +2010,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { return(NULL); URI = xmlStrdup((xmlChar *) filename); - directory = xmlParserGetDirectory(URI); + directory = xmlParserGetDirectory((const char *) URI); inputStream = xmlNewInputStream(ctxt); if (inputStream == NULL) { @@ -1714,7 +2019,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { return(NULL); } - inputStream->filename = URI; + inputStream->filename = (const char *) URI; inputStream->directory = directory; inputStream->buf = buf; diff --git a/tree.c b/tree.c index 4366336a..51741348 100644 --- a/tree.c +++ b/tree.c @@ -2637,6 +2637,9 @@ xmlNodeSetLang(xmlNodePtr cur, const xmlChar *lang) { case XML_PI_NODE: case XML_ENTITY_REF_NODE: case XML_ENTITY_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return; case XML_ELEMENT_NODE: case XML_ATTRIBUTE_NODE: @@ -2719,6 +2722,9 @@ xmlNodeSetName(xmlNodePtr cur, const xmlChar *name) { case XML_DOCUMENT_FRAG_NODE: case XML_NOTATION_NODE: case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return; case XML_ELEMENT_NODE: case XML_ATTRIBUTE_NODE: @@ -2845,6 +2851,9 @@ xmlNodeGetContent(xmlNodePtr cur) { case XML_DOCUMENT_TYPE_NODE: case XML_NOTATION_NODE: case XML_DTD_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return(NULL); case XML_ELEMENT_DECL: /* TODO !!! */ @@ -2930,6 +2939,9 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) { case XML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif break; case XML_NOTATION_NODE: break; @@ -3012,6 +3024,9 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) { case XML_DTD_NODE: case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif break; case XML_ELEMENT_DECL: /* TODO !!! */ @@ -3096,6 +3111,9 @@ xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) { case XML_DTD_NODE: case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif break; case XML_ELEMENT_DECL: case XML_ATTRIBUTE_DECL: diff --git a/valid.c b/valid.c index 3e38e3a5..f2fc71d2 100644 --- a/valid.c +++ b/valid.c @@ -3561,6 +3561,9 @@ xmlSprintfElementChilds(char *buf, xmlNodePtr node, int glob) { break; case XML_ATTRIBUTE_NODE: case XML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_FRAG_NODE: diff --git a/xpath.c b/xpath.c index 433bb560..03b5392f 100644 --- a/xpath.c +++ b/xpath.c @@ -1504,6 +1504,9 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_FRAG_NODE: case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return(((xmlDocPtr) ctxt->context->node)->children); case XML_ELEMENT_DECL: case XML_ATTRIBUTE_DECL: @@ -1632,6 +1635,9 @@ xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_FRAG_NODE: case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return(NULL); } } @@ -1686,6 +1692,9 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_FRAG_NODE: case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return(NULL); } return(NULL); @@ -1717,6 +1726,9 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_FRAG_NODE: case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif return(NULL); } return(NULL);