mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-08-07 06:43:02 +03:00
revamped the encoding support, added iconv support, so now libxml if
* encoding.[ch], xmlIO.[ch], parser.c, configure.in: revamped the encoding support, added iconv support, so now libxml, if compiled with iconv, automatically supports Japanese encodings among others. Work based on initial patch from Yuan-Chen Cheng. I may have broken binary compat in the encoding handler registration scheme, but that was so utterly broken I don't expect anybody to have used this feature until now. * parserInternals.h: fixup on the CHAR range macro. * xml-error.h, parser.c: catch URL/URI errors using the uri.c code. * tree.[ch]: added xmlBufferGrow(), was needed for iconv. * uri.c: added xmlParseURI(), I can't believe I forgot to implement this one in 2.0 !!! * SAX.c: moved doc->encoding update in the endDocument() call. * TODO: updated. Iconv rules :-) Daniel
This commit is contained in:
18
ChangeLog
18
ChangeLog
@@ -1,3 +1,21 @@
|
|||||||
|
Wed May 3 14:21:25 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||||
|
|
||||||
|
* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped
|
||||||
|
the encoding support, added iconv support, so now libxml if
|
||||||
|
compiled with iconv automatically support japanese encodings
|
||||||
|
among others. Work based on initial patch from Yuan-Chen Cheng
|
||||||
|
I may have broken binary compat in the encoding handler
|
||||||
|
registration scheme, but that was so utterly broken I don't
|
||||||
|
expect anybody to have used this feature until now.
|
||||||
|
* parserInternals.h: fixup on the CHAR range macro
|
||||||
|
* xml-error.h, parser.c: catch URL/URI errors using the uri.c
|
||||||
|
code.
|
||||||
|
* tree.[ch]: added xmlBufferGrow(), was needed for iconv
|
||||||
|
* uri.c: added xmlParseURI() I can't believe I forgot to
|
||||||
|
implement this one in 2.0 !!!
|
||||||
|
* SAX.c: moved doc->encoding update in the endDocument() call.
|
||||||
|
* TODO: updated.
|
||||||
|
|
||||||
Mon Apr 24 13:30:13 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
Mon Apr 24 13:30:13 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||||
|
|
||||||
* tree.h: removed extraneous xmlRemoveProp definition
|
* tree.h: removed extraneous xmlRemoveProp definition
|
||||||
|
9
SAX.c
9
SAX.c
@@ -595,6 +595,15 @@ endDocument(void *ctx)
|
|||||||
if (ctxt->validate && ctxt->wellFormed &&
|
if (ctxt->validate && ctxt->wellFormed &&
|
||||||
ctxt->myDoc && ctxt->myDoc->intSubset)
|
ctxt->myDoc && ctxt->myDoc->intSubset)
|
||||||
ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc);
|
ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Grab the encoding if it was added on-the-fly
|
||||||
|
*/
|
||||||
|
if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
|
||||||
|
(ctxt->myDoc->encoding == NULL)) {
|
||||||
|
ctxt->myDoc->encoding = ctxt->encoding;
|
||||||
|
ctxt->encoding = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
9
TODO
9
TODO
@@ -6,6 +6,8 @@
|
|||||||
TODO:
|
TODO:
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
- xmlSwitchToEncoding() needs a rewrite for correct handling of conversion
|
||||||
|
error code conditions.
|
||||||
- DOM needs
|
- DOM needs
|
||||||
xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
|
xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
|
||||||
int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr);
|
int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr);
|
||||||
@@ -14,7 +16,6 @@ TODO:
|
|||||||
- add support for the trick from Henry conf/sun/valid/empty.xml
|
- add support for the trick from Henry conf/sun/valid/empty.xml
|
||||||
- Correct standalone checking/emitting (hard)
|
- Correct standalone checking/emitting (hard)
|
||||||
2.9 Standalone Document Declaration
|
2.9 Standalone Document Declaration
|
||||||
- URI checkings (no fragments) rfc2396.txt
|
|
||||||
- Better checking of external parsed entities TAG 1234
|
- Better checking of external parsed entities TAG 1234
|
||||||
- Find way of representing PERefs in the Dtd so that %entity; can
|
- Find way of representing PERefs in the Dtd so that %entity; can
|
||||||
be saved back.
|
be saved back.
|
||||||
@@ -22,6 +23,7 @@ TODO:
|
|||||||
http://www.w3.org/XML/xml-19980210-errata ... bummmer
|
http://www.w3.org/XML/xml-19980210-errata ... bummmer
|
||||||
- Handle undefined namespaces in entity contents better ... at least
|
- Handle undefined namespaces in entity contents better ... at least
|
||||||
issue a warning
|
issue a warning
|
||||||
|
- Issue warning when using non-absolute namespaces URI.
|
||||||
- General checking of DTD validation in presence of namespaces ... hairy
|
- General checking of DTD validation in presence of namespaces ... hairy
|
||||||
- fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
|
- fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
|
||||||
not WITHOUT_CORBA flag
|
not WITHOUT_CORBA flag
|
||||||
@@ -30,7 +32,7 @@ TODO:
|
|||||||
=====
|
=====
|
||||||
|
|
||||||
- Get OASIS testsuite to a more friendly result, check all the results
|
- Get OASIS testsuite to a more friendly result, check all the results
|
||||||
once stable.
|
once stable. Current state at:
|
||||||
http://xmlsoft.org/conf/result.html
|
http://xmlsoft.org/conf/result.html
|
||||||
|
|
||||||
- Optimization of tag strings allocation ?
|
- Optimization of tag strings allocation ?
|
||||||
@@ -55,11 +57,13 @@ EXTENSIONS:
|
|||||||
|
|
||||||
- Add Xlink recognition/API
|
- Add Xlink recognition/API
|
||||||
=> started adding an xlink.[ch] with a unified API for XML and HTML.
|
=> started adding an xlink.[ch] with a unified API for XML and HTML.
|
||||||
|
it's crap :-(
|
||||||
|
|
||||||
- Implement XSLT
|
- Implement XSLT
|
||||||
=> seems that someone volunteered ?!?
|
=> seems that someone volunteered ?!?
|
||||||
|
|
||||||
- Implement XSchemas
|
- Implement XSchemas
|
||||||
|
=> Really need to be done <grin/>
|
||||||
|
|
||||||
- O2K parsing;
|
- O2K parsing;
|
||||||
=> this is a somewhat ugly mix of HTML and XML, adding a specific
|
=> this is a somewhat ugly mix of HTML and XML, adding a specific
|
||||||
@@ -88,6 +92,7 @@ EXTENSIONS:
|
|||||||
Done:
|
Done:
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
- URI checkings (no fragments) rfc2396.txt
|
||||||
- Added a clean mechanism for overload or added input methods:
|
- Added a clean mechanism for overload or added input methods:
|
||||||
xmlRegisterInputCallbacks()
|
xmlRegisterInputCallbacks()
|
||||||
- dynamically adapt the alloc entry point to use g_alloc()/g_free()
|
- dynamically adapt the alloc entry point to use g_alloc()/g_free()
|
||||||
|
16
configure.in
16
configure.in
@@ -4,7 +4,7 @@ AC_INIT(entities.h)
|
|||||||
AM_CONFIG_HEADER(config.h)
|
AM_CONFIG_HEADER(config.h)
|
||||||
|
|
||||||
LIBXML_MAJOR_VERSION=2
|
LIBXML_MAJOR_VERSION=2
|
||||||
LIBXML_MINOR_VERSION=0
|
LIBXML_MINOR_VERSION=1
|
||||||
LIBXML_MICRO_VERSION=0
|
LIBXML_MICRO_VERSION=0
|
||||||
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
|
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
|
||||||
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
|
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
|
||||||
@@ -203,6 +203,20 @@ fi
|
|||||||
AC_SUBST(WITH_XPATH)
|
AC_SUBST(WITH_XPATH)
|
||||||
AC_SUBST(XPATH_OBJ)
|
AC_SUBST(XPATH_OBJ)
|
||||||
|
|
||||||
|
AC_ARG_WITH(iconv, [ --with-iconv Add the ICONV support (on)])
|
||||||
|
if test "$with_iconv" = "no" ; then
|
||||||
|
echo Disabling ICONV support
|
||||||
|
WITH_ICONV=0
|
||||||
|
else
|
||||||
|
if test "$have_iconv" != "" ; then
|
||||||
|
echo Iconv support not found
|
||||||
|
WITH_ICONV=0
|
||||||
|
else
|
||||||
|
WITH_ICONV=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
AC_SUBST(WITH_ICONV)
|
||||||
|
|
||||||
AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)])
|
AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)])
|
||||||
if test "$with_debug" = "no" ; then
|
if test "$with_debug" = "no" ; then
|
||||||
echo Disabling DEBUG support
|
echo Disabling DEBUG support
|
||||||
|
728
encoding.c
728
encoding.c
File diff suppressed because it is too large
Load Diff
47
encoding.h
47
encoding.h
@@ -22,12 +22,30 @@
|
|||||||
#define __XML_CHAR_ENCODING_H__
|
#define __XML_CHAR_ENCODING_H__
|
||||||
|
|
||||||
#include <libxml/xmlversion.h>
|
#include <libxml/xmlversion.h>
|
||||||
|
#ifdef LIBXML_ICONV_ENABLED
|
||||||
|
#include <iconv.h>
|
||||||
|
#endif
|
||||||
|
#include <libxml/tree.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Predefined values for some standard encodings
|
* Predefined values for some standard encodings
|
||||||
|
* Libxml doesn't do beforehand translation on UTF8, ISOLatinX
|
||||||
|
* It also supports UTF16 (LE and BE) by default.
|
||||||
|
*
|
||||||
|
* Anything else would have to be translated to UTF8 before being
|
||||||
|
* given to the parser itself. The BOM for UTF16 and the encoding
|
||||||
|
* declaration are looked at and a converter is looked for at that
|
||||||
|
* point. If not found the parser stops here as asked by the XML REC
|
||||||
|
* Converter can be registered by the user using xmlRegisterCharEncodingHandler
|
||||||
|
* but the current form doesn't allow stateful transcoding (a serious
|
||||||
|
* problem agreed !). If iconv has been found it will be used
|
||||||
|
* automatically and allow stateful transcoding, the simplest is then
|
||||||
|
* to be sure to enable iconv and to provide iconv libs for the encoding
|
||||||
|
* support needed.
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
||||||
@@ -65,9 +83,13 @@ typedef enum {
|
|||||||
* Take a block of chars in the original encoding and try to convert
|
* Take a block of chars in the original encoding and try to convert
|
||||||
* it to an UTF-8 block of chars out.
|
* it to an UTF-8 block of chars out.
|
||||||
*
|
*
|
||||||
* Returns the number of byte written, or -1 by lack of space.
|
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||||
|
* if the transcoding failed.
|
||||||
|
* The value of @inlen after return is the number of octets consumed
|
||||||
|
* if the return value is positive, else unpredictable.
|
||||||
|
* The value of @outlen after return is the number of octets consumed.
|
||||||
*/
|
*/
|
||||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen);
|
const unsigned char* in, int *inlen);
|
||||||
|
|
||||||
|
|
||||||
@@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
|||||||
*
|
*
|
||||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||||
* if the transcoding failed.
|
* if the transcoding failed.
|
||||||
|
* The value of @inlen after return is the number of octets consumed
|
||||||
|
* if the return value is positive, else unpredictable.
|
||||||
|
* The value of @outlen after return is the number of octets consumed.
|
||||||
*/
|
*/
|
||||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
|
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen);
|
const unsigned char* in, int *inlen);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Block defining the handlers for non UTF-8 encodings.
|
* Block defining the handlers for non UTF-8 encodings.
|
||||||
|
* If iconv is supported, there are two extra fields
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
||||||
@@ -96,7 +123,11 @@ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
|
|||||||
struct _xmlCharEncodingHandler {
|
struct _xmlCharEncodingHandler {
|
||||||
char *name;
|
char *name;
|
||||||
xmlCharEncodingInputFunc input;
|
xmlCharEncodingInputFunc input;
|
||||||
xmlCharEncodingOutputFunc output;
|
xmlCharEncodingOutputFunc output;
|
||||||
|
#ifdef LIBXML_ICONV_ENABLED
|
||||||
|
iconv_t iconv_in;
|
||||||
|
iconv_t iconv_out;
|
||||||
|
#endif /* LIBXML_ICONV_ENABLED */
|
||||||
};
|
};
|
||||||
|
|
||||||
void xmlInitCharEncodingHandlers (void);
|
void xmlInitCharEncodingHandlers (void);
|
||||||
@@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
|
|||||||
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
|
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
|
||||||
int xmlCheckUTF8 (const unsigned char *utf);
|
int xmlCheckUTF8 (const unsigned char *utf);
|
||||||
|
|
||||||
|
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
|
||||||
|
xmlBufferPtr out,
|
||||||
|
xmlBufferPtr in);
|
||||||
|
|
||||||
|
int xmlCharEncInFunc (xmlCharEncodingHandler *handler,
|
||||||
|
xmlBufferPtr out,
|
||||||
|
xmlBufferPtr in);
|
||||||
|
int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@@ -22,12 +22,30 @@
|
|||||||
#define __XML_CHAR_ENCODING_H__
|
#define __XML_CHAR_ENCODING_H__
|
||||||
|
|
||||||
#include <libxml/xmlversion.h>
|
#include <libxml/xmlversion.h>
|
||||||
|
#ifdef LIBXML_ICONV_ENABLED
|
||||||
|
#include <iconv.h>
|
||||||
|
#endif
|
||||||
|
#include <libxml/tree.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Predefined values for some standard encodings
|
* Predefined values for some standard encodings
|
||||||
|
* Libxml doesn't do beforehand translation on UTF8, ISOLatinX
|
||||||
|
* It also supports UTF16 (LE and BE) by default.
|
||||||
|
*
|
||||||
|
* Anything else would have to be translated to UTF8 before being
|
||||||
|
* given to the parser itself. The BOM for UTF16 and the encoding
|
||||||
|
* declaration are looked at and a converter is looked for at that
|
||||||
|
* point. If not found the parser stops here as asked by the XML REC
|
||||||
|
* Converter can be registered by the user using xmlRegisterCharEncodingHandler
|
||||||
|
* but the current form doesn't allow stateful transcoding (a serious
|
||||||
|
* problem agreed !). If iconv has been found it will be used
|
||||||
|
* automatically and allow stateful transcoding, the simplest is then
|
||||||
|
* to be sure to enable iconv and to provide iconv libs for the encoding
|
||||||
|
* support needed.
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
||||||
@@ -65,9 +83,13 @@ typedef enum {
|
|||||||
* Take a block of chars in the original encoding and try to convert
|
* Take a block of chars in the original encoding and try to convert
|
||||||
* it to an UTF-8 block of chars out.
|
* it to an UTF-8 block of chars out.
|
||||||
*
|
*
|
||||||
* Returns the number of byte written, or -1 by lack of space.
|
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||||
|
* if the transcoding failed.
|
||||||
|
* The value of @inlen after return is the number of octets consumed
|
||||||
|
* if the return value is positive, else unpredictable.
|
||||||
|
* The value of @outlen after return is the number of octets consumed.
|
||||||
*/
|
*/
|
||||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen);
|
const unsigned char* in, int *inlen);
|
||||||
|
|
||||||
|
|
||||||
@@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
|||||||
*
|
*
|
||||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||||
* if the transcoding failed.
|
* if the transcoding failed.
|
||||||
|
* The value of @inlen after return is the number of octets consumed
|
||||||
|
* if the return value is positive, else unpredictable.
|
||||||
|
* The value of @outlen after return is the number of octets consumed.
|
||||||
*/
|
*/
|
||||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
|
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen);
|
const unsigned char* in, int *inlen);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Block defining the handlers for non UTF-8 encodings.
|
* Block defining the handlers for non UTF-8 encodings.
|
||||||
|
* If iconv is supported, there are two extra fields
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
||||||
@@ -96,7 +123,11 @@ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
|
|||||||
struct _xmlCharEncodingHandler {
|
struct _xmlCharEncodingHandler {
|
||||||
char *name;
|
char *name;
|
||||||
xmlCharEncodingInputFunc input;
|
xmlCharEncodingInputFunc input;
|
||||||
xmlCharEncodingOutputFunc output;
|
xmlCharEncodingOutputFunc output;
|
||||||
|
#ifdef LIBXML_ICONV_ENABLED
|
||||||
|
iconv_t iconv_in;
|
||||||
|
iconv_t iconv_out;
|
||||||
|
#endif /* LIBXML_ICONV_ENABLED */
|
||||||
};
|
};
|
||||||
|
|
||||||
void xmlInitCharEncodingHandlers (void);
|
void xmlInitCharEncodingHandlers (void);
|
||||||
@@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
|
|||||||
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
|
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
|
||||||
int xmlCheckUTF8 (const unsigned char *utf);
|
int xmlCheckUTF8 (const unsigned char *utf);
|
||||||
|
|
||||||
|
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
|
||||||
|
xmlBufferPtr out,
|
||||||
|
xmlBufferPtr in);
|
||||||
|
|
||||||
|
int xmlCharEncInFunc (xmlCharEncodingHandler *handler,
|
||||||
|
xmlBufferPtr out,
|
||||||
|
xmlBufferPtr in);
|
||||||
|
int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@@ -28,10 +28,10 @@ extern "C" {
|
|||||||
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
||||||
*/
|
*/
|
||||||
#define IS_CHAR(c) \
|
#define IS_CHAR(c) \
|
||||||
((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
(((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
||||||
(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
|
(((c) >= 0x20) && ((c) <= 0xD7FF)) || \
|
||||||
(((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) && \
|
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
|
||||||
((c) <= 0x10FFFF))
|
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
||||||
@@ -442,8 +442,10 @@ xmlParserCtxtPtr xmlNewParserCtxt (void);
|
|||||||
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
|
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
|
||||||
const xmlChar *ID,
|
const xmlChar *ID,
|
||||||
const xmlChar *base);
|
const xmlChar *base);
|
||||||
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
||||||
xmlCharEncoding enc);
|
xmlCharEncoding enc);
|
||||||
|
int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
|
||||||
|
xmlCharEncodingHandlerPtr handler);
|
||||||
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
|
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -380,6 +380,8 @@ void xmlBufferCCat (xmlBufferPtr buf,
|
|||||||
const char *str);
|
const char *str);
|
||||||
int xmlBufferShrink (xmlBufferPtr buf,
|
int xmlBufferShrink (xmlBufferPtr buf,
|
||||||
int len);
|
int len);
|
||||||
|
int xmlBufferGrow (xmlBufferPtr buf,
|
||||||
|
int len);
|
||||||
void xmlBufferEmpty (xmlBufferPtr buf);
|
void xmlBufferEmpty (xmlBufferPtr buf);
|
||||||
const xmlChar* xmlBufferContent (const xmlBufferPtr buf);
|
const xmlChar* xmlBufferContent (const xmlBufferPtr buf);
|
||||||
int xmlBufferUse (const xmlBufferPtr buf);
|
int xmlBufferUse (const xmlBufferPtr buf);
|
||||||
|
@@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
|
|||||||
xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
|
xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
|
||||||
|
|
||||||
xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */
|
xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */
|
||||||
|
xmlBufferPtr raw; /* if encoder != NULL buffer for raw input */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
600
parser.c
600
parser.c
@@ -41,6 +41,7 @@
|
|||||||
#include <libxml/valid.h>
|
#include <libxml/valid.h>
|
||||||
#include <libxml/parserInternals.h>
|
#include <libxml/parserInternals.h>
|
||||||
#include <libxml/xmlIO.h>
|
#include <libxml/xmlIO.h>
|
||||||
|
#include <libxml/uri.h>
|
||||||
#include "xml-error.h"
|
#include "xml-error.h"
|
||||||
|
|
||||||
#define XML_PARSER_BIG_BUFFER_SIZE 1000
|
#define XML_PARSER_BIG_BUFFER_SIZE 1000
|
||||||
@@ -483,7 +484,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
|
|||||||
if ((ctxt->sax != NULL) &&
|
if ((ctxt->sax != NULL) &&
|
||||||
(ctxt->sax->error != NULL))
|
(ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"Char out of allowed range\n");
|
"Char 0x%X out of allowed range\n", val);
|
||||||
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
||||||
ctxt->wellFormed = 0;
|
ctxt->wellFormed = 0;
|
||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
@@ -612,7 +613,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
|
|||||||
if ((ctxt->sax != NULL) &&
|
if ((ctxt->sax != NULL) &&
|
||||||
(ctxt->sax->error != NULL))
|
(ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"Char out of allowed range\n");
|
"Char 0x%X out of allowed range\n", val);
|
||||||
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
||||||
ctxt->wellFormed = 0;
|
ctxt->wellFormed = 0;
|
||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
@@ -727,7 +728,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
|
|||||||
if ((ctxt->sax != NULL) &&
|
if ((ctxt->sax != NULL) &&
|
||||||
(ctxt->sax->error != NULL))
|
(ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"Char out of allowed range\n");
|
"Char 0x%X out of allowed range\n", val);
|
||||||
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
||||||
ctxt->wellFormed = 0;
|
ctxt->wellFormed = 0;
|
||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
@@ -2278,96 +2279,209 @@ xmlCheckLanguageID(const xmlChar *lang) {
|
|||||||
*
|
*
|
||||||
* change the input functions when discovering the character encoding
|
* change the input functions when discovering the character encoding
|
||||||
* of a given entity.
|
* of a given entity.
|
||||||
|
*
|
||||||
|
* Returns 0 in case of success, -1 otherwise
|
||||||
*/
|
*/
|
||||||
void
|
int
|
||||||
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||||
{
|
{
|
||||||
xmlCharEncodingHandlerPtr handler;
|
xmlCharEncodingHandlerPtr handler;
|
||||||
|
|
||||||
|
switch (enc) {
|
||||||
|
case XML_CHAR_ENCODING_ERROR:
|
||||||
|
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->disableSAX = 1;
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_NONE:
|
||||||
|
/* let's assume it's UTF-8 without the XML decl */
|
||||||
|
return(0);
|
||||||
|
case XML_CHAR_ENCODING_UTF8:
|
||||||
|
/* default encoding, no conversion should be needed */
|
||||||
|
return(0);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
handler = xmlGetCharEncodingHandler(enc);
|
handler = xmlGetCharEncodingHandler(enc);
|
||||||
|
if (handler == NULL) {
|
||||||
|
/*
|
||||||
|
* Default handlers.
|
||||||
|
*/
|
||||||
|
switch (enc) {
|
||||||
|
case XML_CHAR_ENCODING_ERROR:
|
||||||
|
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->disableSAX = 1;
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_NONE:
|
||||||
|
/* let's assume it's UTF-8 without the XML decl */
|
||||||
|
return(0);
|
||||||
|
case XML_CHAR_ENCODING_UTF8:
|
||||||
|
/* default encoding, no conversion should be needed */
|
||||||
|
return(0);
|
||||||
|
case XML_CHAR_ENCODING_UTF16LE:
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_UTF16BE:
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_UCS4LE:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding USC4 little endian not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_UCS4BE:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding USC4 big endian not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_EBCDIC:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding EBCDIC not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_UCS4_2143:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding UCS4 2143 not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_UCS4_3412:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding UCS4 3412 not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_UCS2:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding UCS2 not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_8859_1:
|
||||||
|
case XML_CHAR_ENCODING_8859_2:
|
||||||
|
case XML_CHAR_ENCODING_8859_3:
|
||||||
|
case XML_CHAR_ENCODING_8859_4:
|
||||||
|
case XML_CHAR_ENCODING_8859_5:
|
||||||
|
case XML_CHAR_ENCODING_8859_6:
|
||||||
|
case XML_CHAR_ENCODING_8859_7:
|
||||||
|
case XML_CHAR_ENCODING_8859_8:
|
||||||
|
case XML_CHAR_ENCODING_8859_9:
|
||||||
|
/*
|
||||||
|
* Keep the internal content in the document encoding
|
||||||
|
*/
|
||||||
|
if ((ctxt->inputNr == 1) &&
|
||||||
|
(ctxt->encoding == NULL) &&
|
||||||
|
(ctxt->input->encoding != NULL)) {
|
||||||
|
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
case XML_CHAR_ENCODING_2022_JP:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding ISO-2022-JPnot supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding Shift_JIS not supported\n");
|
||||||
|
break;
|
||||||
|
case XML_CHAR_ENCODING_EUC_JP:
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"char encoding EUC-JPnot supported\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (handler == NULL)
|
||||||
|
return(-1);
|
||||||
|
return(xmlSwitchToEncoding(ctxt, handler));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlSwitchToEncoding:
|
||||||
|
* @ctxt: the parser context
|
||||||
|
* @handler: the encoding handler
|
||||||
|
*
|
||||||
|
* change the input functions when discovering the character encoding
|
||||||
|
* of a given entity.
|
||||||
|
*
|
||||||
|
* Returns 0 in case of success, -1 otherwise
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||||
|
{
|
||||||
|
int nbchars;
|
||||||
|
|
||||||
if (handler != NULL) {
|
if (handler != NULL) {
|
||||||
if (ctxt->input != NULL) {
|
if (ctxt->input != NULL) {
|
||||||
if (ctxt->input->buf != NULL) {
|
if (ctxt->input->buf != NULL) {
|
||||||
if (ctxt->input->buf->encoder != NULL) {
|
if (ctxt->input->buf->encoder != NULL) {
|
||||||
|
if (ctxt->input->buf->encoder == handler)
|
||||||
|
return(0);
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"xmlSwitchEncoding : encoder already regitered\n");
|
"xmlSwitchEncoding : encoder already regitered\n");
|
||||||
return;
|
return(-1);
|
||||||
}
|
}
|
||||||
ctxt->input->buf->encoder = handler;
|
ctxt->input->buf->encoder = handler;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is there already some content down the pipe to convert
|
* Is there already some content down the pipe to convert ?
|
||||||
*/
|
*/
|
||||||
if ((ctxt->input->buf->buffer != NULL) &&
|
if ((ctxt->input->buf->buffer != NULL) &&
|
||||||
(ctxt->input->buf->buffer->use > 0)) {
|
(ctxt->input->buf->buffer->use > 0)) {
|
||||||
xmlChar *buf;
|
|
||||||
int res, len, size;
|
|
||||||
int processed;
|
int processed;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Specific handling of the Byte Order Mark for
|
* Specific handling of the Byte Order Mark for
|
||||||
* UTF-16
|
* UTF-16
|
||||||
*/
|
*/
|
||||||
if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
|
if ((handler->name != NULL) &&
|
||||||
|
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||||
(ctxt->input->cur[0] == 0xFF) &&
|
(ctxt->input->cur[0] == 0xFF) &&
|
||||||
(ctxt->input->cur[1] == 0xFE)) {
|
(ctxt->input->cur[1] == 0xFE)) {
|
||||||
SKIP(2);
|
ctxt->input->cur += 2;
|
||||||
}
|
}
|
||||||
if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
|
if ((handler->name != NULL) &&
|
||||||
|
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||||
(ctxt->input->cur[0] == 0xFE) &&
|
(ctxt->input->cur[0] == 0xFE) &&
|
||||||
(ctxt->input->cur[1] == 0xFF)) {
|
(ctxt->input->cur[1] == 0xFF)) {
|
||||||
SKIP(2);
|
ctxt->input->cur += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* convert the non processed part
|
* Shrink the current input buffer.
|
||||||
|
* Move it as the raw buffer and create a new input buffer
|
||||||
*/
|
*/
|
||||||
processed = ctxt->input->cur - ctxt->input->base;
|
processed = ctxt->input->cur - ctxt->input->base;
|
||||||
len = ctxt->input->buf->buffer->use - processed;
|
xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
||||||
|
ctxt->input->buf->raw = ctxt->input->buf->buffer;
|
||||||
if (len <= 0) {
|
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||||
return;
|
|
||||||
}
|
|
||||||
size = ctxt->input->buf->buffer->use * 4;
|
|
||||||
if (size < 4000)
|
|
||||||
size = 4000;
|
|
||||||
retry_larger:
|
|
||||||
buf = (xmlChar *) xmlMalloc(size + 1);
|
|
||||||
if (buf == NULL) {
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"xmlSwitchEncoding : out of memory\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* TODO !!! Handling of buf too small */
|
|
||||||
res = handler->input(buf, size, ctxt->input->cur, &len);
|
|
||||||
if (res == -1) {
|
|
||||||
size *= 2;
|
|
||||||
xmlFree(buf);
|
|
||||||
goto retry_larger;
|
|
||||||
}
|
|
||||||
if ((res < 0) ||
|
|
||||||
(len != ctxt->input->buf->buffer->use - processed)) {
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"xmlSwitchEncoding : conversion failed\n");
|
|
||||||
xmlFree(buf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Conversion succeeded, get rid of the old buffer
|
* convert as much as possible of the raw input
|
||||||
|
* to the parser reading buffer.
|
||||||
*/
|
*/
|
||||||
xmlFree(ctxt->input->buf->buffer->content);
|
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||||
ctxt->input->buf->buffer->content = buf;
|
ctxt->input->buf->buffer,
|
||||||
ctxt->input->base = buf;
|
ctxt->input->buf->raw);
|
||||||
ctxt->input->cur = buf;
|
if (nbchars < 0) {
|
||||||
ctxt->input->buf->buffer->size = size;
|
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||||
ctxt->input->buf->buffer->use = res;
|
return(-1);
|
||||||
buf[res] = 0;
|
}
|
||||||
|
ctxt->input->base =
|
||||||
|
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||||
}
|
}
|
||||||
return;
|
return(0);
|
||||||
} else {
|
} else {
|
||||||
if (ctxt->input->length == 0) {
|
if (ctxt->input->length == 0) {
|
||||||
/*
|
/*
|
||||||
@@ -2377,191 +2491,59 @@ retry_larger:
|
|||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"xmlSwitchEncoding : no input\n");
|
"xmlSwitchEncoding : no input\n");
|
||||||
return;
|
return(-1);
|
||||||
} else {
|
} else {
|
||||||
xmlChar *buf;
|
int processed;
|
||||||
int res, len;
|
|
||||||
int processed = ctxt->input->cur - ctxt->input->base;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* convert the non processed part
|
* Shrink the current input buffer.
|
||||||
|
* Move it as the raw buffer and create a new input buffer
|
||||||
*/
|
*/
|
||||||
len = ctxt->input->length - processed;
|
processed = ctxt->input->cur - ctxt->input->base;
|
||||||
if (len <= 0) {
|
ctxt->input->buf->raw = xmlBufferCreate();
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->input->length - processed);
|
||||||
"xmlSwitchEncoding : input fully consumed?\n");
|
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||||
return;
|
|
||||||
}
|
/*
|
||||||
buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
|
* convert as much as possible of the raw input
|
||||||
if (buf == NULL) {
|
* to the parser reading buffer.
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
*/
|
||||||
ctxt->sax->error(ctxt->userData,
|
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||||
"xmlSwitchEncoding : out of memory\n");
|
ctxt->input->buf->buffer,
|
||||||
return;
|
ctxt->input->buf->raw);
|
||||||
}
|
if (nbchars < 0) {
|
||||||
res = handler->input(buf, ctxt->input->length * 4,
|
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||||
ctxt->input->cur, &len);
|
return(-1);
|
||||||
if ((res < 0) ||
|
|
||||||
(len != ctxt->input->length - processed)) {
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"xmlSwitchEncoding : conversion failed\n");
|
|
||||||
xmlFree(buf);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Conversion succeeded, get rid of the old buffer
|
* Conversion succeeded, get rid of the old buffer
|
||||||
*/
|
*/
|
||||||
if ((ctxt->input->free != NULL) &&
|
if ((ctxt->input->free != NULL) &&
|
||||||
(ctxt->input->base != NULL))
|
(ctxt->input->base != NULL))
|
||||||
ctxt->input->free((xmlChar *) ctxt->input->base);
|
ctxt->input->free((xmlChar *) ctxt->input->base);
|
||||||
ctxt->input->base = ctxt->input->cur = buf;
|
ctxt->input->base =
|
||||||
ctxt->input->length = res;
|
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"xmlSwitchEncoding : no input\n");
|
"xmlSwitchEncoding : no input\n");
|
||||||
|
return(-1);
|
||||||
}
|
}
|
||||||
}
|
/*
|
||||||
|
* The parsing is now done in UTF8 natively
|
||||||
|
*/
|
||||||
|
if (ctxt->encoding != NULL) {
|
||||||
|
xmlFree((xmlChar *) ctxt->encoding);
|
||||||
|
ctxt->encoding = NULL;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
return(-1);
|
||||||
|
return(0);
|
||||||
|
|
||||||
switch (enc) {
|
|
||||||
case XML_CHAR_ENCODING_ERROR:
|
|
||||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
|
||||||
ctxt->wellFormed = 0;
|
|
||||||
ctxt->disableSAX = 1;
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_NONE:
|
|
||||||
/* let's assume it's UTF-8 without the XML decl */
|
|
||||||
return;
|
|
||||||
case XML_CHAR_ENCODING_UTF8:
|
|
||||||
/* default encoding, no conversion should be needed */
|
|
||||||
return;
|
|
||||||
case XML_CHAR_ENCODING_UTF16LE:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding UTF16 little endian not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_UTF16BE:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding UTF16 big endian not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_UCS4LE:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding USC4 little endian not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_UCS4BE:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding USC4 big endian not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_EBCDIC:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding EBCDIC not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_UCS4_2143:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding UCS4 2143 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_UCS4_3412:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding UCS4 3412 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_UCS2:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding UCS2 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_1:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_1 ISO Latin 1 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_2:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_2 ISO Latin 2 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_3:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_3 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_4:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_4 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_5:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_5 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_6:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_6 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_7:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_7 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_8:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_8 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_8859_9:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO_8859_9 not supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_2022_JP:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding ISO-2022-JPnot supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding Shift_JISnot supported\n");
|
|
||||||
break;
|
|
||||||
case XML_CHAR_ENCODING_EUC_JP:
|
|
||||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
|
||||||
ctxt->sax->error(ctxt->userData,
|
|
||||||
"char encoding EUC-JPnot supported\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
@@ -4253,7 +4235,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
|
|||||||
void
|
void
|
||||||
xmlParseComment(xmlParserCtxtPtr ctxt) {
|
xmlParseComment(xmlParserCtxtPtr ctxt) {
|
||||||
xmlChar *buf = NULL;
|
xmlChar *buf = NULL;
|
||||||
int len = 0;
|
int len;
|
||||||
int size = XML_PARSER_BUFFER_SIZE;
|
int size = XML_PARSER_BUFFER_SIZE;
|
||||||
int q, ql;
|
int q, ql;
|
||||||
int r, rl;
|
int r, rl;
|
||||||
@@ -4282,10 +4264,11 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
|
|||||||
r = CUR_CHAR(rl);
|
r = CUR_CHAR(rl);
|
||||||
NEXTL(rl);
|
NEXTL(rl);
|
||||||
cur = CUR_CHAR(l);
|
cur = CUR_CHAR(l);
|
||||||
|
len = 0;
|
||||||
while (IS_CHAR(cur) &&
|
while (IS_CHAR(cur) &&
|
||||||
((cur != '>') ||
|
((cur != '>') ||
|
||||||
(r != '-') || (q != '-'))) {
|
(r != '-') || (q != '-'))) {
|
||||||
if ((r == '-') && (q == '-')) {
|
if ((r == '-') && (q == '-') && (len > 1)) {
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
"Comment must not contain '--' (double-hyphen)`\n");
|
"Comment must not contain '--' (double-hyphen)`\n");
|
||||||
@@ -4732,11 +4715,36 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
}
|
}
|
||||||
if (URI) {
|
if (URI) {
|
||||||
if ((ctxt->sax != NULL) &&
|
xmlURIPtr uri;
|
||||||
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
|
|
||||||
ctxt->sax->entityDecl(ctxt->userData, name,
|
uri = xmlParseURI((const char *) URI);
|
||||||
XML_EXTERNAL_PARAMETER_ENTITY,
|
if (uri == NULL) {
|
||||||
literal, URI, NULL);
|
if ((ctxt->sax != NULL) &&
|
||||||
|
(!ctxt->disableSAX) &&
|
||||||
|
(ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"Invalid URI: %s\n", URI);
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->errNo = XML_ERR_INVALID_URI;
|
||||||
|
} else {
|
||||||
|
if (uri->fragment != NULL) {
|
||||||
|
if ((ctxt->sax != NULL) &&
|
||||||
|
(!ctxt->disableSAX) &&
|
||||||
|
(ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"Fragment not allowed: %s\n", URI);
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->errNo = XML_ERR_URI_FRAGMENT;
|
||||||
|
} else {
|
||||||
|
if ((ctxt->sax != NULL) &&
|
||||||
|
(!ctxt->disableSAX) &&
|
||||||
|
(ctxt->sax->entityDecl != NULL))
|
||||||
|
ctxt->sax->entityDecl(ctxt->userData, name,
|
||||||
|
XML_EXTERNAL_PARAMETER_ENTITY,
|
||||||
|
literal, URI, NULL);
|
||||||
|
}
|
||||||
|
xmlFreeURI(uri);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -4757,6 +4765,31 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->wellFormed = 0;
|
ctxt->wellFormed = 0;
|
||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
}
|
}
|
||||||
|
if (URI) {
|
||||||
|
xmlURIPtr uri;
|
||||||
|
|
||||||
|
uri = xmlParseURI((const char *)URI);
|
||||||
|
if (uri == NULL) {
|
||||||
|
if ((ctxt->sax != NULL) &&
|
||||||
|
(!ctxt->disableSAX) &&
|
||||||
|
(ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"Invalid URI: %s\n", URI);
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->errNo = XML_ERR_INVALID_URI;
|
||||||
|
} else {
|
||||||
|
if (uri->fragment != NULL) {
|
||||||
|
if ((ctxt->sax != NULL) &&
|
||||||
|
(!ctxt->disableSAX) &&
|
||||||
|
(ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"Fragment not allowed: %s\n", URI);
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->errNo = XML_ERR_URI_FRAGMENT;
|
||||||
|
}
|
||||||
|
xmlFreeURI(uri);
|
||||||
|
}
|
||||||
|
}
|
||||||
if ((RAW != '>') && (!IS_BLANK(CUR))) {
|
if ((RAW != '>') && (!IS_BLANK(CUR))) {
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
@@ -5973,7 +6006,20 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
|||||||
/*
|
/*
|
||||||
* We know that '<?xml' is here.
|
* We know that '<?xml' is here.
|
||||||
*/
|
*/
|
||||||
SKIP(5);
|
if ((RAW == '<') && (NXT(1) == '?') &&
|
||||||
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
|
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||||
|
SKIP(5);
|
||||||
|
} else {
|
||||||
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
|
ctxt->sax->error(ctxt->userData,
|
||||||
|
"Text declaration '<?xml' required\n");
|
||||||
|
ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
|
||||||
|
ctxt->wellFormed = 0;
|
||||||
|
ctxt->disableSAX = 1;
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!IS_BLANK(CUR)) {
|
if (!IS_BLANK(CUR)) {
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
@@ -6003,7 +6049,13 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->wellFormed = 0;
|
ctxt->wellFormed = 0;
|
||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
}
|
}
|
||||||
ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
|
xmlParseEncodingDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing right here
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
SKIP_BLANKS;
|
SKIP_BLANKS;
|
||||||
if ((RAW == '?') && (NXT(1) == '>')) {
|
if ((RAW == '?') && (NXT(1) == '>')) {
|
||||||
@@ -6192,6 +6244,13 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
|
|||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
(NXT(4) == 'l')) {
|
(NXT(4) == 'l')) {
|
||||||
xmlParseTextDecl(ctxt);
|
xmlParseTextDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing right here
|
||||||
|
*/
|
||||||
|
ctxt->instate = XML_PARSER_EOF;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (ctxt->myDoc == NULL) {
|
if (ctxt->myDoc == NULL) {
|
||||||
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
|
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
|
||||||
@@ -6441,6 +6500,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
|||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||||
xmlParseTextDecl(ctxt);
|
xmlParseTextDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing right here
|
||||||
|
*/
|
||||||
|
ctxt->instate = XML_PARSER_EOF;
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (input->standalone) {
|
if (input->standalone) {
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||||
ctxt->sax->error(ctxt->userData,
|
ctxt->sax->error(ctxt->userData,
|
||||||
@@ -6947,6 +7013,15 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) {
|
|||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||||
xmlParseTextDecl(ctxt);
|
xmlParseTextDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing
|
||||||
|
* right here
|
||||||
|
*/
|
||||||
|
ctxt->instate = XML_PARSER_EOF;
|
||||||
|
xmlFree(name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (ctxt->token == 0)
|
if (ctxt->token == 0)
|
||||||
ctxt->token = ' ';
|
ctxt->token = ' ';
|
||||||
@@ -8197,6 +8272,38 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
|
ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
|
||||||
}
|
}
|
||||||
|
if (encoding != NULL) {
|
||||||
|
xmlCharEncoding enc;
|
||||||
|
xmlCharEncodingHandlerPtr handler;
|
||||||
|
|
||||||
|
if (ctxt->input->encoding != NULL)
|
||||||
|
xmlFree((xmlChar *) ctxt->input->encoding);
|
||||||
|
ctxt->input->encoding = encoding;
|
||||||
|
|
||||||
|
enc = xmlParseCharEncoding((const char *) encoding);
|
||||||
|
/*
|
||||||
|
* registered set of known encodings
|
||||||
|
*/
|
||||||
|
if (enc != XML_CHAR_ENCODING_ERROR) {
|
||||||
|
xmlSwitchEncoding(ctxt, enc);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
xmlFree(encoding);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* fallback for unknown encodings
|
||||||
|
*/
|
||||||
|
handler = xmlFindCharEncodingHandler((const char *) encoding);
|
||||||
|
if (handler != NULL) {
|
||||||
|
xmlSwitchToEncoding(ctxt, handler);
|
||||||
|
} else {
|
||||||
|
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||||
|
xmlFree(encoding);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return(encoding);
|
return(encoding);
|
||||||
}
|
}
|
||||||
@@ -8362,7 +8469,13 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
|
|||||||
ctxt->wellFormed = 0;
|
ctxt->wellFormed = 0;
|
||||||
ctxt->disableSAX = 1;
|
ctxt->disableSAX = 1;
|
||||||
}
|
}
|
||||||
ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
|
xmlParseEncodingDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing right here
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We may have the standalone status.
|
* We may have the standalone status.
|
||||||
@@ -8489,12 +8602,19 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
|||||||
if ((RAW == '<') && (NXT(1) == '?') &&
|
if ((RAW == '<') && (NXT(1) == '?') &&
|
||||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that we will switch encoding on the fly.
|
||||||
|
*/
|
||||||
xmlParseXMLDecl(ctxt);
|
xmlParseXMLDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing right here
|
||||||
|
*/
|
||||||
|
return(-1);
|
||||||
|
}
|
||||||
ctxt->standalone = ctxt->input->standalone;
|
ctxt->standalone = ctxt->input->standalone;
|
||||||
SKIP_BLANKS;
|
SKIP_BLANKS;
|
||||||
if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
|
|
||||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
|
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
|
||||||
}
|
}
|
||||||
@@ -8581,14 +8701,6 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
|||||||
(!ctxt->disableSAX))
|
(!ctxt->disableSAX))
|
||||||
ctxt->sax->endDocument(ctxt->userData);
|
ctxt->sax->endDocument(ctxt->userData);
|
||||||
|
|
||||||
/*
|
|
||||||
* Grab the encoding if it was added on-the-fly
|
|
||||||
*/
|
|
||||||
if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
|
|
||||||
(ctxt->myDoc->encoding == NULL)) {
|
|
||||||
ctxt->myDoc->encoding = ctxt->encoding;
|
|
||||||
ctxt->encoding = NULL;
|
|
||||||
}
|
|
||||||
if (! ctxt->wellFormed) return(-1);
|
if (! ctxt->wellFormed) return(-1);
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
@@ -8805,6 +8917,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
fprintf(stderr, "PP: Parsing XML Decl\n");
|
fprintf(stderr, "PP: Parsing XML Decl\n");
|
||||||
#endif
|
#endif
|
||||||
xmlParseXMLDecl(ctxt);
|
xmlParseXMLDecl(ctxt);
|
||||||
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||||
|
/*
|
||||||
|
* The XML REC instructs us to stop parsing right
|
||||||
|
* here
|
||||||
|
*/
|
||||||
|
ctxt->instate = XML_PARSER_EOF;
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
ctxt->standalone = ctxt->input->standalone;
|
ctxt->standalone = ctxt->input->standalone;
|
||||||
if ((ctxt->encoding == NULL) &&
|
if ((ctxt->encoding == NULL) &&
|
||||||
(ctxt->input->encoding != NULL))
|
(ctxt->input->encoding != NULL))
|
||||||
|
@@ -28,10 +28,10 @@ extern "C" {
|
|||||||
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
||||||
*/
|
*/
|
||||||
#define IS_CHAR(c) \
|
#define IS_CHAR(c) \
|
||||||
((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
(((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
||||||
(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
|
(((c) >= 0x20) && ((c) <= 0xD7FF)) || \
|
||||||
(((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) && \
|
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
|
||||||
((c) <= 0x10FFFF))
|
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
||||||
@@ -442,8 +442,10 @@ xmlParserCtxtPtr xmlNewParserCtxt (void);
|
|||||||
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
|
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
|
||||||
const xmlChar *ID,
|
const xmlChar *ID,
|
||||||
const xmlChar *base);
|
const xmlChar *base);
|
||||||
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
||||||
xmlCharEncoding enc);
|
xmlCharEncoding enc);
|
||||||
|
int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
|
||||||
|
xmlCharEncodingHandlerPtr handler);
|
||||||
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
|
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
25
tree.c
25
tree.c
@@ -3771,6 +3771,31 @@ xmlBufferShrink(xmlBufferPtr buf, int len) {
|
|||||||
return(len);
|
return(len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlBufferGrow:
|
||||||
|
* @buf: the buffer
|
||||||
|
* @len: the minimum free sie to allocate
|
||||||
|
*
|
||||||
|
* Grow the available space of an XML buffer.
|
||||||
|
*
|
||||||
|
* Returns the new available space or -1 in case of error
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xmlBufferGrow(xmlBufferPtr buf, int len) {
|
||||||
|
int size;
|
||||||
|
xmlChar *newbuf;
|
||||||
|
|
||||||
|
if (len <= buf->use) return(0);
|
||||||
|
|
||||||
|
size = buf->size + buf->use + len + 100;
|
||||||
|
|
||||||
|
newbuf = xmlRealloc(buf->content, size);
|
||||||
|
if (newbuf == NULL) return(-1);
|
||||||
|
buf->content = newbuf;
|
||||||
|
buf->size = size;
|
||||||
|
return(buf->size - buf->use);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlBufferDump:
|
* xmlBufferDump:
|
||||||
* @file: the file output
|
* @file: the file output
|
||||||
|
2
tree.h
2
tree.h
@@ -380,6 +380,8 @@ void xmlBufferCCat (xmlBufferPtr buf,
|
|||||||
const char *str);
|
const char *str);
|
||||||
int xmlBufferShrink (xmlBufferPtr buf,
|
int xmlBufferShrink (xmlBufferPtr buf,
|
||||||
int len);
|
int len);
|
||||||
|
int xmlBufferGrow (xmlBufferPtr buf,
|
||||||
|
int len);
|
||||||
void xmlBufferEmpty (xmlBufferPtr buf);
|
void xmlBufferEmpty (xmlBufferPtr buf);
|
||||||
const xmlChar* xmlBufferContent (const xmlBufferPtr buf);
|
const xmlChar* xmlBufferContent (const xmlBufferPtr buf);
|
||||||
int xmlBufferUse (const xmlBufferPtr buf);
|
int xmlBufferUse (const xmlBufferPtr buf);
|
||||||
|
28
uri.c
28
uri.c
@@ -1283,6 +1283,34 @@ xmlParseURIReference(xmlURIPtr uri, const char *str) {
|
|||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlParseURI:
|
||||||
|
* @str: the URI string to analyze
|
||||||
|
*
|
||||||
|
* Parse an URI
|
||||||
|
*
|
||||||
|
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
||||||
|
*
|
||||||
|
* Returns a newly build xmlURIPtr or NULL in case of error
|
||||||
|
*/
|
||||||
|
xmlURIPtr
|
||||||
|
xmlParseURI(const char *str) {
|
||||||
|
xmlURIPtr uri;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (str == NULL)
|
||||||
|
return(NULL);
|
||||||
|
uri = xmlCreateURI();
|
||||||
|
if (uri != NULL) {
|
||||||
|
ret = xmlParseURIReference(uri, str);
|
||||||
|
if (ret) {
|
||||||
|
xmlFreeURI(uri);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(uri);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlNormalizeURIPath:
|
* xmlNormalizeURIPath:
|
||||||
* @path: pointer to the path string
|
* @path: pointer to the path string
|
||||||
|
@@ -130,7 +130,9 @@ typedef enum {
|
|||||||
XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
|
XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
|
||||||
XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
|
XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
|
||||||
XML_ERR_ENTITY_LOOP, /* 89 */
|
XML_ERR_ENTITY_LOOP, /* 89 */
|
||||||
XML_ERR_ENTITY_BOUNDARY /* 90 */
|
XML_ERR_ENTITY_BOUNDARY, /* 90 */
|
||||||
|
XML_ERR_INVALID_URI, /* 91 */
|
||||||
|
XML_ERR_URI_FRAGMENT /* 92 */
|
||||||
}xmlParserErrors;
|
}xmlParserErrors;
|
||||||
|
|
||||||
void xmlParserError (void *ctx,
|
void xmlParserError (void *ctx,
|
||||||
|
89
xmlIO.c
89
xmlIO.c
@@ -498,6 +498,10 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
|
|||||||
}
|
}
|
||||||
ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
|
ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
|
||||||
ret->encoder = xmlGetCharEncodingHandler(enc);
|
ret->encoder = xmlGetCharEncodingHandler(enc);
|
||||||
|
if (ret->encoder != NULL)
|
||||||
|
ret->raw = xmlBufferCreate();
|
||||||
|
else
|
||||||
|
ret->raw = NULL;
|
||||||
ret->readcallback = NULL;
|
ret->readcallback = NULL;
|
||||||
ret->closecallback = NULL;
|
ret->closecallback = NULL;
|
||||||
ret->context = NULL;
|
ret->context = NULL;
|
||||||
@@ -513,13 +517,20 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
|
|||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
|
xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
|
||||||
if (in->buffer != NULL) {
|
if (in->raw) {
|
||||||
xmlBufferFree(in->buffer);
|
xmlBufferFree(in->raw);
|
||||||
in->buffer = NULL;
|
in->raw = NULL;
|
||||||
|
}
|
||||||
|
if (in->encoder != NULL) {
|
||||||
|
xmlCharEncCloseFunc(in->encoder);
|
||||||
}
|
}
|
||||||
if (in->closecallback != NULL) {
|
if (in->closecallback != NULL) {
|
||||||
in->closecallback(in->context);
|
in->closecallback(in->context);
|
||||||
}
|
}
|
||||||
|
if (in->buffer != NULL) {
|
||||||
|
xmlBufferFree(in->buffer);
|
||||||
|
in->buffer = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
|
memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
|
||||||
xmlFree(in);
|
xmlFree(in);
|
||||||
@@ -683,34 +694,22 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
|
|||||||
|
|
||||||
if (len < 0) return(0);
|
if (len < 0) return(0);
|
||||||
if (in->encoder != NULL) {
|
if (in->encoder != NULL) {
|
||||||
xmlChar *buffer;
|
/*
|
||||||
int processed = len;
|
* Store the data in the incoming raw buffer
|
||||||
|
|
||||||
buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
|
|
||||||
if (buffer == NULL) {
|
|
||||||
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
|
|
||||||
return(-1);
|
|
||||||
}
|
|
||||||
nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
|
|
||||||
(xmlChar *) buf, &processed);
|
|
||||||
/*
|
|
||||||
* TODO : we really need to have something atomic or the
|
|
||||||
* encoder must report the number of bytes read
|
|
||||||
*/
|
*/
|
||||||
|
if (in->raw == NULL) {
|
||||||
|
in->raw = xmlBufferCreate();
|
||||||
|
}
|
||||||
|
xmlBufferAdd(in->raw, (const xmlChar *) buf, len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* convert as much as possible to the parser reading buffer.
|
||||||
|
*/
|
||||||
|
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
||||||
if (nbchars < 0) {
|
if (nbchars < 0) {
|
||||||
fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
|
fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
|
||||||
xmlFree(buffer);
|
|
||||||
return(-1);
|
return(-1);
|
||||||
}
|
}
|
||||||
if (processed != len) {
|
|
||||||
fprintf(stderr,
|
|
||||||
"TODO xmlParserInputBufferPush: processed != len\n");
|
|
||||||
xmlFree(buffer);
|
|
||||||
return(-1);
|
|
||||||
}
|
|
||||||
buffer[nbchars] = 0;
|
|
||||||
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
|
|
||||||
xmlFree(buffer);
|
|
||||||
} else {
|
} else {
|
||||||
nbchars = len;
|
nbchars = len;
|
||||||
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
|
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
|
||||||
@@ -730,7 +729,9 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
|
|||||||
* Grow up the content of the input buffer, the old data are preserved
|
* Grow up the content of the input buffer, the old data are preserved
|
||||||
* This routine handle the I18N transcoding to internal UTF-8
|
* This routine handle the I18N transcoding to internal UTF-8
|
||||||
* This routine is used when operating the parser in normal (pull) mode
|
* This routine is used when operating the parser in normal (pull) mode
|
||||||
* TODO: one should be able to remove one extra copy
|
*
|
||||||
|
* TODO: one should be able to remove one extra copy by copying directly
|
||||||
|
* onto in->buffer or in->raw
|
||||||
*
|
*
|
||||||
* Returns the number of chars read and stored in the buffer, or -1
|
* Returns the number of chars read and stored in the buffer, or -1
|
||||||
* in case of error.
|
* in case of error.
|
||||||
@@ -779,34 +780,22 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
|
|||||||
return(-1);
|
return(-1);
|
||||||
}
|
}
|
||||||
if (in->encoder != NULL) {
|
if (in->encoder != NULL) {
|
||||||
xmlChar *buf;
|
/*
|
||||||
int wrote = res;
|
* Store the data in the incoming raw buffer
|
||||||
|
*/
|
||||||
buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
|
if (in->raw == NULL) {
|
||||||
if (buf == NULL) {
|
in->raw = xmlBufferCreate();
|
||||||
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
|
|
||||||
xmlFree(buffer);
|
|
||||||
return(-1);
|
|
||||||
}
|
}
|
||||||
nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
|
xmlBufferAdd(in->raw, (const xmlChar *) buffer, len);
|
||||||
BAD_CAST buffer, &wrote);
|
|
||||||
buf[nbchars] = 0;
|
|
||||||
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
|
|
||||||
xmlFree(buf);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that the encoder was able to process the full input
|
* convert as much as possible to the parser reading buffer.
|
||||||
*/
|
*/
|
||||||
if (wrote != res) {
|
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
||||||
fprintf(stderr,
|
if (nbchars < 0) {
|
||||||
"TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
|
fprintf(stderr, "xmlParserInputBufferGrow: encoder error\n");
|
||||||
wrote, res);
|
return(-1);
|
||||||
/*
|
|
||||||
* TODO !!!
|
|
||||||
* Need to keep the unprocessed input in a buffer in->unprocessed
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
nbchars = res;
|
nbchars = res;
|
||||||
buffer[nbchars] = 0;
|
buffer[nbchars] = 0;
|
||||||
|
1
xmlIO.h
1
xmlIO.h
@@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
|
|||||||
xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
|
xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
|
||||||
|
|
||||||
xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */
|
xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */
|
||||||
|
xmlBufferPtr raw; /* if encoder != NULL buffer for raw input */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user