![]() | ![]() | ![]() | Gnome XML Library Reference Manual | ![]() |
---|
encoding —
enum xmlCharEncoding; int (*xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, unsigned char *in, int *inlen); int (*xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, unsigned char *in, int *inlen); struct xmlCharEncodingHandler; typedef xmlCharEncodingHandlerPtr; void xmlInitCharEncodingHandlers (void); void xmlCleanupCharEncodingHandlers (void); void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler (xmlCharEncoding enc); xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler (const char *name); xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output); int xmlAddEncodingAlias (const char *name, const char *alias); int xmlDelEncodingAlias (const char *alias); const char* xmlGetEncodingAlias (const char *alias); void xmlCleanupEncodingAliases (void); xmlCharEncoding xmlParseCharEncoding (const char *name); const char* xmlGetCharEncodingName (xmlCharEncoding enc); xmlCharEncoding xmlDetectCharEncoding (unsigned char *in, int len); int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in); int xmlCharEncInFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in); int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in); int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); int UTF8Toisolat1 (unsigned char *out, int *outlen, unsigned char *in, int *inlen); int isolat1ToUTF8 (unsigned char *out, int *outlen, unsigned char *in, int *inlen); int xmlGetUTF8Char (unsigned char *utf, int *len); int xmlCheckUTF8 (unsigned char *utf); int xmlUTF8Strsize (const xmlChar *utf, int len); xmlChar* xmlUTF8Strndup (const xmlChar *utf, int len); xmlChar* xmlUTF8Strpos (const xmlChar *utf, int pos); int xmlUTF8Strloc (const xmlChar *utf, const xmlChar *utfchar); xmlChar* xmlUTF8Strsub (const xmlChar *utf, int start, int len); int 
xmlUTF8Strlen (const xmlChar *utf);
typedef enum { XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ } xmlCharEncoding;
Predefined values for some standard encodings. Libxml doesn't do beforehand translation on UTF8 or ISOLatinX. It also supports UTF16 (LE and BE) by default.
Anything else would have to be translated to UTF8 before being given to the parser itself. The BOM for UTF16 and the encoding declaration are looked at and a converter is looked for at that point. If not found, the parser stops here, as asked by the XML REC. A converter can be registered by the user using xmlRegisterCharEncodingHandler, but the current form doesn't allow stateful transcoding (a serious problem, agreed!). If iconv has been found it will be used automatically and allows stateful transcoding; the simplest is then to be sure to enable iconv and to provide iconv libs for the encoding support needed.
int (*xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, unsigned char *in, int *inlen);
Take a block of chars in the original encoding and try to convert it to a UTF-8 block of chars out.
out : | |
outlen : | |
in : | |
inlen : | |
Returns : |
int (*xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, unsigned char *in, int *inlen);
Take a block of UTF-8 chars in and try to convert it to another encoding. Note: a first call designed to produce heading info is made with in = NULL. If stateful, this should also initialize the encoder state.
out : | |
outlen : | |
in : | |
inlen : | |
Returns : |
struct xmlCharEncodingHandler { char *name; xmlCharEncodingInputFunc input; xmlCharEncodingOutputFunc output; #ifdef LIBXML_ICONV_ENABLED iconv_t iconv_in; iconv_t iconv_out; #endif /* LIBXML_ICONV_ENABLED */ };
void xmlInitCharEncodingHandlers (void);
Initialize the char encoding support, it registers the default encoding supported. NOTE: while public, this function usually doesn't need to be called in normal processing.
void xmlCleanupCharEncodingHandlers (void);
Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases.
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
Register the char encoding handler, surprising, isn't it ?
handler : |
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler (xmlCharEncoding enc);
Search in the registered set the handler able to read/write that encoding.
enc : | |
Returns : |
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler (const char *name);
Search in the registered set the handler able to read/write that encoding.
name : | |
Returns : |
xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output);
Creates and registers an xmlCharEncodingHandler.
name : | |
input : | |
output : | |
Returns : |
int xmlAddEncodingAlias (const char *name, const char *alias);
Registers an alias alias for an encoding named name. An existing alias will be overwritten.
name : | |
alias : | |
Returns : |
int xmlDelEncodingAlias (const char *alias);
Unregisters an encoding alias alias
alias : | |
Returns : |
const char* xmlGetEncodingAlias (const char *alias);
Lookup an encoding name for the given alias.
alias : | |
Returns : |
xmlCharEncoding xmlParseCharEncoding (const char *name);
Compare the string to the encoding schemes already known. Note that the comparison is case insensitive, in accordance with section [XML] 4.3.3 Character Encoding in Entities.
name : | |
Returns : |
const char* xmlGetCharEncodingName (xmlCharEncoding enc);
The "canonical" name for XML encoding.
C.f. http://www.w3.org/TR/REC-xml
enc : | |
Returns : |
xmlCharEncoding xmlDetectCharEncoding (unsigned char *in, int len);
Guess the encoding of the entity using the first bytes of the entity content, according to the non-normative appendix F of the XML-1.0 recommendation.
in : | |
len : | |
Returns : |
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in);
Generic front-end for the encoding handler output function. A first call with in == NULL has to be made first to initiate the output in case of non-stateless encoding needing to initiate their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence.
handler : | |
out : | |
in : | |
Returns : |
int xmlCharEncInFunc (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in);
Generic front-end for the encoding handler input function
handler : | |
out : | |
in : | |
Returns : |
int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in);
Front-end for the encoding handler input function, but handles only the very first line, i.e. limits itself to 45 chars.
handler : | |
out : | |
in : | |
Returns : |
int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);
Generic front-end for encoding handler close function
handler : | |
Returns : |
int UTF8Toisolat1 (unsigned char *out, int *outlen, unsigned char *in, int *inlen);
Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 block of chars out.
out : | |
outlen : | |
in : | |
inlen : | |
Returns : |
int isolat1ToUTF8 (unsigned char *out, int *outlen, unsigned char *in, int *inlen);
Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 block of chars out.
out : | |
outlen : | |
in : | |
inlen : | |
Returns : |
int xmlGetUTF8Char (unsigned char *utf, int *len);
Read one UTF8 Char from utf
utf : | |
len : | |
Returns : |
int xmlCheckUTF8 (unsigned char *utf);
Checks utf for being valid utf-8. utf is assumed to be null-terminated. This function is not super-strict, as it will allow longer utf-8 sequences than necessary. Note that Java is capable of producing these sequences if provoked. Also note, this routine checks for the 4-byte maximum size, but does not check for 0x10ffff maximum value.
utf : | |
Returns : |
int xmlUTF8Strsize (const xmlChar *utf, int len);
storage size of an UTF8 string
utf : | |
len : | |
Returns : |
xmlChar* xmlUTF8Strndup (const xmlChar *utf, int len);
a strndup for array of UTF8's
utf : | |
len : | |
Returns : |
xmlChar* xmlUTF8Strpos (const xmlChar *utf, int pos);
a function to provide the equivalent of fetching a character from a string array
utf : | |
pos : | |
Returns : |
int xmlUTF8Strloc (const xmlChar *utf, const xmlChar *utfchar);
a function to provide relative location of a UTF8 char
utf : | |
utfchar : | |
Returns : |
xmlChar* xmlUTF8Strsub (const xmlChar *utf, int start, int len);
Note: positions are given in units of UTF-8 chars
utf : | |
start : | |
len : | |
Returns : |
int xmlUTF8Strlen (const xmlChar *utf);
Compute the length of a UTF8 string; it doesn't do a full UTF8 check of the content of the string.
utf : | |
Returns : |
<< parserInternals | hash >> |