Table of Contents
int xmlDelEncodingAlias (const char * alias)
const char * xmlGetEncodingAlias (const char * alias)
int xmlUTF8Strloc (const xmlChar * utf, const xmlChar * utfchar)
void xmlCleanupCharEncodingHandlers (void)
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler (const char * name)
int xmlCharEncCloseFunc (xmlCharEncodingHandler * handler)
int xmlUTF8Size (const xmlChar * utf)
xmlCharEncoding xmlParseCharEncoding (const char * name)
xmlChar * xmlUTF8Strpos (const xmlChar * utf, int pos)
Structure xmlCharEncodingHandler struct _xmlCharEncodingHandler {
char * name
xmlCharEncodingInputFunc input
xmlCharEncodingOutputFunc output
iconv_t iconv_in
iconv_t iconv_out
}
const char * xmlGetCharEncodingName (xmlCharEncoding enc)
int xmlCheckUTF8 (const unsigned char * utf)
int UTF8Toisolat1 (unsigned char * out, int * outlen, const unsigned char * in, int * inlen)
int xmlCharEncInFunc (xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in)
int xmlAddEncodingAlias (const char * name, const char * alias)
Typedef xmlCharEncodingHandler * xmlCharEncodingHandlerPtr
int xmlCharEncOutFunc (xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in)
Function type: xmlCharEncodingInputFunc
int xmlCharEncodingInputFunc (unsigned char * out, int * outlen, const unsigned char * in, int * inlen)
Take a block of chars in the original encoding and try to convert it to a UTF-8 block of chars out. out: | a pointer to an array of bytes to store the UTF-8 result | outlen: | the length of @out | in: | a pointer to an array of chars in the original encoding | inlen: | the length of @in | Returns: | the number of bytes written, or -1 for lack of space, or -2 if the transcoding failed. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced. |
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler (xmlCharEncoding enc)
xmlChar * xmlUTF8Strsub (const xmlChar * utf, int start, int len)
int xmlGetUTF8Char (const unsigned char * utf, int * len)
int isolat1ToUTF8 (unsigned char * out, int * outlen, const unsigned char * in, int * inlen)
xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char * name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
void xmlInitCharEncodingHandlers (void)
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler)
int xmlCharEncFirstLine (xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in)
Enum xmlCharEncoding {
XML_CHAR_ENCODING_ERROR = -1 : No char encoding detected
XML_CHAR_ENCODING_NONE = 0 : No char encoding detected
XML_CHAR_ENCODING_UTF8 = 1 : UTF-8
XML_CHAR_ENCODING_UTF16LE = 2 : UTF-16 little endian
XML_CHAR_ENCODING_UTF16BE = 3 : UTF-16 big endian
XML_CHAR_ENCODING_UCS4LE = 4 : UCS-4 little endian
XML_CHAR_ENCODING_UCS4BE = 5 : UCS-4 big endian
XML_CHAR_ENCODING_EBCDIC = 6 : EBCDIC uh!
XML_CHAR_ENCODING_UCS4_2143 = 7 : UCS-4 unusual ordering
XML_CHAR_ENCODING_UCS4_3412 = 8 : UCS-4 unusual ordering
XML_CHAR_ENCODING_UCS2 = 9 : UCS-2
XML_CHAR_ENCODING_8859_1 = 10 : ISO-8859-1 ISO Latin 1
XML_CHAR_ENCODING_8859_2 = 11 : ISO-8859-2 ISO Latin 2
XML_CHAR_ENCODING_8859_3 = 12 : ISO-8859-3
XML_CHAR_ENCODING_8859_4 = 13 : ISO-8859-4
XML_CHAR_ENCODING_8859_5 = 14 : ISO-8859-5
XML_CHAR_ENCODING_8859_6 = 15 : ISO-8859-6
XML_CHAR_ENCODING_8859_7 = 16 : ISO-8859-7
XML_CHAR_ENCODING_8859_8 = 17 : ISO-8859-8
XML_CHAR_ENCODING_8859_9 = 18 : ISO-8859-9
XML_CHAR_ENCODING_2022_JP = 19 : ISO-2022-JP
XML_CHAR_ENCODING_SHIFT_JIS = 20 : Shift_JIS
XML_CHAR_ENCODING_EUC_JP = 21 : EUC-JP
XML_CHAR_ENCODING_ASCII = 22 : pure ASCII
}
int xmlUTF8Strsize (const xmlChar * utf, int len)
int xmlUTF8Charcmp (const xmlChar * utf1, const xmlChar * utf2)
int xmlUTF8Strlen (const xmlChar * utf)
Function type: xmlCharEncodingOutputFunc
int xmlCharEncodingOutputFunc (unsigned char * out, int * outlen, const unsigned char * in, int * inlen)
Take a block of UTF-8 chars in and try to convert it to another encoding. Note: a first call designed to produce heading info is called with in = NULL. If stateful this should also initialize the encoder state. out: | a pointer to an array of bytes to store the result | outlen: | the length of @out | in: | a pointer to an array of UTF-8 chars | inlen: | the length of @in | Returns: | the number of bytes written, or -1 for lack of space, or -2 if the transcoding failed. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced. |
xmlChar * xmlUTF8Strndup (const xmlChar * utf, int len)
xmlCharEncoding xmlDetectCharEncoding (const unsigned char * in, int len)
void xmlCleanupEncodingAliases (void)
Description
Function: xmlDelEncodingAliasint xmlDelEncodingAlias (const char * alias)
Unregisters an encoding alias @alias
alias: | the alias name as parsed, in UTF-8 format (ASCII actually) | Returns: | 0 in case of success, -1 in case of error |
Function: xmlGetEncodingAliasconst char * xmlGetEncodingAlias (const char * alias)
Lookup an encoding name for the given alias.
alias: | the alias name as parsed, in UTF-8 format (ASCII actually) | Returns: | NULL if not found, otherwise the original name |
Function: xmlUTF8Strlocint xmlUTF8Strloc (const xmlChar * utf, const xmlChar * utfchar)
a function to provide relative location of a UTF8 char
utf: | the input UTF8 * | utfchar: | the UTF8 character to be found | Returns: | the relative character position of the desired char or -1 if not found |
Function: xmlCleanupCharEncodingHandlersvoid xmlCleanupCharEncodingHandlers (void)
Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases.
Function: xmlFindCharEncodingHandlerxmlCharEncodingHandlerPtr xmlFindCharEncodingHandler (const char * name)
Search in the registered set the handler able to read/write that encoding.
name: | a string describing the char encoding. | Returns: | the handler or NULL if not found |
Function: xmlCharEncCloseFuncint xmlCharEncCloseFunc (xmlCharEncodingHandler * handler)
Generic front-end for encoding handler close function
handler: | char encoding transformation data structure | Returns: | 0 if success, or -1 in case of error |
Function: xmlUTF8Sizeint xmlUTF8Size (const xmlChar * utf)
calculates the internal size of a UTF8 character
utf: | pointer to the UTF8 character | Returns: | the numbers of bytes in the character, -1 on format error |
Function: xmlParseCharEncodingxmlCharEncoding xmlParseCharEncoding (const char * name)
Compare the string to the known encoding schemes. Note that the comparison is case insensitive, according to section [XML] 4.3.3 Character Encoding in Entities.
name: | the encoding name as parsed, in UTF-8 format (ASCII actually) | Returns: | one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE if not recognized. |
Function: xmlUTF8StrposxmlChar * xmlUTF8Strpos (const xmlChar * utf, int pos)
a function to provide the equivalent of fetching a character from a string array
utf: | the input UTF8 * | pos: | the position of the desired UTF8 char (in chars) | Returns: | a pointer to the UTF8 character or NULL |
Function: xmlGetCharEncodingNameconst char * xmlGetCharEncodingName (xmlCharEncoding enc)
The "canonical" name for XML encoding. C.f. http://www.w3.org/TR/REC-xml#charencoding Section 4.3.3 Character Encoding in Entities
enc: | the encoding | Returns: | the canonical name for the given encoding |
Function: xmlCheckUTF8int xmlCheckUTF8 (const unsigned char * utf)
Checks @utf for being valid utf-8. @utf is assumed to be null-terminated. This function is not super-strict, as it will allow longer utf-8 sequences than necessary. Note that Java is capable of producing these sequences if provoked. Also note, this routine checks for the 4-byte maximum size, but does not check for 0x10ffff maximum value.
utf: | Pointer to putative utf-8 encoded string. | Returns: | value: true if @utf is valid. |
Function: UTF8Toisolat1int UTF8Toisolat1 (unsigned char * out, int * outlen, const unsigned char * in, int * inlen)
Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 block of chars out.
out: | a pointer to an array of bytes to store the result | outlen: | the length of @out | in: | a pointer to an array of UTF-8 chars | inlen: | the length of @in | Returns: | 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced. |
Function: xmlCharEncInFuncint xmlCharEncInFunc (xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in)
Generic front-end for the encoding handler input function
handler: | char encoding transformation data structure | out: | an xmlBuffer for the output. | in: | an xmlBuffer for the input | Returns: | the number of bytes written if success, -1 in case of general error, or -2 if the transcoding fails (because *in is not a valid utf8 string or the result of the transformation can't fit into the encoding we want) |
Function: xmlAddEncodingAliasint xmlAddEncodingAlias (const char * name, const char * alias)
Registers an alias @alias for an encoding named @name. An existing alias will be overwritten.
name: | the encoding name as parsed, in UTF-8 format (ASCII actually) | alias: | the alias name as parsed, in UTF-8 format (ASCII actually) | Returns: | 0 in case of success, -1 in case of error |
Function: xmlCharEncOutFuncint xmlCharEncOutFunc (xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in)
Generic front-end for the encoding handler output function. A first call with @in == NULL has to be made first to initiate the output in case of non-stateless encodings needing to initiate their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence.
handler: | char encoding transformation data structure | out: | an xmlBuffer for the output. | in: | an xmlBuffer for the input | Returns: | the number of bytes written if success, -1 in case of general error, or -2 if the transcoding fails (because *in is not a valid utf8 string or the result of the transformation can't fit into the encoding we want) |
Take a block of chars in the original encoding and try to convert it to an UTF-8 block of chars out.
Function: xmlGetCharEncodingHandlerxmlCharEncodingHandlerPtr xmlGetCharEncodingHandler (xmlCharEncoding enc)
Search in the registered set the handler able to read/write that encoding.
Function: xmlUTF8StrsubxmlChar * xmlUTF8Strsub (const xmlChar * utf, int start, int len)
Note: positions are given in units of UTF-8 chars
utf: | a sequence of UTF-8 encoded bytes | start: | relative pos of first char | len: | total number to copy | Returns: | a pointer to a newly created string or NULL if any problem |
Function: xmlGetUTF8Charint xmlGetUTF8Char (const unsigned char * utf, int * len)
Read one UTF8 Char from @utf
utf: | a sequence of UTF-8 encoded bytes | len: | a pointer to @bytes len | Returns: | the char value or -1 in case of error and update @len with the number of bytes used |
Function: isolat1ToUTF8int isolat1ToUTF8 (unsigned char * out, int * outlen, const unsigned char * in, int * inlen)
Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 block of chars out.
out: | a pointer to an array of bytes to store the result | outlen: | the length of @out | in: | a pointer to an array of ISO Latin 1 chars | inlen: | the length of @in | Returns: | 0 if success, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced. |
Function: xmlNewCharEncodingHandlerxmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char * name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output)
Creates and registers an xmlCharEncodingHandler.
Function: xmlInitCharEncodingHandlersvoid xmlInitCharEncodingHandlers (void)
Initialize the char encoding support, it registers the default encoding supported. NOTE: while public, this function usually doesn't need to be called in normal processing.
Function: xmlRegisterCharEncodingHandlervoid xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler)
Register the char encoding handler, surprising, isn't it ?
Function: xmlCharEncFirstLineint xmlCharEncFirstLine (xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in)
Front-end for the encoding handler input function, but handle only the very first line, i.e. limit itself to 45 chars.
handler: | char encoding transformation data structure | out: | an xmlBuffer for the output. | in: | an xmlBuffer for the input | Returns: | the number of bytes written if success, -1 in case of general error, or -2 if the transcoding fails (because *in is not a valid utf8 string or the result of the transformation can't fit into the encoding we want) |
Function: xmlUTF8Strsizeint xmlUTF8Strsize (const xmlChar * utf, int len)
storage size of an UTF8 string
utf: | a sequence of UTF-8 encoded bytes | len: | the number of characters in the array | Returns: | the storage size of the first 'len' characters of ARRAY |
Function: xmlUTF8Charcmpint xmlUTF8Charcmp (const xmlChar * utf1, const xmlChar * utf2)
compares the two UCS4 values
utf1: | pointer to first UTF8 char | utf2: | pointer to second UTF8 char | Returns: | result of the compare as with xmlStrncmp |
Function: xmlUTF8Strlenint xmlUTF8Strlen (const xmlChar * utf)
compute the length of an UTF8 string, it doesn't do a full UTF8 checking of the content of the string.
utf: | a sequence of UTF-8 encoded bytes | Returns: | the number of characters in the string or -1 in case of error |
Take a block of UTF-8 chars in and try to convert it to an other encoding. Note: a first call designed to produce heading info is called with in = NULL. If stateful this should also initialize the encoder state.
Function: xmlUTF8StrndupxmlChar * xmlUTF8Strndup (const xmlChar * utf, int len)
a strndup for array of UTF8's
utf: | the input UTF8 * | len: | the len of @utf (in chars) | Returns: | a new UTF8 * or NULL |
Function: xmlDetectCharEncodingxmlCharEncoding xmlDetectCharEncoding (const unsigned char * in, int len)
Guess the encoding of the entity using the first bytes of the entity content, according to the non-normative appendix F of the XML-1.0 recommendation.
in: | a pointer to the first bytes of the XML entity, must be at least 4 bytes long. | len: | the length of the buffer | Returns: | one of the XML_CHAR_ENCODING_... values. |
Function: xmlCleanupEncodingAliasesvoid xmlCleanupEncodingAliases (void)
Unregisters all aliases
Daniel Veillard |