encoding

Name

encoding —

Synopsis



enum        xmlCharEncoding;
int         (*xmlCharEncodingInputFunc)     (unsigned char *out,
                                             int *outlen,
                                             unsigned char *in,
                                             int *inlen);
int         (*xmlCharEncodingOutputFunc)    (unsigned char *out,
                                             int *outlen,
                                             unsigned char *in,
                                             int *inlen);
struct      xmlCharEncodingHandler;
typedef     xmlCharEncodingHandlerPtr;
void        xmlInitCharEncodingHandlers     (void);
void        xmlCleanupCharEncodingHandlers  (void);
void        xmlRegisterCharEncodingHandler  (xmlCharEncodingHandlerPtr handler);
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler
                                            (xmlCharEncoding enc);
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler
                                            (const char *name);
int         xmlAddEncodingAlias             (const char *name,
                                             const char *alias);
int         xmlDelEncodingAlias             (const char *alias);
const char* xmlGetEncodingAlias             (const char *alias);
void        xmlCleanupEncodingAliases       (void);
xmlCharEncoding xmlParseCharEncoding        (const char *name);
const char* xmlGetCharEncodingName          (xmlCharEncoding enc);
xmlCharEncoding xmlDetectCharEncoding       (unsigned char *in,
                                             int len);
int         xmlCheckUTF8                    (unsigned char *utf);
int         xmlCharEncOutFunc               (xmlCharEncodingHandler *handler,
                                             xmlBufferPtr out,
                                             xmlBufferPtr in);
int         xmlCharEncInFunc                (xmlCharEncodingHandler *handler,
                                             xmlBufferPtr out,
                                             xmlBufferPtr in);
int         xmlCharEncFirstLine             (xmlCharEncodingHandler *handler,
                                             xmlBufferPtr out,
                                             xmlBufferPtr in);
int         xmlCharEncCloseFunc             (xmlCharEncodingHandler *handler);

Description

Details

enum xmlCharEncoding

typedef enum {
    XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
    XML_CHAR_ENCODING_NONE=	0, /* No char encoding detected */
    XML_CHAR_ENCODING_UTF8=	1, /* UTF-8 */
    XML_CHAR_ENCODING_UTF16LE=	2, /* UTF-16 little endian */
    XML_CHAR_ENCODING_UTF16BE=	3, /* UTF-16 big endian */
    XML_CHAR_ENCODING_UCS4LE=	4, /* UCS-4 little endian */
    XML_CHAR_ENCODING_UCS4BE=	5, /* UCS-4 big endian */
    XML_CHAR_ENCODING_EBCDIC=	6, /* EBCDIC uh! */
    XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */
    XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */
    XML_CHAR_ENCODING_UCS2=	9, /* UCS-2 */
    XML_CHAR_ENCODING_8859_1=	10,/* ISO-8859-1 ISO Latin 1 */
    XML_CHAR_ENCODING_8859_2=	11,/* ISO-8859-2 ISO Latin 2 */
    XML_CHAR_ENCODING_8859_3=	12,/* ISO-8859-3 */
    XML_CHAR_ENCODING_8859_4=	13,/* ISO-8859-4 */
    XML_CHAR_ENCODING_8859_5=	14,/* ISO-8859-5 */
    XML_CHAR_ENCODING_8859_6=	15,/* ISO-8859-6 */
    XML_CHAR_ENCODING_8859_7=	16,/* ISO-8859-7 */
    XML_CHAR_ENCODING_8859_8=	17,/* ISO-8859-8 */
    XML_CHAR_ENCODING_8859_9=	18,/* ISO-8859-9 */
    XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
    XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
    XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
    XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
} xmlCharEncoding;


xmlCharEncodingInputFunc ()

int         (*xmlCharEncodingInputFunc)     (unsigned char *out,
                                             int *outlen,
                                             unsigned char *in,
                                             int *inlen);

out : 
outlen : 
in : 
inlen : 
Returns : 


xmlCharEncodingOutputFunc ()

int         (*xmlCharEncodingOutputFunc)    (unsigned char *out,
                                             int *outlen,
                                             unsigned char *in,
                                             int *inlen);

out : 
outlen : 
in : 
inlen : 
Returns : 


struct xmlCharEncodingHandler

struct xmlCharEncodingHandler {
    char                       *name;
    xmlCharEncodingInputFunc   input;
    xmlCharEncodingOutputFunc  output;
#ifdef LIBXML_ICONV_ENABLED
    iconv_t                    iconv_in;
    iconv_t                    iconv_out;
#endif /* LIBXML_ICONV_ENABLED */
};


xmlCharEncodingHandlerPtr


xmlInitCharEncodingHandlers ()

void        xmlInitCharEncodingHandlers     (void);

Initialize the char encoding support; it registers the default supported encodings. NOTE: while public, this function usually doesn't need to be called in normal processing.


xmlCleanupCharEncodingHandlers ()

void        xmlCleanupCharEncodingHandlers  (void);

Clean up the memory allocated for the char encoding support; it unregisters all the encoding handlers and the aliases.


xmlRegisterCharEncodingHandler ()

void        xmlRegisterCharEncodingHandler  (xmlCharEncodingHandlerPtr handler);

Register the char encoding handler, surprising, isn't it?

handler : 


xmlGetCharEncodingHandler ()

xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler
                                            (xmlCharEncoding enc);

Search the registered set for the handler able to read/write that encoding.

enc : 
Returns : 


xmlFindCharEncodingHandler ()

xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler
                                            (const char *name);

name : 
Returns : 


xmlAddEncodingAlias ()

int         xmlAddEncodingAlias             (const char *name,
                                             const char *alias);

Registers an alias alias for an encoding named name. An existing alias will be overwritten.

name : 
alias : 
Returns : 


xmlDelEncodingAlias ()

int         xmlDelEncodingAlias             (const char *alias);

Unregisters an encoding alias alias

alias : 
Returns : 


xmlGetEncodingAlias ()

const char* xmlGetEncodingAlias             (const char *alias);

Lookup an encoding name for the given alias.

alias : 
Returns : 


xmlCleanupEncodingAliases ()

void        xmlCleanupEncodingAliases       (void);

Unregisters all aliases


xmlParseCharEncoding ()

xmlCharEncoding xmlParseCharEncoding        (const char *name);

Compare the string to the known encoding schemes. Note that the comparison is case insensitive, in accordance with section [XML] 4.3.3 Character Encoding in Entities.

name : 
Returns : 


xmlGetCharEncodingName ()

const char* xmlGetCharEncodingName          (xmlCharEncoding enc);

The "canonical" name for XML encoding. Cf. http://www.w3.org/TR/REC-xml#charencoding Section 4.3.3 Character Encoding in Entities.

enc : 
Returns : 


xmlDetectCharEncoding ()

xmlCharEncoding xmlDetectCharEncoding       (unsigned char *in,
                                             int len);

Guess the encoding of the entity using the first bytes of the entity content, according to the non-normative appendix F of the XML 1.0 recommendation.

in : 
len : 
Returns : 


xmlCheckUTF8 ()

int         xmlCheckUTF8                    (unsigned char *utf);

Checks utf for being valid UTF-8. utf is assumed to be null-terminated. This function is not super-strict, as it will allow longer UTF-8 sequences than necessary. Note that Java is capable of producing these sequences if provoked. Also note, this routine checks for the 4-byte maximum size, but does not check for the 0x10ffff maximum value.

utf : 
Returns : 


xmlCharEncOutFunc ()

int         xmlCharEncOutFunc               (xmlCharEncodingHandler *handler,
                                             xmlBufferPtr out,
                                             xmlBufferPtr in);

Generic front-end for the encoding handler output function. A first call with in == NULL has to be made first to initiate the output in case of non-stateless encodings needing to initialize their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence.

handler : 
out : 
in : 
Returns : 


xmlCharEncInFunc ()

int         xmlCharEncInFunc                (xmlCharEncodingHandler *handler,
                                             xmlBufferPtr out,
                                             xmlBufferPtr in);

Generic front-end for the encoding handler input function

handler : 
out : 
in : 
Returns : 


xmlCharEncFirstLine ()

int         xmlCharEncFirstLine             (xmlCharEncodingHandler *handler,
                                             xmlBufferPtr out,
                                             xmlBufferPtr in);

Front-end for the encoding handler input function, but it handles only the very first line, i.e. it limits itself to 45 chars.

handler : 
out : 
in : 
Returns : 


xmlCharEncCloseFunc ()

int         xmlCharEncCloseFunc             (xmlCharEncodingHandler *handler);

Generic front-end for the encoding handler close function.

handler : 
Returns :