mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
New set of cleanups, released 2.2.3:
- SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c: removed a few warnings in pedantic mode ... - parserInternals.c parser.c: moved encoding switching function to parserInternals.c - configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3 Daniel
This commit is contained in:
@ -1494,6 +1494,311 @@ xmlCopyChar(int len, xmlChar *out, int val) {
|
||||
return(1);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to switch encodings *
|
||||
* *
|
||||
************************************************************************/
|
||||
|
||||
/**
|
||||
* xmlSwitchEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @enc: the encoding value (number)
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
{
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
/* default encoding, no conversion should be needed */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
handler = xmlGetCharEncodingHandler(enc);
|
||||
if (handler == NULL) {
|
||||
/*
|
||||
* Default handlers.
|
||||
*/
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
case XML_CHAR_ENCODING_ASCII:
|
||||
/* default encoding, no conversion should be needed */
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF16LE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UTF16BE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4LE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 little endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4BE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 big endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EBCDIC not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_2143:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 2143 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_3412:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 3412 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS2:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS2 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_1:
|
||||
case XML_CHAR_ENCODING_8859_2:
|
||||
case XML_CHAR_ENCODING_8859_3:
|
||||
case XML_CHAR_ENCODING_8859_4:
|
||||
case XML_CHAR_ENCODING_8859_5:
|
||||
case XML_CHAR_ENCODING_8859_6:
|
||||
case XML_CHAR_ENCODING_8859_7:
|
||||
case XML_CHAR_ENCODING_8859_8:
|
||||
case XML_CHAR_ENCODING_8859_9:
|
||||
/*
|
||||
* We used to keep the internal content in the
|
||||
* document encoding however this turns being unmaintainable
|
||||
* So xmlGetCharEncodingHandler() will return non-null
|
||||
* values for this now.
|
||||
*/
|
||||
if ((ctxt->inputNr == 1) &&
|
||||
(ctxt->encoding == NULL) &&
|
||||
(ctxt->input->encoding != NULL)) {
|
||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||
}
|
||||
ctxt->charset = enc;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_2022_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO-2022-JPnot supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding Shift_JIS not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EUC_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EUC-JPnot supported\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (handler == NULL)
|
||||
return(-1);
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
return(xmlSwitchToEncoding(ctxt, handler));
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchToEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @handler: the encoding handler
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||
{
|
||||
int nbchars;
|
||||
|
||||
if (handler != NULL) {
|
||||
if (ctxt->input != NULL) {
|
||||
if (ctxt->input->buf != NULL) {
|
||||
if (ctxt->input->buf->encoder != NULL) {
|
||||
if (ctxt->input->buf->encoder == handler)
|
||||
return(0);
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*/
|
||||
xmlCharEncCloseFunc(ctxt->input->buf->encoder);
|
||||
ctxt->input->buf->encoder = handler;
|
||||
return(0);
|
||||
}
|
||||
ctxt->input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if ((ctxt->input->buf->buffer != NULL) &&
|
||||
(ctxt->input->buf->buffer->use > 0)) {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||
(ctxt->input->cur[0] == 0xFF) &&
|
||||
(ctxt->input->cur[1] == 0xFE)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(ctxt->input->cur[0] == 0xFE) &&
|
||||
(ctxt->input->cur[1] == 0xFF)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
||||
ctxt->input->buf->raw = ctxt->input->buf->buffer;
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* converst as much as possbile of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
}
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
|
||||
}
|
||||
return(0);
|
||||
} else {
|
||||
if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
|
||||
/*
|
||||
* When parsing a static memory array one must know the
|
||||
* size to be able to convert the buffer.
|
||||
*/
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
} else {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
ctxt->input->buf->raw = xmlBufferCreate();
|
||||
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
|
||||
ctxt->input->length - processed);
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
*/
|
||||
if ((ctxt->input->free != NULL) &&
|
||||
(ctxt->input->base != NULL))
|
||||
ctxt->input->free((xmlChar *) ctxt->input->base);
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
}
|
||||
/*
|
||||
* The parsing is now done in UTF8 natively
|
||||
*/
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
} else
|
||||
return(-1);
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* *
|
||||
* Commodity functions to handle entities processing *
|
||||
@ -1705,7 +2010,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
||||
return(NULL);
|
||||
|
||||
URI = xmlStrdup((xmlChar *) filename);
|
||||
directory = xmlParserGetDirectory(URI);
|
||||
directory = xmlParserGetDirectory((const char *) URI);
|
||||
|
||||
inputStream = xmlNewInputStream(ctxt);
|
||||
if (inputStream == NULL) {
|
||||
@ -1714,7 +2019,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
inputStream->filename = URI;
|
||||
inputStream->filename = (const char *) URI;
|
||||
inputStream->directory = directory;
|
||||
inputStream->buf = buf;
|
||||
|
||||
|
Reference in New Issue
Block a user