1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

New set of cleanups, released 2.2.3:

- SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c:
  removed a few warnings in pedantic mode ...
- parserInternals.c parser.c: moved encoding switching function
  to parserInternals.c
- configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3
Daniel
This commit is contained in:
Daniel Veillard
2000-09-17 16:00:22 +00:00
parent a2c6da94f8
commit 04698d9e1c
12 changed files with 371 additions and 317 deletions

View File

@ -1494,6 +1494,311 @@ xmlCopyChar(int len, xmlChar *out, int val) {
return(1);
}
/************************************************************************
* *
* Commodity functions to switch encodings *
* *
************************************************************************/
/*
 * xmlReportUnsupportedEncoding:
 * @ctxt:  the parser context
 * @name:  human readable name of the encoding that was rejected
 *
 * Records XML_ERR_UNSUPPORTED_ENCODING in the context and, when a SAX
 * error callback is installed, forwards a message naming the encoding.
 */
static void
xmlReportUnsupportedEncoding(xmlParserCtxtPtr ctxt, const char *name)
{
    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
        ctxt->sax->error(ctxt->userData,
                         "char encoding %s not supported\n", name);
}

/**
 * xmlSwitchEncoding:
 * @ctxt:  the parser context
 * @enc:  the encoding value (number)
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * XML_CHAR_ENCODING_NONE and XML_CHAR_ENCODING_UTF8 need no conversion:
 * the context charset is set to UTF-8 and the call succeeds.  For any
 * other value a conversion handler is requested through
 * xmlGetCharEncodingHandler(); when one is available the switch is
 * delegated to xmlSwitchToEncoding().  Without a handler only ASCII and
 * the ISO-8859-x family are accepted, every other encoding fails.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
    xmlCharEncodingHandlerPtr handler;

    if (ctxt == NULL)
        return(-1);

    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
            /*
             * Fail immediately so that the error callback is invoked
             * exactly once for an unknown encoding.
             */
            ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "encoding unknown\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(-1);
        case XML_CHAR_ENCODING_NONE:
            /* let's assume it's UTF-8 without the XML decl */
        case XML_CHAR_ENCODING_UTF8:
            /* default encoding, no conversion should be needed */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        default:
            break;
    }

    handler = xmlGetCharEncodingHandler(enc);
    if (handler != NULL) {
        /* the handler converts the input, parsing is then done in UTF-8 */
        ctxt->charset = XML_CHAR_ENCODING_UTF8;
        return(xmlSwitchToEncoding(ctxt, handler));
    }

    /*
     * Default handlers: no converter is available for this encoding.
     */
    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
            /* already rejected above, listed to keep the switch complete */
            break;
        case XML_CHAR_ENCODING_NONE:
        case XML_CHAR_ENCODING_UTF8:
        case XML_CHAR_ENCODING_ASCII:
            /* no conversion should be needed */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        case XML_CHAR_ENCODING_UTF16LE:
        case XML_CHAR_ENCODING_UTF16BE:
            /* no fallback and no message: the call simply fails */
            break;
        case XML_CHAR_ENCODING_UCS4LE:
            xmlReportUnsupportedEncoding(ctxt, "UCS4 little endian");
            break;
        case XML_CHAR_ENCODING_UCS4BE:
            xmlReportUnsupportedEncoding(ctxt, "UCS4 big endian");
            break;
        case XML_CHAR_ENCODING_EBCDIC:
            xmlReportUnsupportedEncoding(ctxt, "EBCDIC");
            break;
        case XML_CHAR_ENCODING_UCS4_2143:
            xmlReportUnsupportedEncoding(ctxt, "UCS4 2143");
            break;
        case XML_CHAR_ENCODING_UCS4_3412:
            xmlReportUnsupportedEncoding(ctxt, "UCS4 3412");
            break;
        case XML_CHAR_ENCODING_UCS2:
            xmlReportUnsupportedEncoding(ctxt, "UCS2");
            break;
        case XML_CHAR_ENCODING_8859_1:
        case XML_CHAR_ENCODING_8859_2:
        case XML_CHAR_ENCODING_8859_3:
        case XML_CHAR_ENCODING_8859_4:
        case XML_CHAR_ENCODING_8859_5:
        case XML_CHAR_ENCODING_8859_6:
        case XML_CHAR_ENCODING_8859_7:
        case XML_CHAR_ENCODING_8859_8:
        case XML_CHAR_ENCODING_8859_9:
            /*
             * We used to keep the internal content in the document
             * encoding, however this turned out to be unmaintainable,
             * so xmlGetCharEncodingHandler() is expected to return
             * non-NULL values for these now.  This branch is the
             * fallback kept for when it does not.
             */
            if ((ctxt->inputNr == 1) &&
                (ctxt->encoding == NULL) &&
                (ctxt->input->encoding != NULL)) {
                /* remember the encoding name of the main entity */
                ctxt->encoding = xmlStrdup(ctxt->input->encoding);
            }
            ctxt->charset = enc;
            return(0);
        case XML_CHAR_ENCODING_2022_JP:
            xmlReportUnsupportedEncoding(ctxt, "ISO-2022-JP");
            break;
        case XML_CHAR_ENCODING_SHIFT_JIS:
            xmlReportUnsupportedEncoding(ctxt, "Shift_JIS");
            break;
        case XML_CHAR_ENCODING_EUC_JP:
            xmlReportUnsupportedEncoding(ctxt, "EUC-JP");
            break;
    }
    return(-1);
}
/**
 * xmlSwitchToEncoding:
 * @ctxt:  the parser context
 * @handler:  the encoding handler
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * The handler is installed as the encoder of the current input buffer.
 * If an encoder was already installed it is closed and replaced, and the
 * content already read is left untouched.  Otherwise, content already
 * present in the buffer is moved to the raw buffer and converted into a
 * freshly created parsing buffer.  On success the context charset is set
 * to UTF-8.
 *
 * An input without an input buffer (static memory array) cannot be
 * converted and is rejected.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
    int nbchars;
    int processed;

    if ((ctxt == NULL) || (handler == NULL))
        return(-1);

    if (ctxt->input == NULL) {
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "xmlSwitchEncoding : no input\n");
        return(-1);
    }

    if (ctxt->input->buf == NULL) {
        /*
         * When parsing a static memory array there is no input buffer
         * to hold the raw and the converted content, so the switch
         * cannot be done.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "xmlSwitchEncoding : no input\n");
        return(-1);
    }

    if (ctxt->input->buf->encoder != NULL) {
        if (ctxt->input->buf->encoder != handler) {
            /*
             * Note: this is a bit dangerous, but that's what it
             * takes to use nearly compatible signature for different
             * encodings.
             */
            xmlCharEncCloseFunc(ctxt->input->buf->encoder);
            ctxt->input->buf->encoder = handler;
        }
        /* The parsing is now done in UTF8 natively */
        ctxt->charset = XML_CHAR_ENCODING_UTF8;
        return(0);
    }

    ctxt->input->buf->encoder = handler;

    /*
     * Is there already some content down the pipe to convert ?
     */
    if ((ctxt->input->buf->buffer != NULL) &&
        (ctxt->input->buf->buffer->use > 0)) {
        /*
         * Specific handling of the Byte Order Mark for UTF-16:
         * skip it so that it is not converted as content.
         */
        if ((handler->name != NULL) &&
            (!strcmp(handler->name, "UTF-16LE")) &&
            (ctxt->input->cur[0] == 0xFF) &&
            (ctxt->input->cur[1] == 0xFE)) {
            ctxt->input->cur += 2;
        }
        if ((handler->name != NULL) &&
            (!strcmp(handler->name, "UTF-16BE")) &&
            (ctxt->input->cur[0] == 0xFE) &&
            (ctxt->input->cur[1] == 0xFF)) {
            ctxt->input->cur += 2;
        }

        /*
         * Shrink the current input buffer.
         * Move it as the raw buffer and create a new input buffer
         */
        processed = ctxt->input->cur - ctxt->input->base;
        xmlBufferShrink(ctxt->input->buf->buffer, processed);
        ctxt->input->buf->raw = ctxt->input->buf->buffer;
        ctxt->input->buf->buffer = xmlBufferCreate();
        if (ctxt->input->buf->buffer == NULL) {
            /*
             * Allocation failed: put the unconverted buffer back so
             * the input never points at a NULL parsing buffer.
             */
            ctxt->input->buf->buffer = ctxt->input->buf->raw;
            ctxt->input->buf->raw = NULL;
            ctxt->input->base =
            ctxt->input->cur = ctxt->input->buf->buffer->content;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "xmlSwitchToEncoding : out of memory\n");
            return(-1);
        }

        if (ctxt->html) {
            /*
             * convert as much as possible of the buffer
             */
            nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
                                       ctxt->input->buf->buffer,
                                       ctxt->input->buf->raw);
        } else {
            /*
             * convert just enough to get
             * '<?xml version="1.0" encoding="xxx"?>'
             * parsed with the autodetected encoding
             * into the parser reading buffer.
             */
            nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
                                          ctxt->input->buf->buffer,
                                          ctxt->input->buf->raw);
        }
        if (nbchars < 0) {
            fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
            return(-1);
        }
        /* restart parsing at the beginning of the converted content */
        ctxt->input->base =
        ctxt->input->cur = ctxt->input->buf->buffer->content;
    }

    /*
     * The parsing is now done in UTF8 natively
     */
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    return(0);
}
/************************************************************************
* *
* Commodity functions to handle entities processing *
@ -1705,7 +2010,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
return(NULL);
URI = xmlStrdup((xmlChar *) filename);
directory = xmlParserGetDirectory(URI);
directory = xmlParserGetDirectory((const char *) URI);
inputStream = xmlNewInputStream(ctxt);
if (inputStream == NULL) {
@ -1714,7 +2019,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
return(NULL);
}
inputStream->filename = URI;
inputStream->filename = (const char *) URI;
inputStream->directory = directory;
inputStream->buf = buf;