diff --git a/ChangeLog b/ChangeLog index 1536d602..ff6811e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Sun Oct 19 15:31:43 CEST 2003 Daniel Veillard + + * include/libxml/nanohttp.h include/libxml/parserInternals.h + include/libxml/xmlIO.h nanohttp.c parserInternals.c xmlIO.c: + Fixed the HTTP<->parser interaction, which should fix 2 long + standing bugs #104790 and #124054; this also fixes the fact that + HTTP error codes (>= 400) should not generate data, we usually + don't want to parse the HTML error information instead of the + resource looked at. + Sun Oct 19 19:20:48 HKT 2003 William Brack * doc/Makefile.am: enhanced the installation of tutorial files diff --git a/include/libxml/nanohttp.h b/include/libxml/nanohttp.h index be2ba78e..f404c7be 100644 --- a/include/libxml/nanohttp.h +++ b/include/libxml/nanohttp.h @@ -53,9 +53,11 @@ XMLPUBFUN int XMLCALL XMLPUBFUN const char * XMLCALL xmlNanoHTTPAuthHeader (void *ctx); XMLPUBFUN const char * XMLCALL - xmlNanoHTTPRedir (void * ctx); + xmlNanoHTTPRedir (void *ctx); XMLPUBFUN const char * XMLCALL - xmlNanoHTTPEncoding (void * ctx); + xmlNanoHTTPEncoding (void *ctx); +XMLPUBFUN const char * XMLCALL + xmlNanoHTTPMimeType (void *ctx); XMLPUBFUN int XMLCALL xmlNanoHTTPRead (void *ctx, void *dest, diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 8b894297..495b051a 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -271,8 +271,21 @@ XMLPUBFUN int XMLCALL xmlCharEncoding enc); XMLPUBFUN int XMLCALL xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, - xmlCharEncodingHandlerPtr handler); + xmlCharEncodingHandlerPtr handler); +XMLPUBFUN int XMLCALL + xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt, + xmlParserInputPtr input, + xmlCharEncodingHandlerPtr handler); +#ifdef IN_LIBXML +/* internal error reporting */ +XMLPUBFUN void XMLCALL + __xmlErrEncoding (xmlParserCtxtPtr ctxt, + xmlParserErrors error, + const char *msg, + const xmlChar * str1, + 
const xmlChar * str2); +#endif /** * Entities */ diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h index 690a492e..ac390ce0 100644 --- a/include/libxml/xmlIO.h +++ b/include/libxml/xmlIO.h @@ -260,6 +260,9 @@ XMLPUBFUN void * XMLCALL XMLPUBFUN void XMLCALL xmlRegisterHTTPPostCallbacks (void ); #endif +XMLPUBFUN xmlParserInputPtr XMLCALL + xmlCheckHTTPInput (xmlParserCtxtPtr ctxt, + xmlParserInputPtr ret); /* * A predefined entity loader disabling network accesses diff --git a/nanohttp.c b/nanohttp.c index 08dd932d..489f14a6 100644 --- a/nanohttp.c +++ b/nanohttp.c @@ -150,6 +150,7 @@ typedef struct xmlNanoHTTPCtxt { char *location; /* the new URL in case of redirect */ char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */ char *encoding; /* encoding extracted from the contentType */ + char *mimeType; /* Mime-Type extracted from the contentType */ } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr; static int initialized = 0; @@ -530,6 +531,7 @@ xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) { if (ctxt->in != NULL) xmlFree(ctxt->in); if (ctxt->contentType != NULL) xmlFree(ctxt->contentType); if (ctxt->encoding != NULL) xmlFree(ctxt->encoding); + if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType); if (ctxt->location != NULL) xmlFree(ctxt->location); if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader); ctxt->state = XML_NANO_HTTP_NONE; @@ -737,7 +739,7 @@ xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) { * Try to extract useful informations from the server answer. * We currently parse and process: * - The HTTP revision/ return code - * - The Content-Type + * - The Content-Type, Mime-Type and charset used * - The Location for redirect processing. 
* * Returns -1 in case of failure, the file descriptor number otherwise @@ -781,16 +783,56 @@ xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) { if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return; ctxt->returnValue = ret; } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) { + const xmlChar *charset, *last, *mime; cur += 13; while ((*cur == ' ') || (*cur == '\t')) cur++; if (ctxt->contentType != NULL) xmlFree(ctxt->contentType); ctxt->contentType = xmlMemStrdup(cur); + mime = (const xmlChar *) cur; + last = mime; + while ((*last != 0) && (*last != ' ') && (*last != '\t') && + (*last != ';') && (*last != ',')) + last++; + if (ctxt->mimeType != NULL) + xmlFree(ctxt->mimeType); + ctxt->mimeType = (char *) xmlStrndup(mime, last - mime); + charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset="); + if (charset != NULL) { + charset += 8; + last = charset; + while ((*last != 0) && (*last != ' ') && (*last != '\t') && + (*last != ';') && (*last != ',')) + last++; + if (ctxt->encoding != NULL) + xmlFree(ctxt->encoding); + ctxt->encoding = (char *) xmlStrndup(charset, last - charset); + } } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) { + const xmlChar *charset, *last, *mime; cur += 12; if (ctxt->contentType != NULL) return; while ((*cur == ' ') || (*cur == '\t')) cur++; ctxt->contentType = xmlMemStrdup(cur); + mime = (const xmlChar *) cur; + last = mime; + while ((*last != 0) && (*last != ' ') && (*last != '\t') && + (*last != ';') && (*last != ',')) + last++; + if (ctxt->mimeType != NULL) + xmlFree(ctxt->mimeType); + ctxt->mimeType = (char *) xmlStrndup(mime, last - mime); + charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset="); + if (charset != NULL) { + charset += 8; + last = charset; + while ((*last != 0) && (*last != ' ') && (*last != '\t') && + (*last != ';') && (*last != ',')) + last++; + if (ctxt->encoding != NULL) + xmlFree(ctxt->encoding); + ctxt->encoding = (char *) 
xmlStrndup(charset, last - charset); + } } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) { cur += 9; while ((*cur == ' ') || (*cur == '\t')) cur++; @@ -1227,6 +1269,7 @@ retry: ctxt = xmlNanoHTTPNewCtxt(URL); else { ctxt = xmlNanoHTTPNewCtxt(redirURL); + ctxt->location = xmlMemStrdup(redirURL); } if ( ctxt == NULL ) { @@ -1608,6 +1651,21 @@ xmlNanoHTTPEncoding( void * ctx ) { return ( ( ctxt == NULL ) ? NULL : ctxt->encoding ); } +/** + * xmlNanoHTTPMimeType: + * @ctx: the HTTP context + * + * Provides the specified Mime-Type if specified in the HTTP headers. + * + * Return the specified Mime-Type or NULL if not available + */ +const char * +xmlNanoHTTPMimeType( void * ctx ) { + xmlNanoHTTPCtxtPtr ctxt = ctx; + + return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType ); +} + /** * xmlNanoHTTPFetchContent: * @ctx: the HTTP context diff --git a/parserInternals.c b/parserInternals.c index 2d1bd697..6b3df55a 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -122,7 +122,7 @@ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) } /** - * xmlErrEncoding: + * __xmlErrEncoding: * @ctxt: an XML parser context * @error: the error number * @msg: the error message @@ -131,9 +131,9 @@ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) * * Handle an encoding error */ -static void -xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar * str1, const xmlChar * str2) +void +__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error, + const char *msg, const xmlChar * str1, const xmlChar * str2) { if (ctxt != NULL) ctxt->errNo = error; @@ -558,7 +558,7 @@ encoding_error: * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) 
*/ - xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, + __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n", NULL, NULL); if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { @@ -704,7 +704,7 @@ encoding_error: * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ - xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, + __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n", NULL, NULL); if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { @@ -804,7 +804,7 @@ encoding_error: * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ - xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, + __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, "Input is not proper UTF-8, indicate encoding !\n", NULL, NULL); if ((ctxt != NULL) && (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { @@ -901,7 +901,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) switch (enc) { case XML_CHAR_ENCODING_ERROR: - xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, "encoding unknown\n", NULL, NULL); break; case XML_CHAR_ENCODING_NONE: @@ -951,7 +951,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) */ switch (enc) { case XML_CHAR_ENCODING_ERROR: - xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, "encoding unknown\n", NULL, NULL); break; case XML_CHAR_ENCODING_NONE: @@ -968,32 +968,32 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) case XML_CHAR_ENCODING_UTF16BE: break; case XML_CHAR_ENCODING_UCS4LE: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "USC4 little endian", NULL); break; case XML_CHAR_ENCODING_UCS4BE: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not 
supported %s\n", BAD_CAST "USC4 big endian", NULL); break; case XML_CHAR_ENCODING_EBCDIC: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "EBCDIC", NULL); break; case XML_CHAR_ENCODING_UCS4_2143: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "UCS4 2143", NULL); break; case XML_CHAR_ENCODING_UCS4_3412: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "UCS4 3412", NULL); break; case XML_CHAR_ENCODING_UCS2: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "UCS2", NULL); break; @@ -1020,17 +1020,17 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) ctxt->charset = enc; return(0); case XML_CHAR_ENCODING_2022_JP: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "ISO-2022-JP", NULL); break; case XML_CHAR_ENCODING_SHIFT_JIS: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "Shift_JIS", NULL); break; case XML_CHAR_ENCODING_EUC_JP: - xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, + __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, "encoding not supported %s\n", BAD_CAST "EUC-JP", NULL); break; @@ -1042,6 +1042,175 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) return(xmlSwitchToEncoding(ctxt, handler)); } +/** + * xmlSwitchInputEncoding: + * @ctxt: the parser context + * @input: the input stream + * @handler: the encoding handler + * + * change the input functions when discovering the character encoding + * of a given entity. 
+ * + * Returns 0 in case of success, -1 otherwise + */ +int +xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, + xmlCharEncodingHandlerPtr handler) +{ + int nbchars; + + if (handler == NULL) + return (-1); + if (input == NULL) + return (-1); + if (input->buf != NULL) { + if (input->buf->encoder != NULL) { + /* + * Check in case the auto encoding detetection triggered + * in already. + */ + if (input->buf->encoder == handler) + return (0); + + /* + * "UTF-16" can be used for both LE and BE + if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, + BAD_CAST "UTF-16", 6)) && + (!xmlStrncmp(BAD_CAST handler->name, + BAD_CAST "UTF-16", 6))) { + return(0); + } + */ + + /* + * Note: this is a bit dangerous, but that's what it + * takes to use nearly compatible signature for different + * encodings. + */ + xmlCharEncCloseFunc(input->buf->encoder); + input->buf->encoder = handler; + return (0); + } + input->buf->encoder = handler; + + /* + * Is there already some content down the pipe to convert ? + */ + if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { + int processed; + + /* + * Specific handling of the Byte Order Mark for + * UTF-16 + */ + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-16LE")) && + (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { + input->cur += 2; + } + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-16BE")) && + (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { + input->cur += 2; + } + /* + * Errata on XML-1.0 June 20 2001 + * Specific handling of the Byte Order Mark for + * UTF-8 + */ + if ((handler->name != NULL) && + (!strcmp(handler->name, "UTF-8")) && + (input->cur[0] == 0xEF) && + (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { + input->cur += 3; + } + + /* + * Shrink the current input buffer. 
+ * Move it as the raw buffer and create a new input buffer + */ + processed = input->cur - input->base; + xmlBufferShrink(input->buf->buffer, processed); + input->buf->raw = input->buf->buffer; + input->buf->buffer = xmlBufferCreate(); + + if (ctxt->html) { + /* + * convert as much as possible of the buffer + */ + nbchars = xmlCharEncInFunc(input->buf->encoder, + input->buf->buffer, + input->buf->raw); + } else { + /* + * convert just enough to get + * '' + * parsed with the autodetected encoding + * into the parser reading buffer. + */ + nbchars = xmlCharEncFirstLine(input->buf->encoder, + input->buf->buffer, + input->buf->raw); + } + if (nbchars < 0) { + xmlErrInternal(ctxt, + "switching encoding: encoder error\n", + NULL); + return (-1); + } + input->base = input->cur = input->buf->buffer->content; + input->end = &input->base[input->buf->buffer->use]; + + } + return (0); + } else { + if ((input->length == 0) || (input->buf == NULL)) { + /* + * When parsing a static memory array one must know the + * size to be able to convert the buffer. + */ + xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); + return (-1); + } else { + int processed; + + /* + * Shrink the current input buffer. + * Move it as the raw buffer and create a new input buffer + */ + processed = input->cur - input->base; + + input->buf->raw = xmlBufferCreate(); + xmlBufferAdd(input->buf->raw, input->cur, + input->length - processed); + input->buf->buffer = xmlBufferCreate(); + + /* + * convert as much as possible of the raw input + * to the parser reading buffer. 
+ */ + nbchars = xmlCharEncInFunc(input->buf->encoder, + input->buf->buffer, + input->buf->raw); + if (nbchars < 0) { + xmlErrInternal(ctxt, + "switching encoding: encoder error\n", + NULL); + return (-1); + } + + /* + * Conversion succeeded, get rid of the old buffer + */ + if ((input->free != NULL) && (input->base != NULL)) + input->free((xmlChar *) input->base); + input->base = input->cur = input->buf->buffer->content; + input->end = &input->base[input->buf->buffer->use]; + } + } + return (0); +} + /** * xmlSwitchToEncoding: * @ctxt: the parser context @@ -1055,165 +1224,9 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) int xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) { - int nbchars; - if (handler != NULL) { if (ctxt->input != NULL) { - if (ctxt->input->buf != NULL) { - if (ctxt->input->buf->encoder != NULL) { - /* - * Check in case the auto encoding detetection triggered - * in already. - */ - if (ctxt->input->buf->encoder == handler) - return(0); - - /* - * "UTF-16" can be used for both LE and BE - if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name, - BAD_CAST "UTF-16", 6)) && - (!xmlStrncmp(BAD_CAST handler->name, - BAD_CAST "UTF-16", 6))) { - return(0); - } - */ - - /* - * Note: this is a bit dangerous, but that's what it - * takes to use nearly compatible signature for different - * encodings. - */ - xmlCharEncCloseFunc(ctxt->input->buf->encoder); - ctxt->input->buf->encoder = handler; - return(0); - } - ctxt->input->buf->encoder = handler; - - /* - * Is there already some content down the pipe to convert ? 
- */ - if ((ctxt->input->buf->buffer != NULL) && - (ctxt->input->buf->buffer->use > 0)) { - int processed; - - /* - * Specific handling of the Byte Order Mark for - * UTF-16 - */ - if ((handler->name != NULL) && - (!strcmp(handler->name, "UTF-16LE")) && - (ctxt->input->cur[0] == 0xFF) && - (ctxt->input->cur[1] == 0xFE)) { - ctxt->input->cur += 2; - } - if ((handler->name != NULL) && - (!strcmp(handler->name, "UTF-16BE")) && - (ctxt->input->cur[0] == 0xFE) && - (ctxt->input->cur[1] == 0xFF)) { - ctxt->input->cur += 2; - } - /* - * Errata on XML-1.0 June 20 2001 - * Specific handling of the Byte Order Mark for - * UTF-8 - */ - if ((handler->name != NULL) && - (!strcmp(handler->name, "UTF-8")) && - (ctxt->input->cur[0] == 0xEF) && - (ctxt->input->cur[1] == 0xBB) && - (ctxt->input->cur[2] == 0xBF)) { - ctxt->input->cur += 3; - } - - /* - * Shrink the current input buffer. - * Move it as the raw buffer and create a new input buffer - */ - processed = ctxt->input->cur - ctxt->input->base; - xmlBufferShrink(ctxt->input->buf->buffer, processed); - ctxt->input->buf->raw = ctxt->input->buf->buffer; - ctxt->input->buf->buffer = xmlBufferCreate(); - - if (ctxt->html) { - /* - * convert as much as possible of the buffer - */ - nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, - ctxt->input->buf->buffer, - ctxt->input->buf->raw); - } else { - /* - * convert just enough to get - * '' - * parsed with the autodetected encoding - * into the parser reading buffer. 
- */ - nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder, - ctxt->input->buf->buffer, - ctxt->input->buf->raw); - } - if (nbchars < 0) { - xmlErrInternal(ctxt, - "xmlSwitchToEncoding: encoder error\n", - NULL); - return(-1); - } - ctxt->input->base = - ctxt->input->cur = ctxt->input->buf->buffer->content; - ctxt->input->end = - &ctxt->input->base[ctxt->input->buf->buffer->use]; - - } - return(0); - } else { - if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) { - /* - * When parsing a static memory array one must know the - * size to be able to convert the buffer. - */ - xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", - NULL); - return(-1); - } else { - int processed; - - /* - * Shrink the current input buffer. - * Move it as the raw buffer and create a new input buffer - */ - processed = ctxt->input->cur - ctxt->input->base; - - ctxt->input->buf->raw = xmlBufferCreate(); - xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur, - ctxt->input->length - processed); - ctxt->input->buf->buffer = xmlBufferCreate(); - - /* - * convert as much as possible of the raw input - * to the parser reading buffer. 
- */ - nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, - ctxt->input->buf->buffer, - ctxt->input->buf->raw); - if (nbchars < 0) { - xmlErrInternal(ctxt, - "xmlSwitchToEncoding: encoder error\n", - NULL); - return(-1); - } - - /* - * Conversion succeeded, get rid of the old buffer - */ - if ((ctxt->input->free != NULL) && - (ctxt->input->base != NULL)) - ctxt->input->free((xmlChar *) ctxt->input->base); - ctxt->input->base = - ctxt->input->cur = ctxt->input->buf->buffer->content; - ctxt->input->end = - &ctxt->input->base[ctxt->input->buf->buffer->use]; - } - } + xmlSwitchInputEncoding(ctxt, ctxt->input, handler); } else { xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", NULL); @@ -1226,7 +1239,6 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) } else return(-1); return(0); - } /************************************************************************ @@ -1417,7 +1429,7 @@ xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { * @ctxt: an XML parser context * @filename: the filename to use as entity * - * Create a new input stream based on a file. + * Create a new input stream based on a file or an URL. 
* * Returns the new input stream or NULL in case of error */ @@ -1436,20 +1448,25 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { if (buf == NULL) return(NULL); - URI = xmlStrdup((xmlChar *) filename); - directory = xmlParserGetDirectory((const char *) URI); - inputStream = xmlNewInputStream(ctxt); if (inputStream == NULL) { if (directory != NULL) xmlFree((char *) directory); if (URI != NULL) xmlFree((char *) URI); return(NULL); } - + inputStream->buf = buf; + inputStream = xmlCheckHTTPInput(ctxt, inputStream); + if (inputStream == NULL) + return(NULL); + + if (inputStream->filename == NULL) + URI = xmlStrdup((xmlChar *) filename); + else + URI = xmlStrdup((xmlChar *) inputStream->filename); + directory = xmlParserGetDirectory((const char *) URI); inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); if (URI != NULL) xmlFree((char *) URI); inputStream->directory = directory; - inputStream->buf = buf; inputStream->base = inputStream->buf->buffer->content; inputStream->cur = inputStream->buf->buffer->content; diff --git a/xmlIO.c b/xmlIO.c index 8014f863..ce0c2646 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -2961,6 +2961,80 @@ xmlParserGetDirectory(const char *filename) { * * ****************************************************************/ +/** + * xmlCheckHTTPInput: + * @ctxt: an XML parser context + * @ret: an XML parser input + * + * Check an input in case it was created from an HTTP stream, in that + * case it will handle encoding and update of the base URL in case of + * redirection. It also checks for HTTP errors in which case the input + * is cleanly freed up and an appropriate error is raised in context + * + * Returns the input or NULL in case of HTTP error. 
+ */ +xmlParserInputPtr +xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) { +#ifdef LIBXML_HTTP_ENABLED + if ((ret != NULL) && (ret->buf != NULL) && + (ret->buf->readcallback == xmlIOHTTPRead) && + (ret->buf->context != NULL)) { + const char *encoding; + const char *redir; + const char *mime; + int code; + + code = xmlNanoHTTPReturnCode(ret->buf->context); + if (code >= 400) { + /* fatal error */ + if (ret->filename != NULL) + xmlLoaderErr(ctxt, "failed to load HTTP resource \"%s\"\n", + (const char *) ret->filename); + else + xmlLoaderErr(ctxt, "failed to load HTTP resource\n", NULL); + xmlFreeInputStream(ret); + ret = NULL; + } else { + + mime = xmlNanoHTTPMimeType(ret->buf->context); + if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) || + (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) { + encoding = xmlNanoHTTPEncoding(ret->buf->context); + if (encoding != NULL) { + xmlCharEncodingHandlerPtr handler; + + handler = xmlFindCharEncodingHandler(encoding); + if (handler != NULL) { + xmlSwitchInputEncoding(ctxt, ret, handler); + } else { + __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, + "Unknown encoding %s", + BAD_CAST encoding, NULL); + } + if (ret->encoding == NULL) + ret->encoding = xmlStrdup(BAD_CAST encoding); + } +#if 0 + } else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) { +#endif + } + redir = xmlNanoHTTPRedir(ret->buf->context); + if (redir != NULL) { + if (ret->filename != NULL) + xmlFree((xmlChar *) ret->filename); + if (ret->directory != NULL) { + xmlFree((xmlChar *) ret->directory); + ret->directory = NULL; + } + ret->filename = + (char *) xmlStrdup((const xmlChar *) redir); + } + } + } +#endif + return(ret); +} + static int xmlSysIDExists(const char *URL) { #ifdef HAVE_STAT int ret; @@ -3001,19 +3075,20 @@ static int xmlSysIDExists(const char *URL) { * * Returns a new allocated xmlParserInputPtr, or NULL. 
*/ -static -xmlParserInputPtr +static xmlParserInputPtr xmlDefaultExternalEntityLoader(const char *URL, const char *ID, - xmlParserCtxtPtr ctxt) { + xmlParserCtxtPtr ctxt) +{ xmlParserInputPtr ret = NULL; xmlChar *resource = NULL; + #ifdef LIBXML_CATALOG_ENABLED xmlCatalogAllow pref; #endif #ifdef DEBUG_EXTERNAL_ENTITIES xmlGenericError(xmlGenericErrorContext, - "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL); + "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL); #endif #ifdef LIBXML_CATALOG_ENABLED /* @@ -3023,87 +3098,71 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID, pref = xmlCatalogGetDefaults(); if ((pref != XML_CATA_ALLOW_NONE) && (!xmlSysIDExists(URL))) { - /* - * Do a local lookup - */ - if ((ctxt->catalogs != NULL) && - ((pref == XML_CATA_ALLOW_ALL) || - (pref == XML_CATA_ALLOW_DOCUMENT))) { - resource = xmlCatalogLocalResolve(ctxt->catalogs, - (const xmlChar *)ID, - (const xmlChar *)URL); + /* + * Do a local lookup + */ + if ((ctxt->catalogs != NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_DOCUMENT))) { + resource = xmlCatalogLocalResolve(ctxt->catalogs, + (const xmlChar *) ID, + (const xmlChar *) URL); } - /* - * Try a global lookup - */ - if ((resource == NULL) && - ((pref == XML_CATA_ALLOW_ALL) || - (pref == XML_CATA_ALLOW_GLOBAL))) { - resource = xmlCatalogResolve((const xmlChar *)ID, - (const xmlChar *)URL); - } - if ((resource == NULL) && (URL != NULL)) - resource = xmlStrdup((const xmlChar *) URL); + /* + * Try a global lookup + */ + if ((resource == NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_GLOBAL))) { + resource = xmlCatalogResolve((const xmlChar *) ID, + (const xmlChar *) URL); + } + if ((resource == NULL) && (URL != NULL)) + resource = xmlStrdup((const xmlChar *) URL); - /* - * TODO: do an URI lookup on the reference - */ - if ((resource != NULL) && (!xmlSysIDExists((const char *)resource))) { - xmlChar *tmp = NULL; + /* + * TODO: do an URI lookup on the reference + */ + 
if ((resource != NULL) + && (!xmlSysIDExists((const char *) resource))) { + xmlChar *tmp = NULL; - if ((ctxt->catalogs != NULL) && - ((pref == XML_CATA_ALLOW_ALL) || - (pref == XML_CATA_ALLOW_DOCUMENT))) { - tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource); - } - if ((tmp == NULL) && - ((pref == XML_CATA_ALLOW_ALL) || - (pref == XML_CATA_ALLOW_GLOBAL))) { - tmp = xmlCatalogResolveURI(resource); - } + if ((ctxt->catalogs != NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_DOCUMENT))) { + tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource); + } + if ((tmp == NULL) && + ((pref == XML_CATA_ALLOW_ALL) || + (pref == XML_CATA_ALLOW_GLOBAL))) { + tmp = xmlCatalogResolveURI(resource); + } - if (tmp != NULL) { - xmlFree(resource); - resource = tmp; - } - } + if (tmp != NULL) { + xmlFree(resource); + resource = tmp; + } + } } #endif if (resource == NULL) - resource = (xmlChar *) URL; + resource = (xmlChar *) URL; if (resource == NULL) { - if (ID == NULL) - ID = "NULL"; - xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", ID); - return(NULL); + if (ID == NULL) + ID = "NULL"; + xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", ID); + return (NULL); } - ret = xmlNewInputFromFile(ctxt, (const char *)resource); + ret = xmlNewInputFromFile(ctxt, (const char *) resource); if (ret == NULL) { - xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", - (const char *) resource); - return(NULL); - } - if ((ret->buf != NULL) && (ret->buf->readcallback == xmlIOHTTPRead)) { - const char *encoding; - const char *redir; - - encoding = xmlNanoHTTPEncoding(ret->buf->context); - redir = xmlNanoHTTPRedir(ret->buf->context); - if (redir != NULL) { - if (ret->filename != NULL) - xmlFree((xmlChar *) ret->filename); - if (ret->directory != NULL) { - xmlFree((xmlChar *) ret->directory); - ret->directory = NULL; - } - ret->filename = (char *) xmlStrdup((const xmlChar *)redir); - } + xmlLoaderErr(ctxt, "failed to load external entity 
\"%s\"\n", + (const char *) resource); } if ((resource != NULL) && (resource != (xmlChar *) URL)) - xmlFree(resource); - return(ret); + xmlFree(resource); + return (ret); } static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =