diff --git a/ChangeLog b/ChangeLog index 1409f252..be2cd7a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Tue Oct 10 22:02:29 CEST 2006 Daniel Veillard + + * include/libxml/entities.h entities.c SAX2.c parser.c: trying to + fix entities behaviour when using SAX, had to extend entities + content and hack on the entities processing code, but that should + fix the long standing bug #159219 + Tue Oct 10 14:36:18 CEST 2006 Daniel Veillard * uri.c include/libxml/uri.h: add a new function xmlPathToUri() diff --git a/SAX2.c b/SAX2.c index 1c5bb89e..7d4ab64a 100644 --- a/SAX2.c +++ b/SAX2.c @@ -580,6 +580,7 @@ xmlSAX2GetEntity(void *ctx, const xmlChar *name) return(NULL); } ret->owner = 1; + ret->checked = 1; } return(ret); } diff --git a/entities.c b/entities.c index 6a0e38d7..91a39780 100644 --- a/entities.c +++ b/entities.c @@ -31,35 +31,35 @@ static xmlEntity xmlEntityLt = { NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "<", BAD_CAST "<", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0 + NULL, NULL, NULL, NULL, 0, 1 }; static xmlEntity xmlEntityGt = { NULL, XML_ENTITY_DECL, BAD_CAST "gt", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST ">", BAD_CAST ">", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0 + NULL, NULL, NULL, NULL, 0, 1 }; static xmlEntity xmlEntityAmp = { NULL, XML_ENTITY_DECL, BAD_CAST "amp", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "&", BAD_CAST "&", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0 + NULL, NULL, NULL, NULL, 0, 1 }; static xmlEntity xmlEntityQuot = { NULL, XML_ENTITY_DECL, BAD_CAST "quot", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "\"", BAD_CAST "\"", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0 + NULL, NULL, NULL, NULL, 0, 1 }; static xmlEntity xmlEntityApos = { NULL, XML_ENTITY_DECL, BAD_CAST "apos", NULL, NULL, NULL, NULL, NULL, NULL, BAD_CAST "'", BAD_CAST "'", 1, XML_INTERNAL_PREDEFINED_ENTITY, - NULL, NULL, NULL, NULL, 0 + NULL, NULL, NULL, NULL, 0, 1 }; /** @@ -182,6 +182,7 @@ xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type, } memset(ret, 0, sizeof(xmlEntity)); ret->type = XML_ENTITY_DECL; + ret->checked = 0; /* * fill the structure. diff --git a/include/libxml/entities.h b/include/libxml/entities.h index 0bb28a0b..fdd72225 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -56,6 +56,7 @@ struct _xmlEntity { struct _xmlEntity *nexte; /* unused */ const xmlChar *URI; /* the full URI as computed */ int owner; /* does the entity own the childrens */ + int checked; /* was the entity content checked */ }; /* diff --git a/parser.c b/parser.c index 7bc5644d..90d608a1 100644 --- a/parser.c +++ b/parser.c @@ -5929,10 +5929,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ctxt->sax->characters(ctxt->userData, out, i); } } else { + int was_checked; + ent = xmlParseEntityRef(ctxt); if (ent == NULL) return; if (!ctxt->wellFormed) return; + was_checked = ent->checked; if ((ent->name != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) { xmlNodePtr list = NULL; @@ -5944,8 +5947,9 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * where the ent->children is filled with the result from * the parsing. */ - if (ent->children == NULL) { + if (ent->checked == 0) { xmlChar *value; + value = ent->content; /* @@ -6080,15 +6084,69 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { list = NULL; } } + ent->checked = 1; + } + + if (ent->children == NULL) { + /* + * Probably running in SAX mode and the callbacks don't + * build the entity content. So unless we already went + * though parsing for first checking go though the entity + * content to generate callbacks associated to the entity + */ + if (was_checked == 1) { + void *user_data; + /* + * This is a bit hackish but this seems the best + * way to make sure both SAX and DOM entity support + * behaves okay. + */ + if (ctxt->userData == ctxt) + user_data = NULL; + else + user_data = ctxt->userData; + + if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) { + ctxt->depth++; + ret = xmlParseBalancedChunkMemoryInternal(ctxt, + ent->content, user_data, NULL); + ctxt->depth--; + } else if (ent->etype == + XML_EXTERNAL_GENERAL_PARSED_ENTITY) { + ctxt->depth++; + ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, + ctxt->sax, user_data, ctxt->depth, + ent->URI, ent->ExternalID, NULL); + ctxt->depth--; + } else { + ret = XML_ERR_ENTITY_PE_INTERNAL; + xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR, + "invalid entity type found\n", NULL); + } + if (ret == XML_ERR_ENTITY_LOOP) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + return; + } + } + if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + /* + * Entity reference callback comes second, it's somewhat + * superfluous but a compatibility to historical behaviour + */ + ctxt->sax->reference(ctxt->userData, ent->name); + } + return; } if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && - (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { + (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) { /* * Create a node. */ ctxt->sax->reference(ctxt->userData, ent->name); return; - } else if (ctxt->replaceEntities) { + } + if ((ctxt->replaceEntities) || (ent->children == NULL)) { /* * There is a problem on the handling of _private for entities * (bug 155816): Should we copy the content of the field from @@ -6212,31 +6270,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ctxt->nodemem = 0; ctxt->nodelen = 0; return; - } else { - /* - * Probably running in SAX mode - */ - xmlParserInputPtr input; - - input = xmlNewEntityInputStream(ctxt, ent); - xmlPushInput(ctxt, input); - if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && - (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && - (IS_BLANK_CH(NXT(5)))) { - xmlParseTextDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { - /* - * The XML REC instructs us to stop parsing right here - */ - ctxt->instate = XML_PARSER_EOF; - return; - } - if (input->standalone == 1) { - xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE, - NULL); - } - } - return; } } } else {