diff --git a/ChangeLog b/ChangeLog index 79d16fef..c47bdb04 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Fri Jun 30 20:29:08 MEST 2000 + + * HTMLparser.c HTMLtree.c SAX.c valid.c tree.h : more cleanup + of the HTML parser to force it to not bypass SAX + Fri Jun 30 11:19:59 CEST 2000 Daniel Veillard * win32config.h.in: updated diff --git a/HTMLparser.c b/HTMLparser.c index 472d2cf6..375a038e 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -618,7 +618,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) { */ void htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) { - if (!strcmp(new, "html")) + if (!xmlStrcmp(new, BAD_CAST"html")) return; if (ctxt->nameNr <= 0) { #ifdef DEBUG @@ -628,12 +628,15 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) { if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); } - if ((!strcmp(new, "body")) || (!strcmp(new, "head"))) + if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head"))) return; if (ctxt->nameNr <= 1) { - if ((!strcmp(new, "script")) || (!strcmp(new, "style")) || - (!strcmp(new, "meta")) || (!strcmp(new, "link")) || - (!strcmp(new, "title")) || (!strcmp(new, "base"))) { + if ((!xmlStrcmp(new, BAD_CAST"script")) || + (!xmlStrcmp(new, BAD_CAST"style")) || + (!xmlStrcmp(new, BAD_CAST"meta")) || + (!xmlStrcmp(new, BAD_CAST"link")) || + (!xmlStrcmp(new, BAD_CAST"title")) || + (!xmlStrcmp(new, BAD_CAST"base"))) { /* * dropped OBJECT ... i you put it first BODY will be * assumed ! @@ -2152,17 +2155,15 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); ctxt->wellFormed = 0; /* We shouldn't try to resynchronize ... 
*/ - } else { } NEXT; /* - * Create the document accordingly to the DOCTYPE + * Create or update the document according to the DOCTYPE */ - if (ctxt->myDoc != NULL) - xmlFreeDoc(ctxt->myDoc); - - ctxt->myDoc = htmlNewDoc(URI, ExternalID); + if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); /* * Cleanup, since we don't use all those identifiers @@ -2845,13 +2846,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { } SKIP_BLANKS; - /* - * Create the document if not done already. - */ - if (ctxt->myDoc == NULL) { - ctxt->myDoc = htmlNewDoc(NULL, NULL); - } - /* * Time to start parsing the tree itself */ @@ -3171,6 +3165,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator); + if ((ctxt->sax) && (ctxt->sax->startDocument) && + (!ctxt->disableSAX)) + ctxt->sax->startDocument(ctxt->userData); + cur = in->cur[0]; next = in->cur[1]; if ((cur == '<') && (next == '!') && @@ -3190,7 +3188,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { fprintf(stderr, "HPP: entering PROLOG\n"); #endif } else { - ctxt->myDoc = htmlNewDoc(NULL, NULL); ctxt->instate = XML_PARSER_MISC; } #ifdef DEBUG_PUSH diff --git a/HTMLtree.c b/HTMLtree.c index 24a90ba7..d981ec0e 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -158,6 +158,8 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { /* * Special cases. 
*/ + if (cur->type == XML_DTD_NODE) + return; if (cur->type == XML_HTML_DOCUMENT_NODE) { htmlDocContentDump(buf, (xmlDocPtr) cur); return; diff --git a/SAX.c b/SAX.c index 68e2d316..c352a044 100644 --- a/SAX.c +++ b/SAX.c @@ -25,6 +25,7 @@ #include #include #include +#include /* #define DEBUG_SAX */ /* #define DEBUG_SAX_TREE */ @@ -157,11 +158,22 @@ internalSubset(void *ctx, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlDtdPtr dtd; #ifdef DEBUG_SAX fprintf(stderr, "SAX.internalSubset(%s, %s, %s)\n", name, ExternalID, SystemID); #endif - xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID); + + if (ctxt->myDoc == NULL) + return; + dtd = xmlGetIntSubset(ctxt->myDoc); + if (dtd != NULL) { + xmlUnlinkNode((xmlNodePtr) dtd); + xmlFreeDtd(dtd); + ctxt->myDoc->intSubset = NULL; + } + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID); } /** @@ -1485,7 +1497,7 @@ xmlDefaultSAXHandlerInit(void) * Default handler for HTML, builds the DOM tree */ xmlSAXHandler htmlDefaultSAXHandler = { - NULL, + internalSubset, NULL, NULL, NULL, @@ -1522,7 +1534,7 @@ xmlSAXHandler htmlDefaultSAXHandler = { void htmlDefaultSAXHandlerInit(void) { - htmlDefaultSAXHandler.internalSubset = NULL; + htmlDefaultSAXHandler.internalSubset = internalSubset; htmlDefaultSAXHandler.externalSubset = NULL; htmlDefaultSAXHandler.isStandalone = NULL; htmlDefaultSAXHandler.hasInternalSubset = NULL; diff --git a/doc/upgrade.html b/doc/upgrade.html index e7013ba1..50aaa6a4 100644 --- a/doc/upgrade.html +++ b/doc/upgrade.html @@ -48,7 +48,7 @@ mail:

Use xmlDocGetRootElement(doc) to get the root element of a document. Alternatively if you are sure to not reference Dtds nor have PIs or comments before or after the root element s/->root/->children/g - will probably do it. + will probably do it.
  • The white space issue, this one is more complex, unless special case of validating parsing, the line breaks and spaces usually used for indenting and formatting the document content becomes significant. So they are @@ -90,7 +90,7 @@ They offers the following:

    #include<libxml/...> in both cases.
  • similar identifiers defined via macros for the child and root fields: respectively xmlChildrenNode and - xmlRootNode
  • + xmlRootNode
  • a new macro LIBXML_TEST_VERSION which should be inserted once in the client code
  • @@ -118,7 +118,7 @@ following:

    LIBXML_TEST_VERSION is a fine place). -

    Following those 3 steps should work. It worked for some of my own code.

    +

    Following those steps should work. It worked for some of my own code.

    Let me put some emphasis on the fact that there is far more changes from libxml 1.x to 2.x than the ones you may have to patch for. The overall code @@ -128,6 +128,6 @@ upgrade, it may cost a lot on the long term ...

    Daniel Veillard

    -

    $Id: upgrade.html,v 1.5 2000/05/06 08:11:18 veillard Exp $

    +

    $Id: upgrade.html,v 1.6 2000/06/29 00:43:26 veillard Exp $

    diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 9c5b2805..6c68dc31 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -414,6 +414,7 @@ xmlDtdPtr xmlNewDtd (xmlDocPtr doc, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); +xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc); void xmlFreeDtd (xmlDtdPtr cur); xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc, const xmlChar *href, diff --git a/tree.h b/tree.h index 9c5b2805..6c68dc31 100644 --- a/tree.h +++ b/tree.h @@ -414,6 +414,7 @@ xmlDtdPtr xmlNewDtd (xmlDocPtr doc, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); +xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc); void xmlFreeDtd (xmlDtdPtr cur); xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc, const xmlChar *href, diff --git a/valid.c b/valid.c index 5dc37edf..52359913 100644 --- a/valid.c +++ b/valid.c @@ -2031,6 +2031,9 @@ xmlIsRef(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) { ((attr->name[1] == 'D') || (attr->name[1] == 'd')) && (attr->name[2] == 0)) return(1); *******************/ + } else if (doc->type == XML_HTML_DOCUMENT_NODE) { + /* TODO @@@ */ + return(0); } else { xmlAttributePtr attrDecl;