1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-28 00:21:53 +03:00

more cleanup of the HTML parser to force it to not bypass SAX, Daniel.

Ready for 2.1.1 it seems
This commit is contained in:
Daniel Veillard
2000-06-30 18:39:56 +00:00
parent 3f6f7f64ce
commit d83eb8212e
8 changed files with 47 additions and 26 deletions

View File

@ -1,3 +1,8 @@
Fri Jun 30 20:29:08 MEST 2000
* HTMLparser.c HTMLtree.c SAX.c valid.c tree.h : more cleanup
of the HTML parser to force it to not bypass SAX
Fri Jun 30 11:19:59 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* win32config.h.in: updated

View File

@ -618,7 +618,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {
*/
void
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
if (!strcmp(new, "html"))
if (!xmlStrcmp(new, BAD_CAST"html"))
return;
if (ctxt->nameNr <= 0) {
#ifdef DEBUG
@ -628,12 +628,15 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
}
if ((!strcmp(new, "body")) || (!strcmp(new, "head")))
if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head")))
return;
if (ctxt->nameNr <= 1) {
if ((!strcmp(new, "script")) || (!strcmp(new, "style")) ||
(!strcmp(new, "meta")) || (!strcmp(new, "link")) ||
(!strcmp(new, "title")) || (!strcmp(new, "base"))) {
if ((!xmlStrcmp(new, BAD_CAST"script")) ||
(!xmlStrcmp(new, BAD_CAST"style")) ||
(!xmlStrcmp(new, BAD_CAST"meta")) ||
(!xmlStrcmp(new, BAD_CAST"link")) ||
(!xmlStrcmp(new, BAD_CAST"title")) ||
(!xmlStrcmp(new, BAD_CAST"base"))) {
/*
* dropped OBJECT ... if you put it first BODY will be
* assumed !
@ -2152,17 +2155,15 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
ctxt->wellFormed = 0;
/* We shouldn't try to resynchronize ... */
} else {
}
NEXT;
/*
* Create the document accordingly to the DOCTYPE
* Create or update the document according to the DOCTYPE
*/
if (ctxt->myDoc != NULL)
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = htmlNewDoc(URI, ExternalID);
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/*
* Cleanup, since we don't use all those identifiers
@ -2845,13 +2846,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
}
SKIP_BLANKS;
/*
* Create the document if not done already.
*/
if (ctxt->myDoc == NULL) {
ctxt->myDoc = htmlNewDoc(NULL, NULL);
}
/*
* Time to start parsing the tree itself
*/
@ -3171,6 +3165,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
&xmlDefaultSAXLocator);
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
(!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
cur = in->cur[0];
next = in->cur[1];
if ((cur == '<') && (next == '!') &&
@ -3190,7 +3188,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "HPP: entering PROLOG\n");
#endif
} else {
ctxt->myDoc = htmlNewDoc(NULL, NULL);
ctxt->instate = XML_PARSER_MISC;
}
#ifdef DEBUG_PUSH

View File

@ -158,6 +158,8 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
/*
* Special cases.
*/
if (cur->type == XML_DTD_NODE)
return;
if (cur->type == XML_HTML_DOCUMENT_NODE) {
htmlDocContentDump(buf, (xmlDocPtr) cur);
return;

16
SAX.c
View File

@ -25,6 +25,7 @@
#include <libxml/xmlIO.h>
#include <libxml/SAX.h>
#include <libxml/uri.h>
#include <libxml/HTMLtree.h>
/* #define DEBUG_SAX */
/* #define DEBUG_SAX_TREE */
@ -157,10 +158,21 @@ internalSubset(void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlDtdPtr dtd;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.internalSubset(%s, %s, %s)\n",
name, ExternalID, SystemID);
#endif
if (ctxt->myDoc == NULL)
return;
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd != NULL) {
xmlUnlinkNode((xmlNodePtr) dtd);
xmlFreeDtd(dtd);
ctxt->myDoc->intSubset = NULL;
}
ctxt->myDoc->intSubset =
xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
}
@ -1485,7 +1497,7 @@ xmlDefaultSAXHandlerInit(void)
* Default handler for HTML, builds the DOM tree
*/
xmlSAXHandler htmlDefaultSAXHandler = {
NULL,
internalSubset,
NULL,
NULL,
NULL,
@ -1522,7 +1534,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
void
htmlDefaultSAXHandlerInit(void)
{
htmlDefaultSAXHandler.internalSubset = NULL;
htmlDefaultSAXHandler.internalSubset = internalSubset;
htmlDefaultSAXHandler.externalSubset = NULL;
htmlDefaultSAXHandler.isStandalone = NULL;
htmlDefaultSAXHandler.hasInternalSubset = NULL;

View File

@ -118,7 +118,7 @@ following:</p>
<strong>LIBXML_TEST_VERSION</strong> is a fine place).</li>
</ol>
<p>Following those 3 steps should work. It worked for some of my own code.</p>
<p>Following those steps should work. It worked for some of my own code.</p>
<p>Let me put some emphasis on the fact that there are far more changes from
libxml 1.x to 2.x than the ones you may have to patch for. The overall code
@ -128,6 +128,6 @@ upgrade, it may cost a lot on the long term ...</p>
<p><a href="mailto:Daniel.Veillard@w3.org">Daniel Veillard</a></p>
<p>$Id: upgrade.html,v 1.5 2000/05/06 08:11:18 veillard Exp $</p>
<p>$Id: upgrade.html,v 1.6 2000/06/29 00:43:26 veillard Exp $</p>
</body>
</html>

View File

@ -414,6 +414,7 @@ xmlDtdPtr xmlNewDtd (xmlDocPtr doc,
const xmlChar *name,
const xmlChar *ExternalID,
const xmlChar *SystemID);
xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc);
void xmlFreeDtd (xmlDtdPtr cur);
xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc,
const xmlChar *href,

1
tree.h
View File

@ -414,6 +414,7 @@ xmlDtdPtr xmlNewDtd (xmlDocPtr doc,
const xmlChar *name,
const xmlChar *ExternalID,
const xmlChar *SystemID);
xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc);
void xmlFreeDtd (xmlDtdPtr cur);
xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc,
const xmlChar *href,

View File

@ -2031,6 +2031,9 @@ xmlIsRef(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) {
((attr->name[1] == 'D') || (attr->name[1] == 'd')) &&
(attr->name[2] == 0)) return(1);
*******************/
} else if (doc->type == XML_HTML_DOCUMENT_NODE) {
/* TODO @@@ */
return(0);
} else {
xmlAttributePtr attrDecl;