1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-16 07:21:58 +03:00

more cleanup of the HTML parser to force it to not bypass SAX, Daniel.

Ready for 2.1.1 it seems
This commit is contained in:
Daniel Veillard
2000-06-30 18:39:56 +00:00
parent 3f6f7f64ce
commit d83eb8212e
8 changed files with 47 additions and 26 deletions

View File

@ -618,7 +618,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {
*/
void
htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
if (!strcmp(new, "html"))
if (!xmlStrcmp(new, BAD_CAST"html"))
return;
if (ctxt->nameNr <= 0) {
#ifdef DEBUG
@ -628,12 +628,15 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
}
if ((!strcmp(new, "body")) || (!strcmp(new, "head")))
if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head")))
return;
if (ctxt->nameNr <= 1) {
if ((!strcmp(new, "script")) || (!strcmp(new, "style")) ||
(!strcmp(new, "meta")) || (!strcmp(new, "link")) ||
(!strcmp(new, "title")) || (!strcmp(new, "base"))) {
if ((!xmlStrcmp(new, BAD_CAST"script")) ||
(!xmlStrcmp(new, BAD_CAST"style")) ||
(!xmlStrcmp(new, BAD_CAST"meta")) ||
(!xmlStrcmp(new, BAD_CAST"link")) ||
(!xmlStrcmp(new, BAD_CAST"title")) ||
(!xmlStrcmp(new, BAD_CAST"base"))) {
/*
* dropped OBJECT ... if you put it first BODY will be
* assumed !
@ -2152,17 +2155,15 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
ctxt->wellFormed = 0;
/* We shouldn't try to resynchronize ... */
} else {
}
NEXT;
/*
* Create the document accordingly to the DOCTYPE
* Create or update the document according to the DOCTYPE
*/
if (ctxt->myDoc != NULL)
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = htmlNewDoc(URI, ExternalID);
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
(!ctxt->disableSAX))
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/*
* Cleanup, since we don't use all those identifiers
@ -2845,13 +2846,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
}
SKIP_BLANKS;
/*
* Create the document if not done already.
*/
if (ctxt->myDoc == NULL) {
ctxt->myDoc = htmlNewDoc(NULL, NULL);
}
/*
* Time to start parsing the tree itself
*/
@ -3171,6 +3165,10 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
&xmlDefaultSAXLocator);
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
(!ctxt->disableSAX))
ctxt->sax->startDocument(ctxt->userData);
cur = in->cur[0];
next = in->cur[1];
if ((cur == '<') && (next == '!') &&
@ -3190,7 +3188,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "HPP: entering PROLOG\n");
#endif
} else {
ctxt->myDoc = htmlNewDoc(NULL, NULL);
ctxt->instate = XML_PARSER_MISC;
}
#ifdef DEBUG_PUSH