diff --git a/ChangeLog b/ChangeLog
index ff457c99..11a0ec6c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Sun Oct 15 01:34:37 CEST 2000 Daniel Veillard
+
+ * HTMLparser.c HTMLtree.[ch] SAX.c testHTML.c tree.c: fixed HTML
+ support for SCRIPT and STYLE with help from Bjorn Reese
+ * test/HTML/* result/HTML/*: added simple testcase and updated
+ the existing ones.
+
Fri Oct 13 18:24:31 CEST 2000 Daniel Veillard
* xpath.c xpointer.c: XPointer reorder of ranges start/end and
diff --git a/HTMLparser.c b/HTMLparser.c
index 40a15a46..617b903f 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -559,7 +559,6 @@ static char *htmlNoContentElements[] = {
NULL
};
-
static char** htmlStartCloseIndex[100];
static int htmlStartCloseIndexinitialized = 0;
@@ -1863,7 +1862,7 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
/*
* allocate a translation buffer.
*/
- buffer_size = HTML_PARSER_BIG_BUFFER_SIZE;
+ buffer_size = HTML_PARSER_BUFFER_SIZE;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) {
perror("htmlParseHTMLAttribute: malloc failed");
@@ -2209,6 +2208,71 @@ htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
return(ret);
}
+/**
+ * htmlParseScript:
+ * @ctxt:  an HTML parser context
+ *
+ * parse the content of an HTML SCRIPT or STYLE element
+ * http://www.w3.org/TR/html4/sgml/dtd.html#Script
+ * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
+ * http://www.w3.org/TR/html4/types.html#type-script
+ * http://www.w3.org/TR/html4/types.html#h-6.15
+ * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
+ *
+ * Script data ( %Script; in the DTD) can be the content of the SCRIPT
+ * element and the value of intrinsic event attributes. User agents must
+ * not evaluate script data as HTML markup but instead must pass it on as
+ * data to a script engine.
+ * NOTES:
+ * - The content is passed like CDATA, through the SAX cdataBlock callback,
+ *   in chunks of at most HTML_PARSER_BIG_BUFFER_SIZE characters
+ * - the attributes for style and scripting "onXXX" are also described
+ *   as CDATA but SGML allows entities references in attributes so their
+ *   processing is identical as other attributes
+ */
+void
+htmlParseScript(htmlParserCtxtPtr ctxt) {
+    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 1];
+    int nbchar = 0;
+    xmlChar cur;
+
+    SHRINK;
+    cur = CUR;
+    while (IS_CHAR(cur)) {
+        if ((cur == '<') && (NXT(1) == '/')) {
+            /*
+             * One should break here, the specification is clear:
+             * Authors should therefore escape "</" within the content.
+             * Escape mechanisms are specific to each scripting or
+             * style sheet language.
+             */
+            if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) ||
+                ((NXT(2) >= 'a') && (NXT(2) <= 'z')))
+                break; /* while */
+        }
+        buf[nbchar++] = cur;
+        if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
+            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
+                (ctxt->sax->cdataBlock != NULL)) {
+                /* Buffer full: flush as CDATA (same as HTML_PRESERVE_NODE) */
+                ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
+            }
+            nbchar = 0;
+        }
+        NEXT;
+        cur = CUR;
+    }
+    /* Deliver whatever is still buffered once the scan loop has stopped */
+    if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+        if (ctxt->sax->cdataBlock != NULL) {
+            /* Insert as CDATA, which is the same as HTML_PRESERVE_NODE */
+            ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
+        }
+    }
+}
+
+
/**
* htmlParseCharData:
* @ctxt: an HTML parser context
@@ -3112,68 +3176,75 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
return;
}
- /*
- * Sometimes DOCTYPE arrives in the middle of the document
- */
- if ((CUR == '<') && (NXT(1) == '!') &&
- (UPP(2) == 'D') && (UPP(3) == 'O') &&
- (UPP(4) == 'C') && (UPP(5) == 'T') &&
- (UPP(6) == 'Y') && (UPP(7) == 'P') &&
- (UPP(8) == 'E')) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Misplaced DOCTYPE declaration\n");
- ctxt->wellFormed = 0;
- htmlParseDocTypeDecl(ctxt);
- }
-
- /*
- * First case : a comment
- */
- if ((CUR == '<') && (NXT(1) == '!') &&
- (NXT(2) == '-') && (NXT(3) == '-')) {
- htmlParseComment(ctxt);
- }
-
- /*
- * Second case : a sub-element.
- */
- else if (CUR == '<') {
- htmlParseElement(ctxt);
- }
-
- /*
- * Third case : a reference. If if has not been resolved,
- * parsing returns it's Name, create the node
- */
- else if (CUR == '&') {
- htmlParseReference(ctxt);
- }
-
- /*
- * Fourth : end of the resource
- */
- else if (CUR == 0) {
- htmlAutoClose(ctxt, NULL);
- }
-
- /*
- * Last case, text. Note that References are handled directly.
- */
- else {
- htmlParseCharData(ctxt, 0);
- }
-
- if (cons == ctxt->nbChars) {
- if (ctxt->node != NULL) {
+ if ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
+ (xmlStrEqual(currentNode, BAD_CAST"style"))) {
+ /*
+ * Handle SCRIPT/STYLE separately
+ */
+ htmlParseScript(ctxt);
+ } else {
+ /*
+ * Sometimes DOCTYPE arrives in the middle of the document
+ */
+ if ((CUR == '<') && (NXT(1) == '!') &&
+ (UPP(2) == 'D') && (UPP(3) == 'O') &&
+ (UPP(4) == 'C') && (UPP(5) == 'T') &&
+ (UPP(6) == 'Y') && (UPP(7) == 'P') &&
+ (UPP(8) == 'E')) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
- "detected an error in element content\n");
+ "Misplaced DOCTYPE declaration\n");
ctxt->wellFormed = 0;
+ htmlParseDocTypeDecl(ctxt);
}
- break;
- }
+ /*
+ * First case : a comment
+ */
+ if ((CUR == '<') && (NXT(1) == '!') &&
+ (NXT(2) == '-') && (NXT(3) == '-')) {
+ htmlParseComment(ctxt);
+ }
+
+ /*
+ * Second case : a sub-element.
+ */
+ else if (CUR == '<') {
+ htmlParseElement(ctxt);
+ }
+
+ /*
+ * Third case : a reference. If it has not been resolved,
+ * parsing returns its Name, create the node
+ */
+ else if (CUR == '&') {
+ htmlParseReference(ctxt);
+ }
+
+ /*
+ * Fourth : end of the resource
+ */
+ else if (CUR == 0) {
+ htmlAutoClose(ctxt, NULL);
+ }
+
+ /*
+ * Last case, text. Note that References are handled directly.
+ */
+ else {
+ htmlParseCharData(ctxt, 0);
+ }
+
+ if (cons == ctxt->nbChars) {
+ if (ctxt->node != NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "detected an error in element content\n");
+ ctxt->wellFormed = 0;
+ }
+ break;
+ }
+ }
GROW;
}
if (currentNode != NULL) xmlFree(currentNode);
@@ -3739,6 +3810,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "HPP: try EPILOG\n");break;
case XML_PARSER_PI:
fprintf(stderr, "HPP: try PI\n");break;
+ case XML_PARSER_SYSTEM_LITERAL:
+ fprintf(stderr, "HPP: try SYSTEM_LITERAL\n");break;
}
#endif
@@ -4105,75 +4178,94 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
cur = in->cur[0];
next = in->cur[1];
cons = ctxt->nbChars;
- /*
- * Sometimes DOCTYPE arrives in the middle of the document
- */
- if ((cur == '<') && (next == '!') &&
- (UPP(2) == 'D') && (UPP(3) == 'O') &&
- (UPP(4) == 'C') && (UPP(5) == 'T') &&
- (UPP(6) == 'Y') && (UPP(7) == 'P') &&
- (UPP(8) == 'E')) {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
- goto done;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Misplaced DOCTYPE declaration\n");
- ctxt->wellFormed = 0;
- htmlParseDocTypeDecl(ctxt);
- } else if ((cur == '<') && (next == '!') &&
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
- goto done;
-#ifdef DEBUG_PUSH
- fprintf(stderr, "HPP: Parsing Comment\n");
-#endif
- htmlParseComment(ctxt);
- ctxt->instate = XML_PARSER_CONTENT;
- } else if ((cur == '<') && (next == '!') && (avail < 4)) {
- goto done;
- } else if ((cur == '<') && (next == '/')) {
- ctxt->instate = XML_PARSER_END_TAG;
- ctxt->checkIndex = 0;
-#ifdef DEBUG_PUSH
- fprintf(stderr, "HPP: entering END_TAG\n");
-#endif
- break;
- } else if (cur == '<') {
- ctxt->instate = XML_PARSER_START_TAG;
- ctxt->checkIndex = 0;
-#ifdef DEBUG_PUSH
- fprintf(stderr, "HPP: entering START_TAG\n");
-#endif
- break;
- } else if (cur == '&') {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
- goto done;
-#ifdef DEBUG_PUSH
- fprintf(stderr, "HPP: Parsing Reference\n");
-#endif
- /* TODO: check generation of subtrees if noent !!! */
- htmlParseReference(ctxt);
- } else {
- /* TODO Avoid the extra copy, handle directly !!!!!! */
+ if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
+ (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
/*
- * Goal of the following test is :
- * - minimize calls to the SAX 'character' callback
- * when they are mergeable
+ * Handle SCRIPT/STYLE separately
*/
- if ((ctxt->inputNr == 1) &&
- (avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
- if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
- goto done;
- }
- ctxt->checkIndex = 0;
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '<', '/', 0) < 0))
+ goto done;
+ htmlParseScript(ctxt);
+ if ((cur == '<') && (next == '/')) {
+ ctxt->instate = XML_PARSER_END_TAG;
+ ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
- fprintf(stderr, "HPP: Parsing char data\n");
+ fprintf(stderr, "HPP: entering END_TAG\n");
#endif
- htmlParseCharData(ctxt, 0);
+ break;
+ }
+ } else {
+ /*
+ * Sometimes DOCTYPE arrives in the middle of the document
+ */
+ if ((cur == '<') && (next == '!') &&
+ (UPP(2) == 'D') && (UPP(3) == 'O') &&
+ (UPP(4) == 'C') && (UPP(5) == 'T') &&
+ (UPP(6) == 'Y') && (UPP(7) == 'P') &&
+ (UPP(8) == 'E')) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ goto done;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Misplaced DOCTYPE declaration\n");
+ ctxt->wellFormed = 0;
+ htmlParseDocTypeDecl(ctxt);
+ } else if ((cur == '<') && (next == '!') &&
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "HPP: Parsing Comment\n");
+#endif
+ htmlParseComment(ctxt);
+ ctxt->instate = XML_PARSER_CONTENT;
+ } else if ((cur == '<') && (next == '!') && (avail < 4)) {
+ goto done;
+ } else if ((cur == '<') && (next == '/')) {
+ ctxt->instate = XML_PARSER_END_TAG;
+ ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "HPP: entering END_TAG\n");
+#endif
+ break;
+ } else if (cur == '<') {
+ ctxt->instate = XML_PARSER_START_TAG;
+ ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "HPP: entering START_TAG\n");
+#endif
+ break;
+ } else if (cur == '&') {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "HPP: Parsing Reference\n");
+#endif
+ /* TODO: check generation of subtrees if noent !!! */
+ htmlParseReference(ctxt);
+ } else {
+ /* TODO Avoid the extra copy, handle directly !!!!!! */
+ /*
+ * Goal of the following test is :
+ * - minimize calls to the SAX 'character' callback
+ * when they are mergeable
+ */
+ if ((ctxt->inputNr == 1) &&
+ (avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
+ goto done;
+ }
+ ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "HPP: Parsing char data\n");
+#endif
+ htmlParseCharData(ctxt, 0);
+ }
}
if (cons == ctxt->nbChars) {
if (ctxt->node != NULL) {
diff --git a/HTMLtree.c b/HTMLtree.c
index 9ce68971..c8e8a646 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -818,6 +818,16 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const
xmlOutputBufferWriteString(buf, ";");
return;
}
+ if (cur->type == HTML_PRESERVE_NODE) {
+ if (cur->content != NULL) {
+#ifndef XML_USE_BUFFER_CONTENT
+ xmlOutputBufferWriteString(buf, (const char *)cur->content);
+#else
+ xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
+#endif
+ }
+ return;
+ }
/*
* Get specific HTmL info for taht node.
diff --git a/HTMLtree.h b/HTMLtree.h
index 17043b78..2b5331c6 100644
--- a/HTMLtree.h
+++ b/HTMLtree.h
@@ -22,6 +22,7 @@ extern "C" {
#define HTML_TEXT_NODE XML_TEXT_NODE
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
#define HTML_COMMENT_NODE XML_COMMENT_NODE
+#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE
htmlDocPtr htmlNewDoc (const xmlChar *URI,
const xmlChar *ExternalID);
diff --git a/SAX.c b/SAX.c
index c53a10f8..25a0191e 100644
--- a/SAX.c
+++ b/SAX.c
@@ -1600,7 +1600,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
xmlParserError,
xmlParserError,
getParameterEntity,
- NULL,
+ cdataBlock,
NULL,
};
@@ -1632,7 +1632,7 @@ htmlDefaultSAXHandlerInit(void)
htmlDefaultSAXHandler.endElement = endElement;
htmlDefaultSAXHandler.reference = NULL;
htmlDefaultSAXHandler.characters = characters;
- htmlDefaultSAXHandler.cdataBlock = NULL;
+ htmlDefaultSAXHandler.cdataBlock = cdataBlock; /* HTML handler: SCRIPT/STYLE content arrives as CDATA */
htmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
htmlDefaultSAXHandler.processingInstruction = NULL;
htmlDefaultSAXHandler.comment = comment;
diff --git a/include/libxml/HTMLtree.h b/include/libxml/HTMLtree.h
index 17043b78..2b5331c6 100644
--- a/include/libxml/HTMLtree.h
+++ b/include/libxml/HTMLtree.h
@@ -22,6 +22,7 @@ extern "C" {
#define HTML_TEXT_NODE XML_TEXT_NODE
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
#define HTML_COMMENT_NODE XML_COMMENT_NODE
+#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE
htmlDocPtr htmlNewDoc (const xmlChar *URI,
const xmlChar *ExternalID);
diff --git a/result/HTML/doc2.htm.err b/result/HTML/doc2.htm.err
index bf46ffad..d098b471 100644
--- a/result/HTML/doc2.htm.err
+++ b/result/HTML/doc2.htm.err
@@ -1,3 +1,3 @@
-./test/HTML/doc2.htm:10: error: Misplaced DOCTYPE declaration
+./test/HTML/doc2.htm:5: error: Misplaced DOCTYPE declaration
-
+
+
+
+
+
+
-
+ // -->
+
diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err
index 51266e19..5a57449d 100644
--- a/result/HTML/doc3.htm.err
+++ b/result/HTML/doc3.htm.err
@@ -1,93 +1,105 @@
-./test/HTML/doc3.htm:10: error: Misplaced DOCTYPE declaration
+./test/HTML/doc3.htm:5: error: Misplaced DOCTYPE declaration
^
-./test/HTML/doc3.htm:820: error: Unexpected end tag : noscript
+./test/HTML/doc3.htm:815: error: Unexpected end tag : noscript
^
-./test/HTML/doc3.htm:826: error: Opening and ending tag mismatch: form and center
+./test/HTML/doc3.htm:821: error: Opening and ending tag mismatch: form and center
Code:BP6-hd
^
-./test/HTML/doc3.htm:833: error: Opening and ending tag mismatch: center and table
+./test/HTML/doc3.htm:828: error: Opening and ending tag mismatch: center and table
Special Code:BP6-hd
^
-./test/HTML/doc3.htm:839: error: Unexpected end tag : p
+./test/HTML/doc3.htm:834: error: Unexpected end tag : p
width="100%">
^
-./test/HTML/doc3.htm:839: error: Unexpected end tag : center
+./test/HTML/doc3.htm:834: error: Unexpected end tag : center
width="100%">
^
-./test/HTML/doc3.htm:839: error: Unexpected end tag : tr
+./test/HTML/doc3.htm:834: error: Unexpected end tag : tr
width="100%">
^
-./test/HTML/doc3.htm:839: error: Unexpected end tag : tbody
+./test/HTML/doc3.htm:834: error: Unexpected end tag : tbody
width="100%">
^
-./test/HTML/doc3.htm:839: error: Unexpected end tag : table
+./test/HTML/doc3.htm:834: error: Unexpected end tag : table
width="100%">
^
-./test/HTML/doc3.htm:840: error: Unexpected end tag : td
+./test/HTML/doc3.htm:835: error: Unexpected end tag : td
^
diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax
index ff8f186a..0cd7df16 100644
--- a/result/HTML/doc3.htm.sax
+++ b/result/HTML/doc3.htm.sax
@@ -11,7 +11,7 @@ SAX.endElement(title)
SAX.ignorableWhitespace(
, 2)
SAX.startElement(script, language='javascript')
-SAX.characters(
+SAX.cdata(
NS_ActualOpen=wind, 199)
SAX.endElement(script)
SAX.ignorableWhitespace(
@@ -31,38 +31,15 @@ SAX.endElement(meta)
SAX.ignorableWhitespace(
, 2)
SAX.startElement(style, type='text/css')
-SAX.characters(A.nav {
+SAX.cdata(A.nav {
COLOR: #003399; TEXT, 115)
SAX.endElement(style)
SAX.ignorableWhitespace(
, 4)
SAX.startElement(script, language='JavaScript')
-SAX.characters(
-, 1)
-SAX.comment( Idea by: Nic Wolfe (Nic@TimelapseProductions.com) )
-SAX.characters(
-, 1)
-SAX.comment( Web URL: http://fineline.xs.mw )
-SAX.characters(
-
-, 2)
-SAX.comment( This script and many more are available free online at )
-SAX.characters(
-, 1)
-SAX.comment( The JavaScript Source!! http://javascript.internet.com )
-SAX.characters(
-
-, 2)
-SAX.comment( Begin
-function popUp(URL) {
-day = new Date();
-id = day.getTime();
-eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0, location=0, statusbars=0, menubars=0, resizable=0, width=145, height=250');");
-}
-// End )
-SAX.characters(
-, 1)
+SAX.cdata(
+<!-- Idea by: Nic Wolfe (, 476)
SAX.endElement(script)
SAX.ignorableWhitespace(
@@ -2717,38 +2694,20 @@ SAX.comment( BEGIN GoTo.com Search Box )
SAX.characters(
, 14)
SAX.startElement(script, language='javascript', type='text/javascript')
-SAX.characters(
- , 9)
-SAX.comment(
- if ((parseInt(navigator.appVersion) >= 3)
- && (navigator.appName != "Netscape")) {
- document.write("");
- } else if ((parseInt(navigator.appVersion) > 3)
- && (navigator.appName == "Netscape")) {
- document.write("
+
+