1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

- HTMLparser.c HTMLtree.[ch] SAX.c testHTML.c tree.c: fixed HTML

support for SCRIPT and STYLE with help from Bjorn Reese
- test/HTML/* result/HTML/*: added simple testcase and updated
  the existing ones.
Daniel
This commit is contained in:
Daniel Veillard
2000-10-14 23:38:43 +00:00
parent ff9c330753
commit 7eda8452f8
17 changed files with 390 additions and 230 deletions

View File

@ -1,3 +1,10 @@
Sun Oct 15 01:34:37 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* HTMLparser.c HTMLtree.[ch] SAX.c testHTML.c tree.c: fixed HTML
support for SCRIPT and STYLE with help from Bjorn Reese
* test/HTML/* result/HTML/*: added simple testcase and updated
the existing ones.
Fri Oct 13 18:24:31 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org> Fri Oct 13 18:24:31 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* xpath.c xpointer.c: XPointer reorder of ranges start/end and * xpath.c xpointer.c: XPointer reorder of ranges start/end and

View File

@ -559,7 +559,6 @@ static char *htmlNoContentElements[] = {
NULL NULL
}; };
static char** htmlStartCloseIndex[100]; static char** htmlStartCloseIndex[100];
static int htmlStartCloseIndexinitialized = 0; static int htmlStartCloseIndexinitialized = 0;
@ -1863,7 +1862,7 @@ htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
/* /*
* allocate a translation buffer. * allocate a translation buffer.
*/ */
buffer_size = HTML_PARSER_BIG_BUFFER_SIZE; buffer_size = HTML_PARSER_BUFFER_SIZE;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) { if (buffer == NULL) {
perror("htmlParseHTMLAttribute: malloc failed"); perror("htmlParseHTMLAttribute: malloc failed");
@ -2209,6 +2208,71 @@ htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
return(ret); return(ret);
} }
/**
 * htmlParseScript:
 * @ctxt:  an HTML parser context
 *
 * parse the content of an HTML SCRIPT or STYLE element
 * http://www.w3.org/TR/html4/sgml/dtd.html#Script
 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
 * http://www.w3.org/TR/html4/types.html#type-script
 * http://www.w3.org/TR/html4/types.html#h-6.15
 * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
 *
 * Script data ( %Script; in the DTD) can be the content of the SCRIPT
 * element and the value of intrinsic event attributes. User agents must
 * not evaluate script data as HTML markup but instead must pass it on as
 * data to a script engine.
 * NOTES:
 * - The content is passed like CDATA: it is accumulated in a local buffer
 *   and handed to the SAX cdataBlock callback, in several pieces when it
 *   is longer than HTML_PARSER_BIG_BUFFER_SIZE.
 * - Parsing stops at the first "</" immediately followed by an ASCII
 *   letter, or at the first character rejected by IS_CHAR. The
 *   terminating sequence itself is not consumed.
 * - Nothing is delivered when there is no SAX handler, when SAX is
 *   disabled on the context, or when the handler has no cdataBlock
 *   callback; the input is still consumed in those cases.
 * - the attributes for style and scripting "onXXX" are also described
 *   as CDATA but SGML allows entities references in attributes so their
 *   processing is identical as other attributes
 */
void
htmlParseScript(htmlParserCtxtPtr ctxt) {
    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 1];
    int nbchar = 0;
    xmlChar cur;

    SHRINK;
    cur = CUR;
    while (IS_CHAR(cur)) {
        if ((cur == '<') && (NXT(1) == '/')) {
            /*
             * The specification asks authors to escape "</" within the
             * content (escape mechanisms are specific to each scripting
             * or style sheet language). Only "</" followed by a letter
             * is treated as the end of the data here, so sequences such
             * as "</ " or "</1" remain part of the script.
             */
            if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) ||
                ((NXT(2) >= 'a') && (NXT(2) <= 'z')))
                break; /* while */
        }
        buf[nbchar++] = cur;
        if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
            /*
             * Buffer full: flush it. Use the same guards as the final
             * flush below so a context without SAX handler does not
             * crash and a disabled SAX stays silent.
             */
            if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                (ctxt->sax->cdataBlock != NULL)) {
                /*
                 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
                 */
                ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
            }
            nbchar = 0;
        }
        NEXT;
        cur = CUR;
    }

    /* Deliver whatever is left in the buffer. */
    if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX) &&
        (ctxt->sax->cdataBlock != NULL)) {
        /*
         * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
         */
        ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
    }
}
/** /**
* htmlParseCharData: * htmlParseCharData:
* @ctxt: an HTML parser context * @ctxt: an HTML parser context
@ -3112,68 +3176,75 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
return; return;
} }
/* if ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
* Sometimes DOCTYPE arrives in the middle of the document (xmlStrEqual(currentNode, BAD_CAST"style"))) {
*/ /*
if ((CUR == '<') && (NXT(1) == '!') && * Handle SCRIPT/STYLE separately
(UPP(2) == 'D') && (UPP(3) == 'O') && */
(UPP(4) == 'C') && (UPP(5) == 'T') && htmlParseScript(ctxt);
(UPP(6) == 'Y') && (UPP(7) == 'P') && } else {
(UPP(8) == 'E')) { /*
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) * Sometimes DOCTYPE arrives in the middle of the document
ctxt->sax->error(ctxt->userData, */
"Misplaced DOCTYPE declaration\n"); if ((CUR == '<') && (NXT(1) == '!') &&
ctxt->wellFormed = 0; (UPP(2) == 'D') && (UPP(3) == 'O') &&
htmlParseDocTypeDecl(ctxt); (UPP(4) == 'C') && (UPP(5) == 'T') &&
} (UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
/*
* First case : a comment
*/
if ((CUR == '<') && (NXT(1) == '!') &&
(NXT(2) == '-') && (NXT(3) == '-')) {
htmlParseComment(ctxt);
}
/*
* Second case : a sub-element.
*/
else if (CUR == '<') {
htmlParseElement(ctxt);
}
/*
* Third case : a reference. If if has not been resolved,
* parsing returns it's Name, create the node
*/
else if (CUR == '&') {
htmlParseReference(ctxt);
}
/*
* Fourth : end of the resource
*/
else if (CUR == 0) {
htmlAutoClose(ctxt, NULL);
}
/*
* Last case, text. Note that References are handled directly.
*/
else {
htmlParseCharData(ctxt, 0);
}
if (cons == ctxt->nbChars) {
if (ctxt->node != NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, ctxt->sax->error(ctxt->userData,
"detected an error in element content\n"); "Misplaced DOCTYPE declaration\n");
ctxt->wellFormed = 0; ctxt->wellFormed = 0;
htmlParseDocTypeDecl(ctxt);
} }
break;
}
/*
* First case : a comment
*/
if ((CUR == '<') && (NXT(1) == '!') &&
(NXT(2) == '-') && (NXT(3) == '-')) {
htmlParseComment(ctxt);
}
/*
* Second case : a sub-element.
*/
else if (CUR == '<') {
htmlParseElement(ctxt);
}
/*
* Third case : a reference. If if has not been resolved,
* parsing returns it's Name, create the node
*/
else if (CUR == '&') {
htmlParseReference(ctxt);
}
/*
* Fourth : end of the resource
*/
else if (CUR == 0) {
htmlAutoClose(ctxt, NULL);
}
/*
* Last case, text. Note that References are handled directly.
*/
else {
htmlParseCharData(ctxt, 0);
}
if (cons == ctxt->nbChars) {
if (ctxt->node != NULL) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"detected an error in element content\n");
ctxt->wellFormed = 0;
}
break;
}
}
GROW; GROW;
} }
if (currentNode != NULL) xmlFree(currentNode); if (currentNode != NULL) xmlFree(currentNode);
@ -3739,6 +3810,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
fprintf(stderr, "HPP: try EPILOG\n");break; fprintf(stderr, "HPP: try EPILOG\n");break;
case XML_PARSER_PI: case XML_PARSER_PI:
fprintf(stderr, "HPP: try PI\n");break; fprintf(stderr, "HPP: try PI\n");break;
case XML_PARSER_SYSTEM_LITERAL:
fprintf(stderr, "HPP: try SYSTEM_LITERAL\n");break;
} }
#endif #endif
@ -4105,75 +4178,94 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
cur = in->cur[0]; cur = in->cur[0];
next = in->cur[1]; next = in->cur[1];
cons = ctxt->nbChars; cons = ctxt->nbChars;
/* if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
* Sometimes DOCTYPE arrives in the middle of the document (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
*/
if ((cur == '<') && (next == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
goto done;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Misplaced DOCTYPE declaration\n");
ctxt->wellFormed = 0;
htmlParseDocTypeDecl(ctxt);
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
goto done;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: Parsing Comment\n");
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
} else if ((cur == '<') && (next == '!') && (avail < 4)) {
goto done;
} else if ((cur == '<') && (next == '/')) {
ctxt->instate = XML_PARSER_END_TAG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: entering END_TAG\n");
#endif
break;
} else if (cur == '<') {
ctxt->instate = XML_PARSER_START_TAG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: entering START_TAG\n");
#endif
break;
} else if (cur == '&') {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: Parsing Reference\n");
#endif
/* TODO: check generation of subtrees if noent !!! */
htmlParseReference(ctxt);
} else {
/* TODO Avoid the extra copy, handle directly !!!!!! */
/* /*
* Goal of the following test is : * Handle SCRIPT/STYLE separately
* - minimize calls to the SAX 'character' callback
* when they are mergeable
*/ */
if ((ctxt->inputNr == 1) && if ((!terminate) &&
(avail < HTML_PARSER_BIG_BUFFER_SIZE)) { (htmlParseLookupSequence(ctxt, '<', '/', 0) < 0))
if ((!terminate) && goto done;
(htmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) htmlParseScript(ctxt);
goto done; if ((cur == '<') && (next == '/')) {
} ctxt->instate = XML_PARSER_END_TAG;
ctxt->checkIndex = 0; ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH #ifdef DEBUG_PUSH
fprintf(stderr, "HPP: Parsing char data\n"); fprintf(stderr, "HPP: entering END_TAG\n");
#endif #endif
htmlParseCharData(ctxt, 0); break;
}
} else {
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
if ((cur == '<') && (next == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
goto done;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"Misplaced DOCTYPE declaration\n");
ctxt->wellFormed = 0;
htmlParseDocTypeDecl(ctxt);
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
goto done;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: Parsing Comment\n");
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
} else if ((cur == '<') && (next == '!') && (avail < 4)) {
goto done;
} else if ((cur == '<') && (next == '/')) {
ctxt->instate = XML_PARSER_END_TAG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: entering END_TAG\n");
#endif
break;
} else if (cur == '<') {
ctxt->instate = XML_PARSER_START_TAG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: entering START_TAG\n");
#endif
break;
} else if (cur == '&') {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: Parsing Reference\n");
#endif
/* TODO: check generation of subtrees if noent !!! */
htmlParseReference(ctxt);
} else {
/* TODO Avoid the extra copy, handle directly !!!!!! */
/*
* Goal of the following test is :
* - minimize calls to the SAX 'character' callback
* when they are mergeable
*/
if ((ctxt->inputNr == 1) &&
(avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
goto done;
}
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
fprintf(stderr, "HPP: Parsing char data\n");
#endif
htmlParseCharData(ctxt, 0);
}
} }
if (cons == ctxt->nbChars) { if (cons == ctxt->nbChars) {
if (ctxt->node != NULL) { if (ctxt->node != NULL) {

View File

@ -818,6 +818,16 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const
xmlOutputBufferWriteString(buf, ";"); xmlOutputBufferWriteString(buf, ";");
return; return;
} }
if (cur->type == HTML_PRESERVE_NODE) {
if (cur->content != NULL) {
#ifndef XML_USE_BUFFER_CONTENT
xmlOutputBufferWriteString(buf, (const char *)cur->content);
#else
xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
#endif
}
return;
}
/* /*
* Get specific HTML info for that node. * Get specific HTML info for that node.

View File

@ -22,6 +22,7 @@ extern "C" {
#define HTML_TEXT_NODE XML_TEXT_NODE #define HTML_TEXT_NODE XML_TEXT_NODE
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE #define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
#define HTML_COMMENT_NODE XML_COMMENT_NODE #define HTML_COMMENT_NODE XML_COMMENT_NODE
#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE
htmlDocPtr htmlNewDoc (const xmlChar *URI, htmlDocPtr htmlNewDoc (const xmlChar *URI,
const xmlChar *ExternalID); const xmlChar *ExternalID);

4
SAX.c
View File

@ -1600,7 +1600,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
xmlParserError, xmlParserError,
xmlParserError, xmlParserError,
getParameterEntity, getParameterEntity,
NULL, cdataBlock,
NULL, NULL,
}; };
@ -1632,7 +1632,7 @@ htmlDefaultSAXHandlerInit(void)
htmlDefaultSAXHandler.endElement = endElement; htmlDefaultSAXHandler.endElement = endElement;
htmlDefaultSAXHandler.reference = NULL; htmlDefaultSAXHandler.reference = NULL;
htmlDefaultSAXHandler.characters = characters; htmlDefaultSAXHandler.characters = characters;
htmlDefaultSAXHandler.cdataBlock = NULL; xmlDefaultSAXHandler.cdataBlock = cdataBlock;
htmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace; htmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
htmlDefaultSAXHandler.processingInstruction = NULL; htmlDefaultSAXHandler.processingInstruction = NULL;
htmlDefaultSAXHandler.comment = comment; htmlDefaultSAXHandler.comment = comment;

View File

@ -22,6 +22,7 @@ extern "C" {
#define HTML_TEXT_NODE XML_TEXT_NODE #define HTML_TEXT_NODE XML_TEXT_NODE
#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE #define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE
#define HTML_COMMENT_NODE XML_COMMENT_NODE #define HTML_COMMENT_NODE XML_COMMENT_NODE
#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE
htmlDocPtr htmlNewDoc (const xmlChar *URI, htmlDocPtr htmlNewDoc (const xmlChar *URI,
const xmlChar *ExternalID); const xmlChar *ExternalID);

View File

@ -1,3 +1,3 @@
./test/HTML/doc2.htm:10: error: Misplaced DOCTYPE declaration ./test/HTML/doc2.htm:5: error: Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Tr <!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Tr
^ ^

View File

@ -11,7 +11,7 @@ SAX.endElement(title)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
, 1) , 1)
SAX.startElement(script, language='javascript') SAX.startElement(script, language='javascript')
SAX.characters( SAX.cdata(
NS_ActualOpen=windo, 194) NS_ActualOpen=windo, 194)
SAX.endElement(script) SAX.endElement(script)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(

View File

@ -18,7 +18,13 @@ A.nav:hover {
} }
</style> </style>
<script language="JavaScript"> <script language="JavaScript">
<!-- Idea by: Nic Wolfe (Nic@TimelapseProductions.com) --><!-- Web URL: http://fineline.xs.mw --><!-- This script and many more are available free online at --><!-- The JavaScript Source!! http://javascript.internet.com --><!-- Begin <!-- Idea by: Nic Wolfe (Nic@TimelapseProductions.com) -->
<!-- Web URL: http://fineline.xs.mw -->
<!-- This script and many more are available free online at -->
<!-- The JavaScript Source!! http://javascript.internet.com -->
<!-- Begin
function popUp(URL) { function popUp(URL) {
day = new Date(); day = new Date();
id = day.getTime(); id = day.getTime();
@ -816,7 +822,8 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
</TR> </TR>
</TABLE> </TABLE>
--><table bgcolor="silver" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td align="middle" width="100%"> --><table bgcolor="silver" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td align="middle" width="100%">
<!-- BEGIN GoTo.com Search Box --><script language="javascript" type="text/javascript"><!-- <!-- BEGIN GoTo.com Search Box --><script language="javascript" type="text/javascript">
<!--
if ((parseInt(navigator.appVersion) >= 3) if ((parseInt(navigator.appVersion) >= 3)
&& (navigator.appName != "Netscape")) { && (navigator.appName != "Netscape")) {
document.write("<IFRAME marginheight=0 frameborder=0 "); document.write("<IFRAME marginheight=0 frameborder=0 ");
@ -825,7 +832,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t"); document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=html&size=100x90&url=http://www.goto.co"); document.write("ype=html&size=100x90&url=http://www.goto.co");
document.write("m/d/search/ssn/&target=_blank&Partner=SSN80"); document.write("m/d/search/ssn/&target=_blank&Partner=SSN80");
document.write("42DF8478957377></IFRAME>"); document.write("42DF8478957377>");
} else if ((parseInt(navigator.appVersion) > 3) } else if ((parseInt(navigator.appVersion) > 3)
&& (navigator.appName == "Netscape")) { && (navigator.appName == "Netscape")) {
document.write("<SCRIPT language=javascript type=text/javas"); document.write("<SCRIPT language=javascript type=text/javas");
@ -833,7 +840,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t"); document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=js&size=100x90&url=http://www.goto.com/"); document.write("ype=js&size=100x90&url=http://www.goto.com/");
document.write("d/search/ssn/&target=_blank&Partner=SSN8042"); document.write("d/search/ssn/&target=_blank&Partner=SSN8042");
document.write("DF8478957377></SC"); document.write("DF8478957377>");
document.write("RIPT>"); document.write("RIPT>");
} else { } else {
document.write("<A TARGET=_blank "); document.write("<A TARGET=_blank ");
@ -841,9 +848,10 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
document.write("GIF=true>"); document.write("GIF=true>");
document.write("<IMG ismap "); document.write("<IMG ismap ");
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t"); document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=gif&size=100x90></A>"); document.write("ype=gif&size=100x90>");
} }
// --></script> // -->
</script>
<b><noscript></noscript></b> <b><noscript></noscript></b>
<a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"><img align="bottom" border="0" height="90" ismap src="doc3_files/100x90.gif" width="100"></a> <a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"><img align="bottom" border="0" height="90" ismap src="doc3_files/100x90.gif" width="100"></a>
<b><a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"></a></b> <b><a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"></a></b>

View File

@ -1,93 +1,105 @@
./test/HTML/doc3.htm:10: error: Misplaced DOCTYPE declaration ./test/HTML/doc3.htm:5: error: Misplaced DOCTYPE declaration
<!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//E <!-- END Naviscope Javascript --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//E
^ ^
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';' ./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';'
href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&i href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&i
^ ^
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';' ./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';'
_top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&medi _top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&medi
^ ^
./test/HTML/doc3.htm:52: error: htmlParseEntityRef: expecting ';' ./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';'
><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&i ><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&i
^ ^
./test/HTML/doc3.htm:145: error: error parsing attribute name ./test/HTML/doc3.htm:140: error: error parsing attribute name
width=70 Gentus?.?></A><BR><A width=70 Gentus?.?></A><BR><A
^ ^
./test/HTML/doc3.htm:145: error: htmlParseStartTag: problem parsing attributes ./test/HTML/doc3.htm:140: error: htmlParseStartTag: problem parsing attributes
width=70 Gentus?.?></A><BR><A width=70 Gentus?.?></A><BR><A
^ ^
./test/HTML/doc3.htm:145: error: Couldn't find end of Start Tag img ./test/HTML/doc3.htm:140: error: Couldn't find end of Start Tag img
width=70 Gentus?.?></A><BR><A width=70 Gentus?.?></A><BR><A
^ ^
./test/HTML/doc3.htm:148: error: Unexpected end tag : p ./test/HTML/doc3.htm:143: error: Unexpected end tag : p
</P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P </P></TD></TR></TBODY></TABLE></CENTER></TD></TR></TBODY></TABLE></CENTER></P
^ ^
./test/HTML/doc3.htm:236: error: Unexpected end tag : font ./test/HTML/doc3.htm:231: error: Unexpected end tag : font
Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow
^ ^
./test/HTML/doc3.htm:236: error: Unexpected end tag : a ./test/HTML/doc3.htm:231: error: Unexpected end tag : a
Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow Specials<BR><BR></FONT></A><BR></FONT></A><B><FONT color=yellow
^ ^
./test/HTML/doc3.htm:747: error: htmlParseEntityRef: expecting ';' ./test/HTML/doc3.htm:742: error: htmlParseEntityRef: expecting ';'
er=0 alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&si er=0 alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&si
^ ^
./test/HTML/doc3.htm:747: error: htmlParseEntityRef: expecting ';' ./test/HTML/doc3.htm:742: error: htmlParseEntityRef: expecting ';'
Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asi Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asi
^ ^
./test/HTML/doc3.htm:747: error: Unexpected end tag : li ./test/HTML/doc3.htm:742: error: Unexpected end tag : li
light.com/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI light.com/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI
^ ^
./test/HTML/doc3.htm:747: error: Unexpected end tag : font ./test/HTML/doc3.htm:742: error: Unexpected end tag : font
om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT
^ ^
./test/HTML/doc3.htm:747: error: Unexpected end tag : p ./test/HTML/doc3.htm:742: error: Unexpected end tag : p
=7708"></a></IFRAME></CENTER></LI></FONT></TD></TR></TBODY></TABLE></CENTER></P =7708"></a></IFRAME></CENTER></LI></FONT></TD></TR></TBODY></TABLE></CENTER></P
^ ^
./test/HTML/doc3.htm:772: error: Opening and ending tag mismatch: font and form ./test/HTML/doc3.htm:767: error: Opening and ending tag mismatch: font and form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!-- archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^ ^
./test/HTML/doc3.htm:772: error: Unexpected end tag : form ./test/HTML/doc3.htm:767: error: Unexpected end tag : form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!-- archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^ ^
./test/HTML/doc3.htm:815: error: Opening and ending tag mismatch: b and noscript ./test/HTML/doc3.htm:790: error: Unexpected end tag : iframe
document.write("42DF8478957377></IFRAME>");
^
./test/HTML/doc3.htm:798: error: End tag : expected '>'
document.write("DF8478957377></SC");
^
./test/HTML/doc3.htm:798: error: Unexpected end tag : sc
document.write("DF8478957377></SC");
^
./test/HTML/doc3.htm:806: error: Unexpected end tag : a
document.write("ype=gif&size=100x90></A>");
^
./test/HTML/doc3.htm:810: error: Opening and ending tag mismatch: b and noscript
<B><NOSCRIPT></B><A <B><NOSCRIPT></B><A
^ ^
./test/HTML/doc3.htm:820: error: Unexpected end tag : a ./test/HTML/doc3.htm:815: error: Unexpected end tag : a
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></ </A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^ ^
./test/HTML/doc3.htm:820: error: Unexpected end tag : noscript ./test/HTML/doc3.htm:815: error: Unexpected end tag : noscript
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></ </A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^ ^
./test/HTML/doc3.htm:826: error: Opening and ending tag mismatch: form and center ./test/HTML/doc3.htm:821: error: Opening and ending tag mismatch: form and center
</FORM><!-- Pricewatch Search Box --><A </FORM><!-- Pricewatch Search Box --><A
^ ^
./test/HTML/doc3.htm:833: error: Unexpected end tag : p ./test/HTML/doc3.htm:828: error: Unexpected end tag : p
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></ Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^ ^
./test/HTML/doc3.htm:833: error: Opening and ending tag mismatch: center and table ./test/HTML/doc3.htm:828: error: Opening and ending tag mismatch: center and table
Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></ Special<BR>Code:BP6-hd</FONT></A> </P></CENTER></TD></TR></TBODY></
^ ^
./test/HTML/doc3.htm:839: error: Unexpected end tag : p ./test/HTML/doc3.htm:834: error: Unexpected end tag : p
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE> width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^ ^
./test/HTML/doc3.htm:839: error: Unexpected end tag : center ./test/HTML/doc3.htm:834: error: Unexpected end tag : center
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE> width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^ ^
./test/HTML/doc3.htm:839: error: Unexpected end tag : tr ./test/HTML/doc3.htm:834: error: Unexpected end tag : tr
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE> width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^ ^
./test/HTML/doc3.htm:839: error: Unexpected end tag : tbody ./test/HTML/doc3.htm:834: error: Unexpected end tag : tbody
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE> width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^ ^
./test/HTML/doc3.htm:839: error: Unexpected end tag : table ./test/HTML/doc3.htm:834: error: Unexpected end tag : table
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE> width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^ ^
./test/HTML/doc3.htm:840: error: Unexpected end tag : td ./test/HTML/doc3.htm:835: error: Unexpected end tag : td
<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP" <CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP"
^ ^
./test/HTML/doc3.htm:840: error: Unexpected end tag : tr ./test/HTML/doc3.htm:835: error: Unexpected end tag : tr
<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP" <CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP"
^ ^
./test/HTML/doc3.htm:841: error: Unexpected end tag : table ./test/HTML/doc3.htm:836: error: Unexpected end tag : table
HEIGHT="70">&nbsp;</TD> </TR></TABLE> HEIGHT="70">&nbsp;</TD> </TR></TABLE>
^ ^

View File

@ -11,7 +11,7 @@ SAX.endElement(title)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
, 2) , 2)
SAX.startElement(script, language='javascript') SAX.startElement(script, language='javascript')
SAX.characters( SAX.cdata(
NS_ActualOpen=wind, 199) NS_ActualOpen=wind, 199)
SAX.endElement(script) SAX.endElement(script)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
@ -31,38 +31,15 @@ SAX.endElement(meta)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
, 2) , 2)
SAX.startElement(style, type='text/css') SAX.startElement(style, type='text/css')
SAX.characters(A.nav { SAX.cdata(A.nav {
COLOR: #003399; TEXT, 115) COLOR: #003399; TEXT, 115)
SAX.endElement(style) SAX.endElement(style)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
, 4) , 4)
SAX.startElement(script, language='JavaScript') SAX.startElement(script, language='JavaScript')
SAX.characters( SAX.cdata(
, 1) &lt;!-- Idea by: Nic Wolfe (, 476)
SAX.comment( Idea by: Nic Wolfe (Nic@TimelapseProductions.com) )
SAX.characters(
, 1)
SAX.comment( Web URL: http://fineline.xs.mw )
SAX.characters(
, 2)
SAX.comment( This script and many more are available free online at )
SAX.characters(
, 1)
SAX.comment( The JavaScript Source!! http://javascript.internet.com )
SAX.characters(
, 2)
SAX.comment( Begin
function popUp(URL) {
day = new Date();
id = day.getTime();
eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0, location=0, statusbars=0, menubars=0, resizable=0, width=145, height=250');");
}
// End )
SAX.characters(
, 1)
SAX.endElement(script) SAX.endElement(script)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
@ -2717,38 +2694,20 @@ SAX.comment( BEGIN GoTo.com Search Box )
SAX.startElement(td, align='middle', width='100%') SAX.startElement(td, align='middle', width='100%')
SAX.comment( BEGIN GoTo.com Search Box ) SAX.comment( BEGIN GoTo.com Search Box )
SAX.characters( SAX.characters(
, 14) , 14)
SAX.startElement(script, language='javascript', type='text/javascript') SAX.startElement(script, language='javascript', type='text/javascript')
SAX.characters( SAX.cdata(
, 9) &lt;!--
SAX.comment( if ((, 532)
if ((parseInt(navigator.appVersion) >= 3) SAX.error: Unexpected end tag : iframe
&& (navigator.appName != "Netscape")) { SAX.cdata(");
document.write("<IFRAME marginheight=0 frameborder=0 "); } else if ((parseI, 463)
document.write("marginwidth=0 scrolling=no width=100 height"); SAX.error: End tag : expected '>'
document.write("=90 "); SAX.error: Unexpected end tag : sc
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t"); SAX.cdata(");
document.write("ype=html&size=100x90&url=http://www.goto.co"); document.write("RI, 361)
document.write("m/d/search/ssn/&target=_blank&Partner=SSN80");
document.write("42DF8478957377></IFRAME>");
} else if ((parseInt(navigator.appVersion) > 3)
&& (navigator.appName == "Netscape")) {
document.write("<SCRIPT language=javascript type=text/javas");
document.write("cript ");
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=js&size=100x90&url=http://www.goto.com/");
document.write("d/search/ssn/&target=_blank&Partner=SSN8042");
document.write("DF8478957377></SC");
document.write("RIPT>");
} else {
document.write("<A TARGET=_blank ");
document.write("HREF=http://www.goto.com/d/search/ssn/?from");
document.write("GIF=true>");
document.write("<IMG ismap ");
SAX.error: Unexpected end tag : a SAX.error: Unexpected end tag : a
document.write("ype=gif&size=100x90></A>"); SAX.cdata(");
}
// )
} }
// --, 37) // --, 37)
SAX.endElement(script) SAX.endElement(script)
@ -2916,7 +2875,7 @@ SAX.endElement(div)
SAX.endElement(table) SAX.endElement(table)
SAX.endElement(div) SAX.endElement(div)
SAX.ignorableWhitespace( SAX.ignorableWhitespace(
, 2) , 2)
SAX.startElement(script) SAX.startElement(script)
SAX.cdata( window.open=NS_ActualOpen; , 28) SAX.cdata( window.open=NS_ActualOpen; , 28)
SAX.endElement(script) SAX.endElement(script)

10
result/HTML/script.html Normal file
View File

@ -0,0 +1,10 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head><title>Script tests</title></head>
<body>
<script language="javascript">
if (window.open<max) ;
</script>
<input onclick="if(window.open&lt;max);">
</body>
</html>

View File

View File

@ -0,0 +1,32 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(html)
SAX.ignorableWhitespace(
, 1)
SAX.startElement(head)
SAX.startElement(title)
SAX.characters(Script tests, 12)
SAX.endElement(title)
SAX.endElement(head)
SAX.ignorableWhitespace(
, 1)
SAX.startElement(body)
SAX.ignorableWhitespace(
, 1)
SAX.startElement(script, language='javascript')
SAX.cdata(
if (window.open&lt;max) ;, 28)
SAX.endElement(script)
SAX.ignorableWhitespace(
, 1)
SAX.startElement(input, onclick='if(window.open&lt;max);')
SAX.endElement(input)
SAX.ignorableWhitespace(
, 1)
SAX.endElement(body)
SAX.ignorableWhitespace(
, 1)
SAX.endElement(html)
SAX.ignorableWhitespace(
, 1)
SAX.endDocument()

9
test/HTML/script.html Normal file
View File

@ -0,0 +1,9 @@
<HTML>
<HEAD><TITLE>Script tests</TITLE></HEAD>
<BODY>
<SCRIPT language=javascript>
if (window.open<max) ;
</SCRIPT>
<INPUT ONCLICK="if(window.open<max);">
</BODY>
</HTML>

View File

@ -420,6 +420,27 @@ charactersDebug(void *ctx, const xmlChar *ch, int len)
fprintf(stdout, "SAX.characters(%s, %d)\n", output, len); fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
} }
/**
 * cdataDebug:
 * @ctx:  user data handed over by the parser (not used by this callback)
 * @ch:  the CDATA characters
 * @len:  the number of xmlChar in @ch
 *
 * Debug SAX callback invoked when the parser reports a CDATA block.
 * Prints a short entity-escaped preview of the data on stdout,
 * followed by the full length received.
 */
void
cdataDebug(void *ctx, const xmlChar *ch, int len)
{
    unsigned char preview[40];
    int consumed = len;
    int produced = 30;

    /*
     * produced/consumed are in/out arguments of htmlEncodeEntities;
     * presumably produced comes back as the number of bytes written,
     * capped at the 30 requested (confirm against its definition), which
     * leaves room in preview[] for the terminator stored just below.
     */
    htmlEncodeEntities(preview, &produced, ch, &consumed, 0);
    preview[produced] = 0;

    fprintf(stdout, "SAX.cdata(%s, %d)\n", preview, len);
}
/** /**
* referenceDebug: * referenceDebug:
* @ctxt: An XML parser context * @ctxt: An XML parser context
@ -572,6 +593,8 @@ xmlSAXHandler debugSAXHandlerStruct = {
errorDebug, errorDebug,
fatalErrorDebug, fatalErrorDebug,
getParameterEntityDebug, getParameterEntityDebug,
cdataDebug,
NULL
}; };
xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;

8
tree.c
View File

@ -495,12 +495,8 @@ xmlDocPtr
xmlNewDoc(const xmlChar *version) { xmlNewDoc(const xmlChar *version) {
xmlDocPtr cur; xmlDocPtr cur;
if (version == NULL) { if (version == NULL)
#ifdef DEBUG_TREE version = (const xmlChar *) "1.0";
fprintf(stderr, "xmlNewDoc : version == NULL\n");
#endif
return(NULL);
}
/* /*
* Allocate a new document and fill the fields. * Allocate a new document and fill the fields.