diff --git a/ChangeLog b/ChangeLog index e2bad2bf..2bcb0efe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Mon Oct 6 10:16:30 CEST 2003 Daniel Veillard + + * check-xml-test-suite.py: fixing the script + * parser.c: replace sequences of RAW && NXT(.) == '.' with + memcmp calls, seems to not break conformance, slightly inflate + the size of the gcc generated code though. + Sun Oct 5 23:30:48 CEST 2003 Daniel Veillard * parserInternals.c parser.c valid.c include/libxml/parserInternals.h: diff --git a/check-xml-test-suite.py b/check-xml-test-suite.py index 9fca112a..23b0706d 100755 --- a/check-xml-test-suite.py +++ b/check-xml-test-suite.py @@ -23,8 +23,7 @@ def errorHandler(ctx, str): global error_nr global error_msg - if string.find(str, "error:") >= 0: - error_nr = error_nr + 1 + error_nr = error_nr + 1 if len(error_msg) < 300: if len(error_msg) == 0 or error_msg[-1] == '\n': error_msg = error_msg + " >>" + str @@ -77,16 +76,17 @@ def testNotWf(filename, id): ctxt = libxml2.createFileParserCtxt(filename) if ctxt == None: return -1 - ctxt.parseDocument() + ret = ctxt.parseDocument() try: doc = ctxt.doc() except: doc = None - if error_nr == 0 or ctxt.wellFormed() != 0: + if doc != None: + doc.freeDoc() + if ret == 0 or ctxt.wellFormed() != 0: print "%s: error: Well Formedness error not detected" % (id) log.write("%s: error: Well Formedness error not detected\n" % (id)) - doc.freeDoc() return 0 return 1 @@ -102,16 +102,17 @@ def testNotWfEnt(filename, id): if ctxt == None: return -1 ctxt.replaceEntities(1) - ctxt.parseDocument() + ret = ctxt.parseDocument() try: doc = ctxt.doc() except: doc = None - if error_nr == 0 or ctxt.wellFormed() != 0: + if doc != None: + doc.freeDoc() + if ret == 0 or ctxt.wellFormed() != 0: print "%s: error: Well Formedness error not detected" % (id) log.write("%s: error: Well Formedness error not detected\n" % (id)) - doc.freeDoc() return 0 return 1 @@ -128,16 +129,17 @@ def testNotWfEntDtd(filename, id): return -1 ctxt.replaceEntities(1) ctxt.loadSubset(1) - ctxt.parseDocument() + ret = ctxt.parseDocument() try: doc = ctxt.doc() except: doc = None - if error_nr == 0 or ctxt.wellFormed() != 0: + if doc != None: + doc.freeDoc() + if ret == 0 or ctxt.wellFormed() != 0: print "%s: error: Well Formedness error not detected" % (id) log.write("%s: error: Well Formedness error not detected\n" % (id)) - doc.freeDoc() return 0 return 1 @@ -154,15 +156,17 @@ def testWfEntDtd(filename, id): return -1 ctxt.replaceEntities(1) ctxt.loadSubset(1) - ctxt.parseDocument() + ret = ctxt.parseDocument() try: doc = ctxt.doc() except: doc = None - if ctxt.wellFormed() == 0: + if doc == None or ret != 0 or ctxt.wellFormed() == 0: print "%s: error: wrongly failed to parse the document" % (id) log.write("%s: error: wrongly failed to parse the document\n" % (id)) + if doc != None: + doc.freeDoc() return 0 if error_nr != 0: print "%s: warning: WF document generated an error msg" % (id) @@ -185,12 +189,14 @@ def testError(filename, id): return -1 ctxt.replaceEntities(1) ctxt.loadSubset(1) - ctxt.parseDocument() + ret = ctxt.parseDocument() try: doc = ctxt.doc() except: doc = None + if doc != None: + doc.freeDoc() if ctxt.wellFormed() == 0: print "%s: warning: failed to parse the document but accepted" % (id) log.write("%s: warning: failed to parse the document but accepte\n" % (id)) @@ -198,9 +204,7 @@ def testError(filename, id): if error_nr != 0: print "%s: warning: WF document generated an error msg" % (id) log.write("%s: error: WF document generated an error msg\n" % (id)) - doc.freeDoc() return 2 - doc.freeDoc() return 1 def testInvalid(filename, id): @@ -215,7 +219,7 @@ def testInvalid(filename, id): if ctxt == None: return -1 ctxt.validate(1) - ctxt.parseDocument() + ret = ctxt.parseDocument() try: doc = ctxt.doc() diff --git a/parser.c b/parser.c index ed4ce25d..e5ce1dac 100644 --- a/parser.c +++ b/parser.c @@ -1766,9 +1766,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { } if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && - (RAW == '<') && (NXT(1) == '?') && - (NXT(2) == 'x') && (NXT(3) == 'm') && - (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + (memcmp(CUR_PTR, "input; SHRINK; SKIP(10); @@ -4203,10 +4193,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { int skipped; GROW; - if ((RAW == '<') && (NXT(1) == '!') && - (NXT(2) == 'E') && (NXT(3) == 'N') && - (NXT(4) == 'T') && (NXT(5) == 'I') && - (NXT(6) == 'T') && (NXT(7) == 'Y')) { + if (memcmp(CUR_PTR, "input; SHRINK; SKIP(8); @@ -4344,9 +4331,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) { "Space required before 'NDATA'\n"); } SKIP_BLANKS; - if ((RAW == 'N') && (NXT(1) == 'D') && - (NXT(2) == 'A') && (NXT(3) == 'T') && - (NXT(4) == 'A')) { + if (memcmp(CUR_PTR, "NDATA", 5) == 0) { SKIP(5); if (!IS_BLANK(CUR)) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, @@ -4463,25 +4448,16 @@ xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) { xmlChar *ret; *value = NULL; - if ((RAW == '#') && (NXT(1) == 'R') && - (NXT(2) == 'E') && (NXT(3) == 'Q') && - (NXT(4) == 'U') && (NXT(5) == 'I') && - (NXT(6) == 'R') && (NXT(7) == 'E') && - (NXT(8) == 'D')) { + if (memcmp(CUR_PTR, "#REQUIRED", 9) == 0) { SKIP(9); return(XML_ATTRIBUTE_REQUIRED); } - if ((RAW == '#') && (NXT(1) == 'I') && - (NXT(2) == 'M') && (NXT(3) == 'P') && - (NXT(4) == 'L') && (NXT(5) == 'I') && - (NXT(6) == 'E') && (NXT(7) == 'D')) { + if (memcmp(CUR_PTR, "#IMPLIED", 8) == 0) { SKIP(8); return(XML_ATTRIBUTE_IMPLIED); } val = XML_ATTRIBUTE_NONE; - if ((RAW == '#') && (NXT(1) == 'F') && - (NXT(2) == 'I') && (NXT(3) == 'X') && - (NXT(4) == 'E') && (NXT(5) == 'D')) { + if (memcmp(CUR_PTR, "#FIXED", 6) == 0) { SKIP(6); val = XML_ATTRIBUTE_FIXED; if (!IS_BLANK(CUR)) { @@ -4623,10 +4599,7 @@ xmlParseEnumerationType(xmlParserCtxtPtr ctxt) { int xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { - if ((RAW == 'N') && (NXT(1) == 'O') && - (NXT(2) == 'T') && (NXT(3) == 'A') && - (NXT(4) == 'T') && (NXT(5) == 'I') && - (NXT(6) == 'O') && (NXT(7) == 'N')) { + if (memcmp(CUR_PTR, "NOTATION", 8) == 0) { SKIP(8); if (!IS_BLANK(CUR)) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, @@ -4691,45 +4664,28 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { int xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { SHRINK; - if ((RAW == 'C') && (NXT(1) == 'D') && - (NXT(2) == 'A') && (NXT(3) == 'T') && - (NXT(4) == 'A')) { + if (memcmp(CUR_PTR, "CDATA", 5) == 0) { SKIP(5); return(XML_ATTRIBUTE_CDATA); - } else if ((RAW == 'I') && (NXT(1) == 'D') && - (NXT(2) == 'R') && (NXT(3) == 'E') && - (NXT(4) == 'F') && (NXT(5) == 'S')) { + } else if (memcmp(CUR_PTR, "IDREFS", 6) == 0) { SKIP(6); return(XML_ATTRIBUTE_IDREFS); - } else if ((RAW == 'I') && (NXT(1) == 'D') && - (NXT(2) == 'R') && (NXT(3) == 'E') && - (NXT(4) == 'F')) { + } else if (memcmp(CUR_PTR, "IDREF", 5) == 0) { SKIP(5); return(XML_ATTRIBUTE_IDREF); } else if ((RAW == 'I') && (NXT(1) == 'D')) { SKIP(2); return(XML_ATTRIBUTE_ID); - } else if ((RAW == 'E') && (NXT(1) == 'N') && - (NXT(2) == 'T') && (NXT(3) == 'I') && - (NXT(4) == 'T') && (NXT(5) == 'Y')) { + } else if (memcmp(CUR_PTR, "ENTITY", 6) == 0) { SKIP(6); return(XML_ATTRIBUTE_ENTITY); - } else if ((RAW == 'E') && (NXT(1) == 'N') && - (NXT(2) == 'T') && (NXT(3) == 'I') && - (NXT(4) == 'T') && (NXT(5) == 'I') && - (NXT(6) == 'E') && (NXT(7) == 'S')) { + } else if (memcmp(CUR_PTR, "ENTITIES", 8) == 0) { SKIP(8); return(XML_ATTRIBUTE_ENTITIES); - } else if ((RAW == 'N') && (NXT(1) == 'M') && - (NXT(2) == 'T') && (NXT(3) == 'O') && - (NXT(4) == 'K') && (NXT(5) == 'E') && - (NXT(6) == 'N') && (NXT(7) == 'S')) { + } else if (memcmp(CUR_PTR, "NMTOKENS", 8) == 0) { SKIP(8); return(XML_ATTRIBUTE_NMTOKENS); - } else if ((RAW == 'N') && (NXT(1) == 'M') && - (NXT(2) == 'T') && (NXT(3) == 'O') && - (NXT(4) == 'K') && (NXT(5) == 'E') && - (NXT(6) == 'N')) { + } else if (memcmp(CUR_PTR, "NMTOKEN", 7) == 0) { SKIP(7); return(XML_ATTRIBUTE_NMTOKEN); } @@ -4753,11 +4709,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { const xmlChar *attrName; xmlEnumerationPtr tree; - if ((RAW == '<') && (NXT(1) == '!') && - (NXT(2) == 'A') && (NXT(3) == 'T') && - (NXT(4) == 'T') && (NXT(5) == 'L') && - (NXT(6) == 'I') && (NXT(7) == 'S') && - (NXT(8) == 'T')) { + if (memcmp(CUR_PTR, "input; SKIP(9); @@ -4902,10 +4854,7 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { const xmlChar *elem = NULL; GROW; - if ((RAW == '#') && (NXT(1) == 'P') && - (NXT(2) == 'C') && (NXT(3) == 'D') && - (NXT(4) == 'A') && (NXT(5) == 'T') && - (NXT(6) == 'A')) { + if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) { SKIP(7); SKIP_BLANKS; SHRINK; @@ -5287,10 +5236,7 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name, NEXT; GROW; SKIP_BLANKS; - if ((RAW == '#') && (NXT(1) == 'P') && - (NXT(2) == 'C') && (NXT(3) == 'D') && - (NXT(4) == 'A') && (NXT(5) == 'T') && - (NXT(6) == 'A')) { + if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) { tree = xmlParseElementMixedContentDecl(ctxt, inputid); res = XML_ELEMENT_TYPE_MIXED; } else { @@ -5322,11 +5268,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { xmlElementContentPtr content = NULL; GROW; - if ((RAW == '<') && (NXT(1) == '!') && - (NXT(2) == 'E') && (NXT(3) == 'L') && - (NXT(4) == 'E') && (NXT(5) == 'M') && - (NXT(6) == 'E') && (NXT(7) == 'N') && - (NXT(8) == 'T')) { + if (memcmp(CUR_PTR, "input; SKIP(9); @@ -5348,9 +5290,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { "Space required after the element name\n"); } SKIP_BLANKS; - if ((RAW == 'E') && (NXT(1) == 'M') && - (NXT(2) == 'P') && (NXT(3) == 'T') && - (NXT(4) == 'Y')) { + if (memcmp(CUR_PTR, "EMPTY", 5) == 0) { SKIP(5); /* * Element must always be empty. @@ -5424,9 +5364,7 @@ static void xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { SKIP(3); SKIP_BLANKS; - if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') && - (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') && - (NXT(6) == 'E')) { + if (memcmp(CUR_PTR, "INCLUDE", 7) == 0) { SKIP(7); SKIP_BLANKS; if (RAW != '[') { @@ -5477,8 +5415,7 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { "Leaving INCLUDE Conditional Section\n"); } - } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && - (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { + } else if (memcmp(CUR_PTR, "IGNORE", 6) == 0) { int state; xmlParserInputState instate; int depth = 0; @@ -5617,9 +5554,7 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { /* * We know that 'errNo == XML_ERR_UNSUPPORTED_ENCODING) { /* @@ -6047,9 +5980,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { input = xmlNewEntityInputStream(ctxt, ent); xmlPushInput(ctxt, input); if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && - (RAW == '<') && (NXT(1) == '?') && - (NXT(2) == 'x') && (NXT(3) == 'm') && - (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + (memcmp(CUR_PTR, "errNo == XML_ERR_UNSUPPORTED_ENCODING) { /* @@ -6436,7 +6367,8 @@ xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) { * NOTE: misleading but this is handled. */ void -xmlParsePEReference(xmlParserCtxtPtr ctxt) { +xmlParsePEReference(xmlParserCtxtPtr ctxt) +{ const xmlChar *name; xmlEntityPtr entity = NULL; xmlParserInputPtr input; @@ -6444,84 +6376,86 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) { if (RAW == '%') { NEXT; name = xmlParseName(ctxt); - if (name == NULL) { - xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, - "xmlParsePEReference: no name\n"); - } else { - if (RAW == ';') { - NEXT; - if ((ctxt->sax != NULL) && - (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, - name); - if (entity == NULL) { - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", name); - } else { - /* - * [ VC: Entity Declared ] - * In a document with an external subset or external - * parameter entities with "standalone='no'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - ctxt->valid = 0; - } - } else { - /* - * Internal checking in case the entity quest barfed - */ - if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "Internal: %%%s; is not a parameter entity\n", - name, NULL); - } else if (ctxt->input->free != deallocblankswrapper) { - input = xmlNewBlanksWrapperInputStream(ctxt, entity); - xmlPushInput(ctxt, input); - } else { - /* - * TODO !!! - * handle the extra spaces added before and after - * c.f. http://www.w3.org/TR/REC-xml#as-PE - */ - input = xmlNewEntityInputStream(ctxt, entity); - xmlPushInput(ctxt, input); - if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && - (RAW == '<') && (NXT(1) == '?') && - (NXT(2) == 'x') && (NXT(3) == 'm') && - (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { - xmlParseTextDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { - /* - * The XML REC instructs us to stop parsing - * right here - */ - ctxt->instate = XML_PARSER_EOF; - return; - } - } - } - } - ctxt->hasPErefs = 1; - } else { - xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); - } - } + if (name == NULL) { + xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED, + "xmlParsePEReference: no name\n"); + } else { + if (RAW == ';') { + NEXT; + if ((ctxt->sax != NULL) && + (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, + name); + if (entity == NULL) { + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name); + } else { + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must + * precede any reference to it... + */ + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "PEReference: %%%s; not found\n", + name, NULL); + ctxt->valid = 0; + } + } else { + /* + * Internal checking in case the entity quest barfed + */ + if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) { + xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, + "Internal: %%%s; is not a parameter entity\n", + name, NULL); + } else if (ctxt->input->free != deallocblankswrapper) { + input = + xmlNewBlanksWrapperInputStream(ctxt, entity); + xmlPushInput(ctxt, input); + } else { + /* + * TODO !!! + * handle the extra spaces added before and after + * c.f. http://www.w3.org/TR/REC-xml#as-PE + */ + input = xmlNewEntityInputStream(ctxt, entity); + xmlPushInput(ctxt, input); + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + (memcmp(CUR_PTR, "errNo == + XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing + * right here + */ + ctxt->instate = XML_PARSER_EOF; + return; + } + } + } + } + ctxt->hasPErefs = 1; + } else { + xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL); + } + } } } @@ -8052,11 +7986,8 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { int cur, l; int count = 0; - if ((NXT(0) == '<') && (NXT(1) == '!') && - (NXT(2) == '[') && (NXT(3) == 'C') && - (NXT(4) == 'D') && (NXT(5) == 'A') && - (NXT(6) == 'T') && (NXT(7) == 'A') && - (NXT(8) == '[')) { + /* Check 2.6.0 was NXT(0) not RAW */ + if (memcmp(CUR_PTR, "inSubset = 1; xmlParseDocTypeDecl(ctxt); @@ -9035,9 +8946,7 @@ xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) { * Check for the XMLDecl in the Prolog. */ GROW; - if ((RAW == '<') && (NXT(1) == '?') && - (NXT(2) == 'x') && (NXT(3) == 'm') && - (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + if ((memcmp(CUR_PTR, "