From 39a1f9a3a736c66a1ca491f4c4b339f573bd3f39 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Sun, 17 Jan 1999 19:11:59 +0000 Subject: [PATCH] Speed, conformance testing, more parsing, general improvements, Daniel. --- ChangeLog | 9 + Makefile.am | 6 +- SAX.c | 2 + debugXML.c | 2 + encoding.c | 2 - encoding.h | 2 - entities.c | 35 +- entities.h | 2 +- error.c | 2 + include/libxml/encoding.h | 2 - include/libxml/entities.h | 2 +- include/libxml/parser.h | 35 +- include/libxml/tree.h | 52 +- include/libxml/valid.h | 18 + parser.c | 964 ++++++++++++++++++++++++++++++++++---- parser.h | 35 +- tester.c | 18 +- tree.c | 179 ++++--- tree.h | 52 +- valid.c | 162 +++++++ valid.h | 18 + 21 files changed, 1397 insertions(+), 202 deletions(-) create mode 100644 include/libxml/valid.h create mode 100644 valid.c create mode 100644 valid.h diff --git a/ChangeLog b/ChangeLog index 425103ab..d0e3ba7d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Sun Jan 17 20:06:36 CET 1999 Daniel Veillard + + * parser.c, tree.[ch] : more work toward conformance testing, + added a last element to accelerate parsing of very flat structures + started working on internal subset Element content declaration. + * valid.[ch] : first cut at adding code toward validation. + * previous changes had also small impact on most files, especially + the conformance testing using James Clark test suite. 
+ Sun Jan 17 14:45:06 CET 1999 Daniel Veillard * test/* : updated the examples, most of them were not well diff --git a/Makefile.am b/Makefile.am index df550e8f..87a2baea 100644 --- a/Makefile.am +++ b/Makefile.am @@ -16,7 +16,8 @@ libxml_la_SOURCES = \ error.h \ parser.c \ debugXML.c \ - tree.c + tree.c \ + valid.c xmlincdir = $(includedir)/gnome-xml xmlinc_HEADERS = \ @@ -24,7 +25,8 @@ xmlinc_HEADERS = \ encoding.h \ parser.h \ debugXML.h \ - tree.h + tree.h \ + valid.h DEPS = $(top_builddir)/libxml.la LDADDS = $(top_builddir)/libxml.la @Z_LIBS@ diff --git a/SAX.c b/SAX.c index 9b7e504f..46afbb9b 100644 --- a/SAX.c +++ b/SAX.c @@ -1,6 +1,8 @@ /* * SAX.c : Default SAX handler to build a tree. * + * See Copyright for the status of this software. + * * Daniel Veillard */ diff --git a/debugXML.c b/debugXML.c index bb913fd9..5abec63e 100644 --- a/debugXML.c +++ b/debugXML.c @@ -2,6 +2,8 @@ * debugXML.c : This is a set of routines used for debugging the tree * produced by the XML parser. * + * See Copyright for the status of this software. + * * Daniel Veillard */ diff --git a/encoding.c b/encoding.c index ce86fff7..295ca39a 100644 --- a/encoding.c +++ b/encoding.c @@ -16,8 +16,6 @@ * * See Copyright for the status of this software. * - * $Id$ - * * Daniel.Veillard@w3.org */ diff --git a/encoding.h b/encoding.h index 72348c3c..b5a11f99 100644 --- a/encoding.h +++ b/encoding.h @@ -17,8 +17,6 @@ * * See Copyright for the status of this software. * - * $Id$ - * * Daniel.Veillard@w3.org */ diff --git a/entities.c b/entities.c index 30506cb4..ef6aa792 100644 --- a/entities.c +++ b/entities.c @@ -3,7 +3,7 @@ * * See Copyright for the status of this software. 
* - * $Id$ + * Daniel.Veillard@w3.org */ #include @@ -186,14 +186,15 @@ xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type, const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) { xmlEntitiesTablePtr table; - if (doc->dtd == NULL) { - fprintf(stderr, "xmlAddDtdEntity: document without Dtd !\n"); + if (doc->extSubset == NULL) { + fprintf(stderr, + "xmlAddDtdEntity: document without external subset !\n"); return; } - table = (xmlEntitiesTablePtr) doc->dtd->entities; + table = (xmlEntitiesTablePtr) doc->extSubset->entities; if (table == NULL) { table = xmlCreateEntitiesTable(); - doc->dtd->entities = table; + doc->extSubset->entities = table; } xmlAddEntity(table, name, type, ExternalID, SystemID, content); } @@ -214,12 +215,22 @@ xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type, const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) { xmlEntitiesTablePtr table; - table = (xmlEntitiesTablePtr) doc->entities; + if (doc == NULL) { + fprintf(stderr, + "xmlAddDocEntity: document is NULL !\n"); + return; + } + if (doc->intSubset == NULL) { + fprintf(stderr, + "xmlAddDtdEntity: document without internal subset !\n"); + return; + } + table = (xmlEntitiesTablePtr) doc->intSubset->entities; if (table == NULL) { table = xmlCreateEntitiesTable(); - doc->entities = table; + doc->intSubset->entities = table; } - xmlAddEntity(doc->entities, name, type, ExternalID, SystemID, content); + xmlAddEntity(table, name, type, ExternalID, SystemID, content); } /** @@ -238,8 +249,8 @@ xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name) { xmlEntityPtr cur; xmlEntitiesTablePtr table; - if ((doc->dtd != NULL) && (doc->dtd->entities != NULL)) { - table = (xmlEntitiesTablePtr) doc->dtd->entities; + if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { + table = (xmlEntitiesTablePtr) doc->extSubset->entities; for (i = 0;i < table->nb_entities;i++) { cur = &table->table[i]; if (!xmlStrcmp(cur->name, name)) return(cur); @@ -265,8 +276,8 @@ 
xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) { xmlEntityPtr cur; xmlEntitiesTablePtr table; - if (doc->entities != NULL) { - table = (xmlEntitiesTablePtr) doc->entities; + if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { + table = (xmlEntitiesTablePtr) doc->intSubset->entities; for (i = 0;i < table->nb_entities;i++) { cur = &table->table[i]; if (!xmlStrcmp(cur->name, name)) return(cur); diff --git a/entities.h b/entities.h index 0830101e..86c425fe 100644 --- a/entities.h +++ b/entities.h @@ -3,7 +3,7 @@ * * See Copyright for the status of this software. * - * $Id$ + * Daniel.Veillard@w3.org */ #ifndef __XML_ENTITIES_H__ diff --git a/error.c b/error.c index a67bdbd9..eb93d96e 100644 --- a/error.c +++ b/error.c @@ -1,6 +1,8 @@ /* * error.c: module displaying/handling XML parser errors * + * See Copyright for the status of this software. + * * Daniel Veillard */ diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h index 72348c3c..b5a11f99 100644 --- a/include/libxml/encoding.h +++ b/include/libxml/encoding.h @@ -17,8 +17,6 @@ * * See Copyright for the status of this software. * - * $Id$ - * * Daniel.Veillard@w3.org */ diff --git a/include/libxml/entities.h b/include/libxml/entities.h index 0830101e..86c425fe 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -3,7 +3,7 @@ * * See Copyright for the status of this software. * - * $Id$ + * Daniel.Veillard@w3.org */ #ifndef __XML_ENTITIES_H__ diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 4cd2ead9..a06d4818 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -3,7 +3,7 @@ * * See Copyright for the status of this software. 
* - * $Id$ + * Daniel.Veillard@w3.org */ #ifndef __XML_PARSER_H__ @@ -46,6 +46,7 @@ typedef struct xmlParserNodeInfoSeq { typedef struct xmlParserCtxt { struct xmlSAXHandler *sax; /* The SAX handler */ xmlDocPtr doc; /* the document being built */ + int wellFormed; /* is the document well formed */ /* Input stream stack */ xmlParserInputPtr input; /* Current input stream */ @@ -131,15 +132,8 @@ extern xmlSAXHandler xmlDefaultSAXHandler; #include "entities.h" /* - * Interfaces + * CHAR handling */ -extern int xmlParseDocument(xmlParserCtxtPtr ctxt); -extern xmlDocPtr xmlParseDoc(CHAR *cur); -extern xmlDocPtr xmlParseMemory(char *buffer, int size); -extern xmlDocPtr xmlParseFile(const char *filename); -extern xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur); -extern xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size); -extern xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename); extern CHAR *xmlStrdup(const CHAR *input); extern CHAR *xmlStrndup(const CHAR *input, int n); extern CHAR *xmlStrchr(const CHAR *str, CHAR val); @@ -149,6 +143,29 @@ extern int xmlStrlen(const CHAR *str); extern CHAR *xmlStrcat(CHAR *cur, const CHAR *add); extern CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len); +/* + * Interfaces + */ +extern xmlDocPtr xmlParseDoc(CHAR *cur); +extern xmlDocPtr xmlParseMemory(char *buffer, int size); +extern xmlDocPtr xmlParseFile(const char *filename); + +/* + * Recovery mode + */ +extern xmlDocPtr xmlRecoverDoc(CHAR *cur); +extern xmlDocPtr xmlRecoverMemory(char *buffer, int size); +extern xmlDocPtr xmlRecoverFile(const char *filename); + +/* + * Internal routines + */ +extern int xmlParseDocument(xmlParserCtxtPtr ctxt); +extern xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery); +extern xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, + int size, int recovery); +extern xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename, + int recovery); extern void 
xmlInitParserCtxt(xmlParserCtxtPtr ctx); extern void xmlClearParserCtxt(xmlParserCtxtPtr ctx); extern void xmlSetupParserForBuffer(xmlParserCtxtPtr ctx, const CHAR* buffer, diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 3921f7ea..b6e26cf7 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -4,7 +4,7 @@ * * See Copyright for the status of this software. * - * $Id$ + * Daniel.Veillard@w3.org */ #ifndef __XML_TREE_H__ @@ -15,6 +15,8 @@ extern "C" { #endif +#include + /* * The different element types carried by an XML tree * @@ -61,18 +63,51 @@ typedef unsigned char CHAR; * TODO !!!! */ +#define XML_ATTRIBUTE_NONE 1 +#define XML_ATTRIBUTE_REQUIRED 2 +#define XML_ATTRIBUTE_IMPLIED 3 +#define XML_ATTRIBUTE_FIXED 4 + +#define XML_ATTRIBUTE_STRING 1 +#define XML_ATTRIBUTE_ID 2 +#define XML_ATTRIBUTE_IDREF 3 +#define XML_ATTRIBUTE_IDREFS 4 +#define XML_ATTRIBUTE_ENTITY 5 +#define XML_ATTRIBUTE_ENTITIES 6 +#define XML_ATTRIBUTE_NMTOKEN 7 +#define XML_ATTRIBUTE_NMTOKENS 8 +#define XML_ATTRIBUTE_ENUMERATED 9 + /* * a DTD Element definition. */ +#define XML_ELEMENT_CONTENT_PCDATA 1 +#define XML_ELEMENT_CONTENT_ELEMENT 2 +#define XML_ELEMENT_CONTENT_SEQ 3 +#define XML_ELEMENT_CONTENT_OR 4 + +#define XML_ELEMENT_CONTENT_ONCE 1 +#define XML_ELEMENT_CONTENT_OPT 2 +#define XML_ELEMENT_CONTENT_MULT 3 +#define XML_ELEMENT_CONTENT_PLUS 4 + +typedef struct xmlElementContent { + int type; /* PCDATA, ELEMENT, SEQ or OR */ + int ocur; /* ONCE, OPT, MULT or PLUS */ + const CHAR *name; /* Element name */ + struct xmlElementContent *c1; /* first child */ + struct xmlElementContent *c2; /* second child */ +} xmlElementContent, *xmlElementContentPtr; + #define XML_ELEMENT_TYPE_EMPTY 1 #define XML_ELEMENT_TYPE_ANY 2 #define XML_ELEMENT_TYPE_MIXED 3 #define XML_ELEMENT_TYPE_ELEMENT 4 typedef struct xmlElement { - const CHAR *name; /* Element name */ - int type; /* type (too simple, to extend ...) */ - /* TODO !!! 
more needed */ + const CHAR *name; /* Element name */ + int type; /* The type */ + xmlElementContentPtr content; /* the allowed element content */ } xmlElement, *xmlElementPtr; /* @@ -132,6 +167,7 @@ typedef struct xmlNode { struct xmlNode *next; /* next sibling link */ struct xmlNode *prev; /* previous sibling link */ struct xmlNode *childs; /* parent->childs link */ + struct xmlNode *last; /* last child link */ struct xmlAttr *properties; /* properties list */ const CHAR *name; /* the name of the node, or the entity */ xmlNs *ns; /* pointer to the associated namespace */ @@ -153,9 +189,9 @@ typedef struct xmlDoc { const CHAR *encoding; /* encoding, if any */ int compression;/* level of zlib compression */ int standalone; /* standalone document (no external refs) */ - struct xmlDtd *dtd; /* the document DTD if available */ + struct xmlDtd *intSubset; /* the document internal subset */ + struct xmlDtd *extSubset; /* the document external subset */ struct xmlNs *oldNs; /* Global namespace, the old way */ - void *entities; /* Hash table for general entities if any */ struct xmlNode *root; /* the document tree */ } xmlDoc, *xmlDocPtr; @@ -169,6 +205,8 @@ extern int xmlIndentTreeOutput; /* try to indent the tree dumps */ /* * Creating/freeing new structures */ +extern xmlDtdPtr xmlCreateIntSubset(xmlDocPtr doc, const CHAR *name, + const CHAR *ExternalID, const CHAR *SystemID); extern xmlDtdPtr xmlNewDtd(xmlDocPtr doc, const CHAR *name, const CHAR *ExternalID, const CHAR *SystemID); extern void xmlFreeDtd(xmlDtdPtr cur); @@ -240,7 +278,7 @@ extern xmlNsPtr xmlCopyNamespaceList(xmlNsPtr cur); */ extern xmlAttrPtr xmlSetProp(xmlNodePtr node, const CHAR *name, const CHAR *value); -extern const CHAR *xmlGetProp(xmlNodePtr node, const CHAR *name); +extern CHAR *xmlGetProp(xmlNodePtr node, const CHAR *name); extern xmlNodePtr xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value); extern xmlNodePtr xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len); diff --git 
a/include/libxml/valid.h b/include/libxml/valid.h new file mode 100644 index 00000000..d6765ff4 --- /dev/null +++ b/include/libxml/valid.h @@ -0,0 +1,18 @@ +/* + * valid.h : interface to the DTD handling and the validity checking + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_VALID_H__ +#define __XML_VALID_H__ +#include "tree.h" + +extern xmlElementPtr xmlAddElementDecl(xmlDtdPtr dtd, char *name, int type, + xmlElementContentPtr content); +extern xmlElementContentPtr xmlNewElementContent(CHAR *name, int type); +extern void xmlFreeElementContent(xmlElementContentPtr cur); +#endif /* __XML_VALID_H__ */ diff --git a/parser.c b/parser.c index 5464b263..a466b8b2 100644 --- a/parser.c +++ b/parser.c @@ -3,7 +3,7 @@ * * See Copyright for the status of this software. * - * $Id$ + * Daniel.Veillard@w3.org */ #ifdef WIN32 @@ -30,6 +30,7 @@ #include "tree.h" #include "parser.h" #include "entities.h" +#include "valid.h" /************************************************************************ * * @@ -182,6 +183,38 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { return(input); } +/** + * xmlNewStringInputStream: + * @ctxt: an XML parser context + * @string: the memory buffer to read from + * + * Create a new input stream based on a memory buffer. 
+ * return values: the new input stream + */ +xmlParserInputPtr +xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) { + xmlParserInputPtr input; + + if (string == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "internal: xmlNewStringInputStream string = NULL\n"); + return(NULL); + } + input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput)); + if (input == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n"); + return(NULL); + } + input->filename = NULL; + input->base = string; + input->cur = string; + input->line = 1; + input->col = 1; + return(input); +} + /* * A few macros needed to help building the parser. */ @@ -198,8 +231,10 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ #define IS_CHAR(c) \ - (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ - (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) + ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ + (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \ + (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) && \ + ((c) <= 0x10FFFF)) /* * [3] S ::= (#x20 | #x9 | #xD | #xA)+ @@ -579,8 +614,9 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ #define IS_CHAR(c) \ - (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20) ||\ - ((c) == 0xa)) + ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ + (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \ + (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF)) /* * [85] BaseChar ::= ... long list see REC ... 
@@ -935,6 +971,7 @@ xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n", entity->name); + ctxt->wellFormed = 0; return; } len = xmlStrlen(entity->content); @@ -1081,6 +1118,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q); + ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1092,6 +1130,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { if (CUR != '\'') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q); + ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1184,6 +1223,7 @@ xmlParseNamespace(xmlParserCtxtPtr ctxt) { if (!garbage) if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n"); + ctxt->wellFormed = 0; NEXT; } } @@ -1321,6 +1361,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt) { if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished EntityValue\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; @@ -1354,6 +1395,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt) { if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished EntityValue\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; @@ -1361,6 +1403,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt) { } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n"); + ctxt->wellFormed = 0; } return(ret); @@ -1387,6 +1430,12 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '"')) { + if (CUR == '<') { + if ((ctxt->sax 
!= NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Unescaped '<' not allowed in attributes values\n"); + ctxt->wellFormed = 0; + } if (CUR == '&') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParseReference(ctxt); @@ -1419,6 +1468,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished AttValue\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; @@ -1427,6 +1477,12 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '\'')) { + if (CUR == '<') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Unescaped '<' not allowed in attributes values\n"); + ctxt->wellFormed = 0; + } if (CUR == '&') { ret = xmlStrncat(ret, q, CUR_PTR - q); cur = xmlParseReference(ctxt); @@ -1459,6 +1515,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished AttValue\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrncat(ret, q, CUR_PTR - q); NEXT; @@ -1466,6 +1523,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n"); + ctxt->wellFormed = 0; } return(ret); @@ -1494,6 +1552,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1506,6 +1565,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1513,6 +1573,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr 
ctxt) { } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; } return(ret); @@ -1540,6 +1601,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1552,6 +1614,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { if (!IS_LETTER(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n"); + ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; @@ -1559,6 +1622,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; } return(ret); @@ -1583,9 +1647,17 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '<') && (CUR != '&')) { + if ((CUR == ']') && (NXT(1) == ']') && + (NXT(2) == '>')) { + if (cdata) break; + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Sequence ']]>' not allowed in content\n"); + ctxt->wellFormed = 0; + } + } NEXT; - if ((cdata) && (CUR == ']') && (NXT(1) == ']') && - (NXT(2) == '>')) break; } if (q == CUR_PTR) return; @@ -1621,28 +1693,52 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) { (NXT(2) == 'S') && (NXT(3) == 'T') && (NXT(4) == 'E') && (NXT(5) == 'M')) { SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Space required after 'SYSTEM'\n"); + ctxt->wellFormed = 0; + } SKIP_BLANKS; URI = xmlParseSystemLiteral(ctxt); - if (URI == NULL) + if (URI == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseExternalID: SYSTEM, no URI\n"); 
+ ctxt->wellFormed = 0; + } } else if ((CUR == 'P') && (NXT(1) == 'U') && (NXT(2) == 'B') && (NXT(3) == 'L') && (NXT(4) == 'I') && (NXT(5) == 'C')) { SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Space required after 'PUBLIC'\n"); + ctxt->wellFormed = 0; + } SKIP_BLANKS; *publicID = xmlParsePubidLiteral(ctxt); - if (*publicID == NULL) + if (*publicID == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseExternalID: PUBLIC, no Public Identifier\n"); + ctxt->wellFormed = 0; + } + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "Space required after the Public Identifier\n"); + ctxt->wellFormed = 0; + } SKIP_BLANKS; URI = xmlParseSystemLiteral(ctxt); - if (URI == NULL) + if (URI == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "xmlParseExternalID: PUBLIC, no URI\n"); + ctxt->wellFormed = 0; + } } return(URI); } @@ -1682,15 +1778,18 @@ xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) { while (IS_CHAR(CUR) && ((CUR == ':') || (CUR != '>') || (*r != '-') || (*q != '-'))) { - if ((*r == '-') && (*q == '-')) + if ((*r == '-') && (*q == '-')) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Comment must not contain '--' (double-hyphen)`\n"); + ctxt->wellFormed = 0; + } NEXT;r++;q++; } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "Comment not terminated \n