diff --git a/ChangeLog b/ChangeLog index 04d7a0fe..50790ba4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +Tue Dec 28 18:44:22 CET 1999 Daniel Veillard + + * parser.[ch] parserInternals.h: Push parser for XML, + seems to work fine now + * tester.c debugXML.[ch]: Added an XML shell debug facility and + --push for push testing + * xpath.[ch] : cleaned up for Shell usage, added missing APIs + * testSAX.c: added --push + * HTMLtree.[ch] tree.[ch]: new functions for dumping parts of the + subtree + * xmlIO.[ch] : enriched API + fixes for push mode + * entities.[ch]: added the entity content length to the struct. + * xmlmemory.[ch]: new API to show the last entries for the shell + * valid.c: added required attribute testing + * SAX.c: the cdata callback now merge contiguous fragments + * HTMLparser.c: cleanup of some macros + Wed Dec 22 12:20:53 CET 1999 Daniel Veillard * parser.c: fix for PIs name starting with xml diff --git a/HTMLparser.c b/HTMLparser.c index 7276e7c5..e3a1f723 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -57,8 +57,8 @@ * Generic function for accessing stacks in the Parser Context */ -#define PUSH_AND_POP(type, name) \ -int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \ +#define PUSH_AND_POP(scope, type, name) \ +scope int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \ if (ctxt->name##Nr >= ctxt->name##Max) { \ ctxt->name##Max *= 2; \ ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \ @@ -72,7 +72,7 @@ int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \ ctxt->name = value; \ return(ctxt->name##Nr++); \ } \ -type html##name##Pop(htmlParserCtxtPtr ctxt) { \ +scope type html##name##Pop(htmlParserCtxtPtr ctxt) { \ type ret; \ if (ctxt->name##Nr < 0) return(0); \ ctxt->name##Nr--; \ @@ -86,8 +86,8 @@ type html##name##Pop(htmlParserCtxtPtr ctxt) { \ return(ret); \ } \ -PUSH_AND_POP(xmlNodePtr, node) -PUSH_AND_POP(xmlChar*, name) +PUSH_AND_POP(extern, xmlNodePtr, node) +PUSH_AND_POP(extern, xmlChar*, name) /* * 
Macros for accessing the content. Those should be used only by the parser, @@ -2626,11 +2626,11 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { } -/******************************************************************************** - * * - * Parser contexts handling * - * * - ********************************************************************************/ +/************************************************************************ + * * + * Parser contexts handling * + * * + ************************************************************************/ /** * xmlInitParserCtxt: @@ -2665,6 +2665,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt) ctxt->version = NULL; ctxt->encoding = NULL; ctxt->standalone = -1; + ctxt->instate = XML_PARSER_START; /* Allocate the Node stack */ ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr)); @@ -2691,6 +2692,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt) ctxt->record_info = 0; ctxt->validate = 0; ctxt->nbChars = 0; + ctxt->checkIndex = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); } diff --git a/HTMLtree.c b/HTMLtree.c index c84daea5..e6142ae6 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -28,6 +28,9 @@ #include "entities.h" #include "valid.h" +static void +htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur); + /** * htmlDtdDump: * @buf: the HTML buffer output @@ -108,7 +111,7 @@ htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { } -static void +void htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur); /** * htmlNodeListDump: @@ -138,7 +141,7 @@ htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { * * Dump an HTML node, recursive behaviour,children are printed too. */ -static void +void htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { htmlElemDescPtr info; @@ -149,6 +152,10 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { /* * Special cases. 
*/ + if (cur->type == XML_HTML_DOCUMENT_NODE) { + htmlDocContentDump(buf, (xmlDocPtr) cur); + return; + } if (cur->type == HTML_TEXT_NODE) { if (cur->content != NULL) { xmlChar *buffer; diff --git a/SAX.c b/SAX.c index be5a0abd..19e0da36 100644 --- a/SAX.c +++ b/SAX.c @@ -1101,14 +1101,22 @@ void cdataBlock(void *ctx, const xmlChar *value, int len) { xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; - xmlNodePtr ret; + xmlNodePtr ret, lastChild; #ifdef DEBUG_SAX fprintf(stderr, "SAX.pcdata(%.10s, %d)\n", value, len); #endif - ret = xmlNewCDataBlock(ctxt->myDoc, value, len); - xmlAddChild(ctxt->node, ret); - /* !!!!! merges */ + lastChild = xmlGetLastChild(ctxt->node); +#ifdef DEBUG_SAX_TREE + fprintf(stderr, "add chars to %s \n", ctxt->node->name); +#endif + if ((lastChild != NULL) && + (lastChild->type == XML_CDATA_SECTION_NODE)) { + xmlTextConcat(lastChild, value, len); + } else { + ret = xmlNewCDataBlock(ctxt->myDoc, value, len); + xmlAddChild(ctxt->node, ret); + } } /* diff --git a/debugXML.c b/debugXML.c index 2344bb22..99972ae1 100644 --- a/debugXML.c +++ b/debugXML.c @@ -13,9 +13,15 @@ #include "config.h" #endif #include +#ifdef HAVE_STDLIB_H +#include +#endif +#include "xmlmemory.h" #include "tree.h" #include "parser.h" #include "debugXML.h" +#include "HTMLtree.h" +#include "HTMLparser.h" #define IS_BLANK(c) \ (((c) == '\n') || ((c) == '\r') || ((c) == '\t') || ((c) == ' ')) @@ -43,7 +49,7 @@ void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) { if (ns->prefix != NULL) fprintf(output, "namespace %s href=", ns->prefix); else - fprintf(output, "default namespace href=", ns->prefix); + fprintf(output, "default namespace href="); xmlDebugDumpString(output, ns->href); fprintf(output, "\n"); @@ -217,7 +223,7 @@ void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) { } -void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) { +void xmlDebugDumpDocumentHead(FILE *output, xmlDocPtr doc) { if (output == NULL) output = stdout; if (doc == NULL) 
{ fprintf(output, "DOCUMENT == NULL !\n"); @@ -286,10 +292,21 @@ void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) { fprintf(output, "standalone=true\n"); if (doc->oldNs != NULL) xmlDebugDumpNamespaceList(output, doc->oldNs, 0); - if (doc->root != NULL) - xmlDebugDumpNodeList(output, doc->root, 1); } +void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) { + if (output == NULL) output = stdout; + if (doc == NULL) { + fprintf(output, "DOCUMENT == NULL !\n"); + return; + } + xmlDebugDumpDocumentHead(output, doc); + if (((doc->type == XML_DOCUMENT_NODE) || + (doc->type == XML_HTML_DOCUMENT_NODE)) && + (doc->root != NULL)) + xmlDebugDumpNodeList(output, doc->root, 1); +} + void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) { int i; xmlEntityPtr cur; @@ -422,3 +439,807 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) { } else fprintf(output, "No entities in external subset\n"); } + +static int xmlLsCountNode(xmlNodePtr node) { + int ret = 0; + xmlNodePtr list = NULL; + + switch (node->type) { + case XML_ELEMENT_NODE: + list = node->childs; + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + list = ((xmlDocPtr) node)->root; + break; + case XML_ATTRIBUTE_NODE: + list = ((xmlAttrPtr) node)->val; + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + if (node->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + ret = xmlStrlen(node->content); +#else + ret = xmlBufferLength(node->content); +#endif + } + break; + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + ret = 1; + break; + } + for (;list != NULL;ret++) + list = list->next; + return(ret); +} + +void xmlLsOneNode(FILE *output, xmlNodePtr node) { + switch (node->type) { + case XML_ELEMENT_NODE: + fprintf(output, "-"); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "a"); + break; + case XML_TEXT_NODE: + fprintf(output, "t"); + break; + 
case XML_CDATA_SECTION_NODE: + fprintf(output, "c"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "e"); + break; + case XML_ENTITY_NODE: + fprintf(output, "E"); + break; + case XML_PI_NODE: + fprintf(output, "p"); + break; + case XML_COMMENT_NODE: + fprintf(output, "c"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "d"); + break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "h"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "T"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "F"); + break; + case XML_NOTATION_NODE: + fprintf(output, "N"); + break; + default: + fprintf(output, "?"); + } + if (node->properties != NULL) + fprintf(output, "a"); + else + fprintf(output, "-"); + if (node->nsDef != NULL) + fprintf(output, "n"); + else + fprintf(output, "-"); + + fprintf(output, " %8d ", xmlLsCountNode(node)); + + switch (node->type) { + case XML_ELEMENT_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_ATTRIBUTE_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_TEXT_NODE: + if (node->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + xmlDebugDumpString(output, node->content); +#else + xmlDebugDumpString(output, xmlBufferContent(node->content)); +#endif + } + break; + case XML_CDATA_SECTION_NODE: + break; + case XML_ENTITY_REF_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_ENTITY_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_PI_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_COMMENT_NODE: + break; + case XML_DOCUMENT_NODE: + break; + case XML_HTML_DOCUMENT_NODE: + break; + case XML_DOCUMENT_TYPE_NODE: + break; + case XML_DOCUMENT_FRAG_NODE: + break; + case XML_NOTATION_NODE: + break; + default: + if (node->name != NULL) + fprintf(output, "%s", node->name); + } + fprintf(output, "\n"); +} + 
+/**************************************************************** + * * + * The XML shell related functions * + * * + ****************************************************************/ + +/* + * TODO: Improvement/cleanups for the XML shell + * - allow to shell out an editor on a subpart + * - cleanup function registrations (with help) and calling + * - provide registration routines + */ + +/** + * xmlShellList: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "ls" + * Does an Unix like listing of the given node (like a directory) + * + * Returns 0 + */ +int +xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node, + xmlNodePtr node2) { + xmlNodePtr cur; + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + cur = ((xmlDocPtr) node)->root; + } else if (node->childs != NULL) { + cur = node->childs; + } else { + xmlLsOneNode(stdout, node); + return(0); + } + while (cur != NULL) { + xmlLsOneNode(stdout, cur); + cur = cur->next; + } + return(0); +} + +/** + * xmlShellDir: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "dir" + * dumps informations about the node (namespace, attributes, content). + * + * Returns 0 + */ +int +xmlShellDir(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node, + xmlNodePtr node2) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + xmlDebugDumpDocumentHead(stdout, (xmlDocPtr) node); + } else if (node->type == XML_ATTRIBUTE_NODE) { + xmlDebugDumpAttr(stdout, (xmlAttrPtr) node, 0); + } else { + xmlDebugDumpOneNode(stdout, node, 0); + } + return(0); +} + +/** + * xmlShellCat: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "cat" + * dumps the serialization node content (XML or HTML). 
+ * + * Returns 0 + */ +int +xmlShellCat(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node, + xmlNodePtr node2) { + xmlElemDump(stdout, ctxt->doc, node); + printf("\n"); + return(0); +} + +/** + * xmlShellLoad: + * @ctxt: the shell context + * @filename: the file name + * @node: unused + * @node2: unused + * + * Implements the XML shell function "load" + * loads a new document specified by the filename + * + * Returns 0 or -1 if loading failed + */ +int +xmlShellLoad(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node, + xmlNodePtr node2) { + xmlDocPtr doc; + int html = 0; + + if (ctxt->doc != NULL) + html = (ctxt->doc->type == XML_HTML_DOCUMENT_NODE); + + if (html) { + doc = htmlParseFile(filename, NULL); + } else { + doc = xmlParseFile(filename); + } + if (doc != NULL) { + if (ctxt->loaded == 1) { + xmlFreeDoc(ctxt->doc); + } + ctxt->loaded = 1; + xmlXPathFreeContext(ctxt->pctxt); + xmlFree(ctxt->filename); + ctxt->doc = doc; + ctxt->node = (xmlNodePtr) doc; + ctxt->pctxt = xmlXPathNewContext(doc); + ctxt->filename = (char *) xmlStrdup((xmlChar *) filename); + } else + return(-1); + return(0); +} + +/** + * xmlShellWrite: + * @ctxt: the shell context + * @filename: the file name + * @node: a node in the tree + * @node2: unused + * + * Implements the XML shell function "write" + * Write the current node to the filename, it saves the serailization + * of the subtree under the @node specified + * + * Returns 0 or -1 in case of error + */ +int +xmlShellWrite(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node, + xmlNodePtr node2) { + if (node == NULL) + return(-1); + if ((filename == NULL) || (filename[0] == 0)) { + fprintf(stderr, "Write command requires a filename argument\n"); + return(-1); + } +#ifdef W_OK + if (access((char *) filename, W_OK)) { + fprintf(stderr, "Cannot write to %s\n", filename); + return(-1); + } +#endif + switch(node->type) { + case XML_DOCUMENT_NODE: + if (xmlSaveFile((char *) filename, ctxt->doc) < -1) { + fprintf(stderr, "Failed to 
write to %s\n", filename); + return(-1); + } + break; + case XML_HTML_DOCUMENT_NODE: + if (htmlSaveFile((char *) filename, ctxt->doc) < 0) { + fprintf(stderr, "Failed to write to %s\n", filename); + return(-1); + } + break; + default: { + FILE *f; + + f = fopen((char *) filename, "w"); + if (f == NULL) { + fprintf(stderr, "Failed to write to %s\n", filename); + return(-1); + } + xmlElemDump(f, ctxt->doc, node); + fclose(f); + } + } + return(0); +} + +/** + * xmlShellSave: + * @ctxt: the shell context + * @filename: the file name (optionnal) + * @node: unused + * @node2: unused + * + * Implements the XML shell function "save" + * Write the current document to the filename, or it's original name + * + * Returns 0 or -1 in case of error + */ +int +xmlShellSave(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node, + xmlNodePtr node2) { + if (ctxt->doc == NULL) + return(-1); + if ((filename == NULL) || (filename[0] == 0)) + filename = ctxt->filename; +#ifdef W_OK + if (access((char *) filename, W_OK)) { + fprintf(stderr, "Cannot save to %s\n", filename); + return(-1); + } +#endif + switch(ctxt->doc->type) { + case XML_DOCUMENT_NODE: + if (xmlSaveFile((char *) filename, ctxt->doc) < 0) { + fprintf(stderr, "Failed to save to %s\n", filename); + } + break; + case XML_HTML_DOCUMENT_NODE: + if (htmlSaveFile((char *) filename, ctxt->doc) < 0) { + fprintf(stderr, "Failed to save to %s\n", filename); + } + break; + default: + fprintf(stderr, + "To save to subparts of a document use the 'write' command\n"); + return(-1); + + } + return(0); +} + +/** + * xmlShellValidate: + * @ctxt: the shell context + * @dtd: the DTD URI (optionnal) + * @node: unused + * @node2: unused + * + * Implements the XML shell function "validate" + * Validate the document, if a DTD path is provided, then the validation + * is done against the given DTD. 
+ * + * Returns 0 or -1 in case of error + */ +int +xmlShellValidate(xmlShellCtxtPtr ctxt, char *dtd, xmlNodePtr node, + xmlNodePtr node2) { + xmlValidCtxt vctxt; + int res = -1; + + vctxt.userData = stderr; + vctxt.error = (xmlValidityErrorFunc) fprintf; + vctxt.warning = (xmlValidityWarningFunc) fprintf; + + if ((dtd == NULL) || (dtd[0] == 0)) { + res = xmlValidateDocument(&vctxt, ctxt->doc); + } else { + xmlDtdPtr subset; + + subset = xmlParseDTD(NULL, (xmlChar *) dtd); + if (subset != NULL) { + res = xmlValidateDtd(&vctxt, ctxt->doc, subset); + + xmlFreeDtd(subset); + } + } + return(res); +} + +/** + * xmlShellDu: + * @ctxt: the shell context + * @arg: unused + * @tree: a node defining a subtree + * @node2: unused + * + * Implements the XML shell function "du" + * show the structure of the subtree under node @tree + * If @tree is null, the command works on the current node. + * + * Returns 0 or -1 in case of error + */ +int +xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree, + xmlNodePtr node2) { + xmlNodePtr node; + int indent = 0,i; + + if (tree == NULL) return(-1); + node = tree; + while (node != NULL) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + printf("/\n"); + } else if (node->type == XML_ELEMENT_NODE) { + for (i = 0;i < indent;i++) + printf(" "); + printf("%s\n", node->name); + } else { + } + + /* + * Browse the full subtree, deep first + */ + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + node = ((xmlDocPtr) node)->root; + } else if (node->childs != NULL) { + /* deep first */ + node = node->childs; + indent++; + } else if ((node != tree) && (node->next != NULL)) { + /* then siblings */ + node = node->next; + } else if (node != tree) { + /* go up to parents->next if needed */ + while (node != tree) { + if (node->parent != NULL) { + node = node->parent; + indent--; + } + if ((node != tree) && (node->next != NULL)) { + node = node->next; + break; + } + if 
(node->parent == NULL) { + node = NULL; + break; + } + if (node == tree) { + node = NULL; + break; + } + } + /* exit condition */ + if (node == tree) + node = NULL; + } else + node = NULL; + } + return(0); +} + +/** + * xmlShellPwd: + * @ctxt: the shell context + * @buffer: the output buffer + * @tree: a node + * @node2: unused + * + * Implements the XML shell function "pwd" + * Show the full path from the root to the node, if needed building + * thumblers when similar elements exists at a given ancestor level. + * The output is compatible with XPath commands. + * + * Returns 0 or -1 in case of error + */ +int +xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node, + xmlNodePtr node2) { + xmlNodePtr cur, tmp, next; + char buf[500]; + char sep; + const char *name; + int occur = 0; + + buffer[0] = 0; + if (node == NULL) return(-1); + cur = node; + do { + name = ""; + sep= '?'; + occur = 0; + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + sep = '/'; + next = NULL; + } else if (cur->type == XML_ELEMENT_NODE) { + sep = '/'; + name = (const char *)cur->name; + next = cur->parent; + + /* + * Thumbler index computation + */ + tmp = cur->prev; + while (tmp != NULL) { + if (!xmlStrcmp(cur->name, tmp->name)) + occur++; + tmp = tmp->prev; + } + if (occur == 0) { + tmp = cur->next; + while (tmp != NULL) { + if (!xmlStrcmp(cur->name, tmp->name)) + occur++; + tmp = tmp->next; + } + if (occur != 0) occur = 1; + } else + occur++; + } else if (cur->type == XML_ATTRIBUTE_NODE) { + sep = '@'; + name = (const char *) (((xmlAttrPtr) cur)->name); + next = ((xmlAttrPtr) cur)->node; + } else { + next = cur->parent; + } + if (occur == 0) + sprintf(buf, "%c%s%s", sep, name, buffer); + else + sprintf(buf, "%c%s[%d]%s", sep, name, occur, buffer); + strcpy(buffer, buf); + cur = next; + } while (cur != NULL); + return(0); +} + +/** + * xmlShell + * @doc: the initial document + * @filename: the output buffer + * @input: the line reading function + * 
@output: the output FILE* + * + * Implements the XML shell + * This allow to load, validate, view, modify and save a document + * using a environment similar to a UNIX commandline. + */ +void +xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input, + FILE *output) { + char prompt[500] = "/ > "; + char *cmdline = NULL; + int nbargs; + char command[100]; + char arg[400]; + xmlShellCtxtPtr ctxt; + xmlXPathObjectPtr list; + + if (doc == NULL) + return; + if (filename == NULL) + return; + if (input == NULL) + return; + if (output == NULL) + return; + ctxt = (xmlShellCtxtPtr) xmlMalloc(sizeof(xmlShellCtxt)); + if (ctxt == NULL) + return; + ctxt->loaded = 0; + ctxt->doc = doc; + ctxt->input = input; + ctxt->output = output; + ctxt->filename = (char *) xmlStrdup((xmlChar *) filename); + ctxt->node = (xmlNodePtr) ctxt->doc; + + ctxt->pctxt = xmlXPathNewContext(ctxt->doc); + if (ctxt->pctxt == NULL) { + xmlFree(ctxt); + return; + } + while (1) { + if (ctxt->node == (xmlNodePtr) ctxt->doc) + sprintf(prompt, "%s > ", "/"); + else if (ctxt->node->name) + sprintf(prompt, "%s > ", ctxt->node->name); + else + sprintf(prompt, "? 
> "); + + cmdline = ctxt->input(prompt); + if (cmdline == NULL) break; + + command[0] = 0; + arg[0] = 0; + nbargs = sscanf(cmdline, "%s %s", command, arg); + + if (command[0] == 0) continue; + if (!strcmp(command, "exit")) + break; + if (!strcmp(command, "quit")) + break; + if (!strcmp(command, "bye")) + break; + if (!strcmp(command, "validate")) { + xmlShellValidate(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "load")) { + xmlShellLoad(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "save")) { + xmlShellSave(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "write")) { + xmlShellWrite(ctxt, arg, NULL, NULL); + } else if (!strcmp(command, "free")) { + if (arg[0] == 0) { + xmlMemShow(stdout, 0); + } else { + int len = 0; + sscanf(arg, "%d", &len); + xmlMemShow(stdout, len); + } + } else if (!strcmp(command, "pwd")) { + char dir[500]; + if (!xmlShellPwd(ctxt, dir, ctxt->node, NULL)) + printf("%s\n", dir); + } else if (!strcmp(command, "du")) { + xmlShellDu(ctxt, NULL, ctxt->node, NULL); + } else if ((!strcmp(command, "ls")) || + (!strcmp(command, "dir"))) { + int dir = (!strcmp(command, "dir")); + if (arg[0] == 0) { + if (dir) + xmlShellDir(ctxt, NULL, ctxt->node, NULL); + else + xmlShellList(ctxt, NULL, ctxt->node, NULL); + } else { + ctxt->pctxt->node = ctxt->node; + if (ctxt->pctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->pctxt->nodelist); + ctxt->pctxt->nodelist = xmlXPathNodeSetCreate(ctxt->node); + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + fprintf(stderr, "%s: no such node\n", arg); + break; + case XPATH_NODESET: { + int i; + + for (i = 0;i < list->nodesetval->nodeNr;i++) { + if (dir) + xmlShellDir(ctxt, NULL, + list->nodesetval->nodeTab[i], NULL); + else + xmlShellList(ctxt, NULL, + list->nodesetval->nodeTab[i], NULL); + } + break; + } + case XPATH_BOOLEAN: + fprintf(stderr, "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + fprintf(stderr, "%s is 
a number\n", arg); + break; + case XPATH_STRING: + fprintf(stderr, "%s is a string\n", arg); + break; + } + xmlXPathFreeNodeSetList(list); + } else { + fprintf(stderr, "%s: no such node\n", arg); + } + if (ctxt->pctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->pctxt->nodelist); + ctxt->pctxt->nodelist = NULL; + } + } else if (!strcmp(command, "cd")) { + if (arg[0] == 0) { + ctxt->node = (xmlNodePtr) ctxt->doc; + } else { + ctxt->pctxt->node = ctxt->node; + if (ctxt->pctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->pctxt->nodelist); + ctxt->pctxt->nodelist = xmlXPathNodeSetCreate(ctxt->node); + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + fprintf(stderr, "%s: no such node\n", arg); + break; + case XPATH_NODESET: + if (list->nodesetval->nodeNr == 1) { + ctxt->node = list->nodesetval->nodeTab[0]; + } else + fprintf(stderr, "%s is a %d Node Set\n", + arg, list->nodesetval->nodeNr); + break; + case XPATH_BOOLEAN: + fprintf(stderr, "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + fprintf(stderr, "%s is a number\n", arg); + break; + case XPATH_STRING: + fprintf(stderr, "%s is a string\n", arg); + break; + } + xmlXPathFreeNodeSetList(list); + } else { + fprintf(stderr, "%s: no such node\n", arg); + } + if (ctxt->pctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->pctxt->nodelist); + ctxt->pctxt->nodelist = NULL; + } + } else if (!strcmp(command, "cat")) { + if (arg[0] == 0) { + xmlShellCat(ctxt, NULL, ctxt->node, NULL); + } else { + ctxt->pctxt->node = ctxt->node; + if (ctxt->pctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->pctxt->nodelist); + ctxt->pctxt->nodelist = xmlXPathNodeSetCreate(ctxt->node); + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + fprintf(stderr, "%s: no such node\n", arg); + break; + case XPATH_NODESET: { + int i; + + for (i = 0;i < list->nodesetval->nodeNr;i++) { + if (i > 0) 
printf(" -------\n"); + xmlShellCat(ctxt, NULL, + list->nodesetval->nodeTab[i], NULL); + } + break; + } + case XPATH_BOOLEAN: + fprintf(stderr, "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + fprintf(stderr, "%s is a number\n", arg); + break; + case XPATH_STRING: + fprintf(stderr, "%s is a string\n", arg); + break; + } + xmlXPathFreeNodeSetList(list); + } else { + fprintf(stderr, "%s: no such node\n", arg); + } + if (ctxt->pctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->pctxt->nodelist); + ctxt->pctxt->nodelist = NULL; + } + } else { + fprintf(stderr, "Unknown command %s\n", command); + } + free(cmdline); /* not xmlFree here ! */ + } + xmlXPathFreeContext(ctxt->pctxt); + if (ctxt->loaded) { + xmlFreeDoc(ctxt->doc); + } + xmlFree(ctxt); + if (cmdline != NULL) + free(cmdline); /* not xmlFree here ! */ +} + diff --git a/debugXML.h b/debugXML.h index 9c77496e..8774f0bd 100644 --- a/debugXML.h +++ b/debugXML.h @@ -7,19 +7,97 @@ #ifndef __DEBUG_XML__ #define __DEBUG_XML__ +#include #include "tree.h" +#include "xpath.h" #ifdef __cplusplus extern "C" { #endif -extern void xmlDebugDumpString(FILE *output, const xmlChar *str); -extern void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth); -extern void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth); -extern void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth); -extern void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth); -extern void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth); -extern void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc); -extern void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc); + +/* + * The standard Dump routines + */ +void xmlDebugDumpString (FILE *output, + const xmlChar *str); +void xmlDebugDumpAttr (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpAttrList (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpOneNode (FILE *output, + xmlNodePtr node, + int depth); +void 
xmlDebugDumpNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNodeList (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpDocumentHead(FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDocument (FILE *output, + xmlDocPtr doc); +void xmlDebugDumpEntities (FILE *output, + xmlDocPtr doc); +void xmlLsOneNode (FILE *output, + xmlNodePtr node); + +/**************************************************************** + * * + * The XML shell related structures and functions * + * * + ****************************************************************/ + +/** + * xmlShellReadlineFunc: + * @prompt: a string prompt + * + * This is a generic signature for the XML shell input function + * + * Returns a string which will be freed by the Shell + */ +typedef char * (* xmlShellReadlineFunc)(char *prompt); + +/* + * The shell context itself + * TODO: add the defined function tables. + */ +typedef struct xmlShellCtxt { + char *filename; + xmlDocPtr doc; + xmlNodePtr node; + xmlXPathContextPtr pctxt; + int loaded; + FILE *output; + xmlShellReadlineFunc input; +} xmlShellCtxt, *xmlShellCtxtPtr; + +/** + * xmlShellCmd: + * @ctxt: a shell context + * @arg: a string argument + * @node: a first node + * @node2: a second node + * + * This is a generic signature for the XML shell functions + * + * Returns an int, negative returns indicating errors + */ +typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); + +/* + * The Shell interface. + */ +void xmlShell (xmlDocPtr doc, + char *filename, + xmlShellReadlineFunc input, + FILE *output); + #ifdef __cplusplus } #endif diff --git a/doc/xml.html b/doc/xml.html index 40d6e0c3..8bd76392 100644 --- a/doc/xml.html +++ b/doc/xml.html @@ -128,6 +128,9 @@ for really accurate description
  • working on HTML and XML links recognition layers, get in touch with me if you want to test those.
  • +
  • a Push interface for the XML parser
  • +
  • a shell-like interface to the document tree (try tester --shell :-)
  • +
  • lots of bug fixes and improvements added over XMas holidays

1.8.2: Dec 21 1999

@@ -901,6 +904,6 @@ base under gnome-xml/example

Daniel Veillard

-

$Id: xml.html,v 1.16 1997/01/04 02:49:42 veillard Exp $

+

$Id: xml.html,v 1.17 1999/12/21 15:35:27 veillard Exp $

diff --git a/entities.c b/entities.c index 2e736843..027acd05 100644 --- a/entities.c +++ b/entities.c @@ -114,10 +114,13 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type, cur->SystemID = xmlStrdup(SystemID); else cur->SystemID = NULL; - if (content != NULL) - cur->content = xmlStrdup(content); - else + if (content != NULL) { + cur->length = xmlStrlen(content); + cur->content = xmlStrndup(content, cur->length); + } else { + cur->length = 0; cur->content = NULL; + } cur->orig = NULL; table->nb_entities++; } diff --git a/entities.h b/entities.h index 3af38e38..84ad7c16 100644 --- a/entities.h +++ b/entities.h @@ -34,6 +34,7 @@ typedef struct xmlEntity { const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */ const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ xmlChar *content; /* The entity content or ndata if unparsed */ + int length; /* the content length */ xmlChar *orig; /* The entity cont without ref substitution */ } xmlEntity; typedef xmlEntity *xmlEntityPtr; diff --git a/include/libxml/debugXML.h b/include/libxml/debugXML.h index 9c77496e..8774f0bd 100644 --- a/include/libxml/debugXML.h +++ b/include/libxml/debugXML.h @@ -7,19 +7,97 @@ #ifndef __DEBUG_XML__ #define __DEBUG_XML__ +#include #include "tree.h" +#include "xpath.h" #ifdef __cplusplus extern "C" { #endif -extern void xmlDebugDumpString(FILE *output, const xmlChar *str); -extern void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth); -extern void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth); -extern void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth); -extern void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth); -extern void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth); -extern void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc); -extern void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc); + +/* + * The standard Dump routines + */ +void xmlDebugDumpString (FILE *output, + 
const xmlChar *str); +void xmlDebugDumpAttr (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpAttrList (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpOneNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNodeList (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpDocumentHead(FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDocument (FILE *output, + xmlDocPtr doc); +void xmlDebugDumpEntities (FILE *output, + xmlDocPtr doc); +void xmlLsOneNode (FILE *output, + xmlNodePtr node); + +/**************************************************************** + * * + * The XML shell related structures and functions * + * * + ****************************************************************/ + +/** + * xmlShellReadlineFunc: + * @prompt: a string prompt + * + * This is a generic signature for the XML shell input function + * + * Returns a string which will be freed by the Shell + */ +typedef char * (* xmlShellReadlineFunc)(char *prompt); + +/* + * The shell context itself + * TODO: add the defined function tables. + */ +typedef struct xmlShellCtxt { + char *filename; + xmlDocPtr doc; + xmlNodePtr node; + xmlXPathContextPtr pctxt; + int loaded; + FILE *output; + xmlShellReadlineFunc input; +} xmlShellCtxt, *xmlShellCtxtPtr; + +/** + * xmlShellCmd: + * @ctxt: a shell context + * @arg: a string argument + * @node: a first node + * @node2: a second node + * + * This is a generic signature for the XML shell functions + * + * Returns an int, negative returns indicating errors + */ +typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); + +/* + * The Shell interface. 
+ */ +void xmlShell (xmlDocPtr doc, + char *filename, + xmlShellReadlineFunc input, + FILE *output); + #ifdef __cplusplus } #endif diff --git a/include/libxml/entities.h b/include/libxml/entities.h index 3af38e38..84ad7c16 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -34,6 +34,7 @@ typedef struct xmlEntity { const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */ const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ xmlChar *content; /* The entity content or ndata if unparsed */ + int length; /* the content length */ xmlChar *orig; /* The entity cont without ref substitution */ } xmlEntity; typedef xmlEntity *xmlEntityPtr; diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 8a6443cc..196aeb3b 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -40,11 +40,12 @@ typedef struct xmlParserInput { const char *filename; /* The file analyzed, if any */ const char *directory; /* the directory/base of teh file */ - const xmlChar *base; /* Base of the array to parse */ - const xmlChar *cur; /* Current char being parsed */ + const xmlChar *base; /* Base of the array to parse */ + const xmlChar *cur; /* Current char being parsed */ + int length; /* length if known */ int line; /* Current line */ int col; /* Current column */ - int consumed; /* How many xmlChars were already consumed */ + int consumed; /* How many xmlChars already consumed */ xmlParserInputDeallocate free; /* function to deallocate the base */ } xmlParserInput; typedef xmlParserInput *xmlParserInputPtr; @@ -77,20 +78,25 @@ typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; /** - * The parser is not a state based parser, but we need to maintain + * The parser is not (yet) a state based parser, but we need to maintain * minimum state informations, especially for entities processing. 
*/ typedef enum { - XML_PARSER_EOF = 0, - XML_PARSER_PROLOG, - XML_PARSER_CONTENT, - XML_PARSER_ENTITY_DECL, - XML_PARSER_ENTITY_VALUE, - XML_PARSER_ATTRIBUTE_VALUE, - XML_PARSER_DTD, - XML_PARSER_EPILOG, - XML_PARSER_COMMENT, - XML_PARSER_CDATA_SECTION + XML_PARSER_EOF = -1, /* nothing is to be parsed */ + XML_PARSER_START = 0, /* nothing has been parsed */ + XML_PARSER_MISC, /* Misc* before internal subset */ + XML_PARSER_PI, /* within a processing instruction */ + XML_PARSER_DTD, /* within some DTD content */ + XML_PARSER_PROLOG, /* Misc* after internal subset */ + XML_PARSER_COMMENT, /* within a comment */ + XML_PARSER_START_TAG, /* within a start tag */ + XML_PARSER_CONTENT, /* within the content */ + XML_PARSER_CDATA_SECTION, /* within a CDATA section */ + XML_PARSER_END_TAG, /* within a closing tag */ + XML_PARSER_ENTITY_DECL, /* within an entity declaration */ + XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ + XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ + XML_PARSER_EPILOG /* the Misc* after the last end tag */ } xmlParserInputState; /** @@ -151,6 +157,7 @@ typedef struct _xmlParserCtxt { xmlChar * *nameTab; /* array of nodes */ long nbChars; /* number of xmlChar processed */ + long checkIndex; /* used by progressive parsing lookup */ } _xmlParserCtxt; typedef _xmlParserCtxt xmlParserCtxt; typedef xmlParserCtxt *xmlParserCtxtPtr; @@ -347,13 +354,35 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, const xmlChar *ExternalID, const xmlChar *SystemID); +/** + * SAX initialization routines + */ +void xmlDefaultSAXHandlerInit(void); +void htmlDefaultSAXHandlerInit(void); + +/** + * Parser contexts handling. 
+ */ void xmlInitParserCtxt (xmlParserCtxtPtr ctxt); void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); +void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, const xmlChar* buffer, const char* filename); -void xmlDefaultSAXHandlerInit(void); -void htmlDefaultSAXHandlerInit(void); +xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); + +/** + * Interfaces for the Push mode + */ +xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename); +int xmlParseChunk (xmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); /** * Node infos diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 9da48462..5a7b7ffe 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -435,9 +435,10 @@ typedef unsigned char CHARVAL; * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ #define IS_CHAR(c) \ - ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ - (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \ - (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF)) + ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \ + ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ + (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \ + (((c) >= 0x10000) && ((c) <= 0x10FFFF))) /* * [85] BaseChar ::= ... long list see REC ... 
@@ -595,8 +596,7 @@ void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt, xmlChar **value); xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt); -void xmlParseEndTag (xmlParserCtxtPtr ctxt, - xmlChar *tagname); +void xmlParseEndTag (xmlParserCtxtPtr ctxt); void xmlParseCDSect (xmlParserCtxtPtr ctxt); void xmlParseContent (xmlParserCtxtPtr ctxt); void xmlParseElement (xmlParserCtxtPtr ctxt); diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 3a0285bc..cce61681 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -526,6 +526,9 @@ void xmlDocDumpMemory (xmlDocPtr cur, int *size); void xmlDocDump (FILE *f, xmlDocPtr cur); +void xmlElemDump (FILE *f, + xmlDocPtr cur, + xmlNodePtr elem); int xmlSaveFile (const char *filename, xmlDocPtr cur); diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h index 2bdba5d8..bf43de25 100644 --- a/include/libxml/xmlIO.h +++ b/include/libxml/xmlIO.h @@ -37,6 +37,9 @@ typedef xmlParserInputBuffer *xmlParserInputBufferPtr; * Interfaces */ +xmlParserInputBufferPtr + xmlAllocParserInputBuffer (xmlCharEncoding enc); + xmlParserInputBufferPtr xmlParserInputBufferCreateFilename (const char *filename, xmlCharEncoding enc); diff --git a/include/libxml/xmlmemory.h b/include/libxml/xmlmemory.h index 5c1b4774..64477a1c 100644 --- a/include/libxml/xmlmemory.h +++ b/include/libxml/xmlmemory.h @@ -1,5 +1,5 @@ /* - * memory.h: interface for the memory allocation debug. + * xmlmemory.h: interface for the memory allocation debug. * * Daniel.Veillard@w3.org */ @@ -24,6 +24,7 @@ #define xmlInitMemory() #define xmlMemoryDump() #define xmlMemDisplay(x) +#define xmlMemShow(x, d) #else /* ! 
NO_DEBUG_MEMORY */ #include @@ -51,6 +52,7 @@ void xmlFree (void *ptr); char * xmlMemStrdup (const char *str); int xmlMemUsed (void); void xmlMemDisplay (FILE *fp); +void xmlMemShow (FILE *fp, int nr); void xmlMemoryDump (void); int xmlInitMemory (void); diff --git a/include/libxml/xpath.h b/include/libxml/xpath.h index 149b0beb..84c83052 100644 --- a/include/libxml/xpath.h +++ b/include/libxml/xpath.h @@ -205,6 +205,9 @@ xmlXPathObjectPtr xmlXPathEval (const xmlChar *str, void xmlXPathFreeObject (xmlXPathObjectPtr obj); xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str, xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val); +void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj); +void xmlXPathFreeNodeSet (xmlNodeSetPtr obj); #ifdef __cplusplus } diff --git a/parser.c b/parser.c index 98656d49..2323e8a8 100644 --- a/parser.c +++ b/parser.c @@ -43,6 +43,9 @@ #include "xmlIO.h" #include "xml-error.h" +#define XML_PARSER_BIG_BUFFER_SIZE 1000 +#define XML_PARSER_BUFFER_SIZE 100 + const char *xmlParserVersion = LIBXML_VERSION; /* @@ -61,6 +64,9 @@ const char *xmlW3CPIs[] = { ************************************************************************/ /* #define DEBUG_INPUT */ +/* #define DEBUG_STACK */ +/* #define DEBUG_PUSH */ + #define INPUT_CHUNK 250 /* we need to keep enough input to show errors in context */ @@ -170,7 +176,14 @@ xmlParserInputGrow(xmlParserInputPtr in, int len) { return(0); } - ret = xmlParserInputBufferGrow(in->buf, len); + if ((in->buf->netIO != NULL) || (in->buf->file != NULL) || +#ifdef HAVE_ZLIB_H + (in->buf->gzfile != NULL) || +#endif + (in->buf->fd >= 0)) + ret = xmlParserInputBufferGrow(in->buf, len); + else + return(0); /* * NOTE : in->base may be a "dandling" i.e. 
freed pointer in this @@ -256,8 +269,8 @@ xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, * Generic function for accessing stacks in the Parser Context */ -#define PUSH_AND_POP(type, name) \ -extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \ +#define PUSH_AND_POP(scope, type, name) \ +scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ if (ctxt->name##Nr >= ctxt->name##Max) { \ ctxt->name##Max *= 2; \ ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \ @@ -271,7 +284,7 @@ extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \ ctxt->name = value; \ return(ctxt->name##Nr++); \ } \ -extern type name##Pop(xmlParserCtxtPtr ctxt) { \ +scope type name##Pop(xmlParserCtxtPtr ctxt) { \ type ret; \ if (ctxt->name##Nr <= 0) return(0); \ ctxt->name##Nr--; \ @@ -284,8 +297,9 @@ extern type name##Pop(xmlParserCtxtPtr ctxt) { \ return(ret); \ } \ -PUSH_AND_POP(xmlParserInputPtr, input) -PUSH_AND_POP(xmlNodePtr, node) +PUSH_AND_POP(extern, xmlParserInputPtr, input) +PUSH_AND_POP(extern, xmlNodePtr, node) +PUSH_AND_POP(extern, xmlChar*, name) /* * Macros for accessing the content. Those should be used only by the parser, @@ -444,6 +458,7 @@ xmlNewInputStream(xmlParserCtxtPtr ctxt) { input->buf = NULL; input->free = NULL; input->consumed = 0; + input->length = 0; return(input); } @@ -507,6 +522,7 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { input->filename = (char *) entity->SystemID; /* TODO !!! 
char <- xmlChar */ input->base = entity->content; input->cur = entity->content; + input->length = entity->length; return(input); } @@ -535,6 +551,7 @@ xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { } input->base = buffer; input->cur = buffer; + input->length = xmlStrlen(buffer); return(input); } @@ -556,7 +573,7 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { if (ctxt == NULL) return(NULL); buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); if (buf == NULL) { - char name[1024]; + char name[XML_PARSER_BIG_BUFFER_SIZE]; if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) { #ifdef WIN32 @@ -638,7 +655,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->hasPErefs = 0; ctxt->html = 0; ctxt->external = 0; - ctxt->instate = XML_PARSER_PROLOG; + ctxt->instate = XML_PARSER_START; ctxt->token = 0; ctxt->directory = NULL; @@ -648,6 +665,12 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->nodeMax = 10; ctxt->node = NULL; + /* Allocate the Name stack */ + ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); + ctxt->nameNr = 0; + ctxt->nameMax = 10; + ctxt->name = NULL; + if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler; else { ctxt->sax = sax; @@ -664,6 +687,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; ctxt->record_info = 0; ctxt->nbChars = 0; + ctxt->checkIndex = 0; + ctxt->errNo = XML_ERR_OK; xmlInitNodeInfoSeq(&ctxt->node_seq); } @@ -679,13 +704,17 @@ void xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) { xmlParserInputPtr input; + xmlChar *oldname; if (ctxt == NULL) return; while ((input = inputPop(ctxt)) != NULL) { xmlFreeInputStream(input); } - + while ((oldname = namePop(ctxt)) != NULL) { + xmlFree(oldname); + } + if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab); if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); if (ctxt->version != NULL) xmlFree((char *) 
ctxt->version); @@ -970,9 +999,15 @@ fprintf(stderr, "xmlParserHandleReference : ctxt->token != 0\n"); GROW; if ((CUR == '&') && (NXT(1) == '#')) { switch(ctxt->instate) { + case XML_PARSER_ENTITY_DECL: + case XML_PARSER_PI: case XML_PARSER_CDATA_SECTION: - return; case XML_PARSER_COMMENT: + /* we just ignore it there */ + return; + case XML_PARSER_START_TAG: + return; + case XML_PARSER_END_TAG: return; case XML_PARSER_EOF: ctxt->errNo = XML_ERR_CHARREF_AT_EOF; @@ -981,6 +1016,8 @@ fprintf(stderr, "xmlParserHandleReference : ctxt->token != 0\n"); ctxt->wellFormed = 0; return; case XML_PARSER_PROLOG: + case XML_PARSER_START: + case XML_PARSER_MISC: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n"); @@ -999,9 +1036,6 @@ fprintf(stderr, "xmlParserHandleReference : ctxt->token != 0\n"); "CharRef are forbiden in DTDs!\n"); ctxt->wellFormed = 0; return; - case XML_PARSER_ENTITY_DECL: - /* we just ignore it there */ - return; case XML_PARSER_ENTITY_VALUE: /* * NOTE: in the case of entity values, we don't do the @@ -1023,8 +1057,13 @@ fprintf(stderr, "xmlParserHandleReference : ctxt->token != 0\n"); switch(ctxt->instate) { case XML_PARSER_CDATA_SECTION: return; + case XML_PARSER_PI: case XML_PARSER_COMMENT: return; + case XML_PARSER_START_TAG: + return; + case XML_PARSER_END_TAG: + return; case XML_PARSER_EOF: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) @@ -1032,6 +1071,8 @@ fprintf(stderr, "xmlParserHandleReference : ctxt->token != 0\n"); ctxt->wellFormed = 0; return; case XML_PARSER_PROLOG: + case XML_PARSER_START: + case XML_PARSER_MISC: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Reference in prolog!\n"); @@ -1195,6 +1236,10 @@ fprintf(stderr, "xmlParserHandlePEReference : ctxt->token != 0\n"); return; case XML_PARSER_COMMENT: 
return; + case XML_PARSER_START_TAG: + return; + case XML_PARSER_END_TAG: + return; case XML_PARSER_EOF: ctxt->errNo = XML_ERR_PEREF_AT_EOF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) @@ -1202,6 +1247,8 @@ fprintf(stderr, "xmlParserHandlePEReference : ctxt->token != 0\n"); ctxt->wellFormed = 0; return; case XML_PARSER_PROLOG: + case XML_PARSER_START: + case XML_PARSER_MISC: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); @@ -1210,6 +1257,7 @@ fprintf(stderr, "xmlParserHandlePEReference : ctxt->token != 0\n"); case XML_PARSER_ENTITY_DECL: case XML_PARSER_CONTENT: case XML_PARSER_ATTRIBUTE_VALUE: + case XML_PARSER_PI: /* we just ignore it there */ return; case XML_PARSER_EPILOG: @@ -1355,7 +1403,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, /* * allocate a translation buffer. */ - buffer_size = 1000; + buffer_size = XML_PARSER_BIG_BUFFER_SIZE; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { perror("xmlDecodeEntities: malloc failed"); @@ -1382,7 +1430,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, current = ent->content; while (*current != 0) { *out++ = *current++; - if (out - buffer > buffer_size - 100) { + if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -1396,7 +1444,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, nbchars += i + 2; *out++ = '&'; - if (out - buffer > buffer_size - i - 100) { + if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -1427,7 +1475,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, /* invalid for UTF-8 , use COPY(out); !!!!!! 
*/ *out++ = cur; nbchars++; - if (out - buffer > buffer_size - 100) { + if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -1471,7 +1519,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, /* * allocate a translation buffer. */ - buffer_size = 500; + buffer_size = XML_PARSER_BIG_BUFFER_SIZE; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { perror("xmlDecodeEntities: malloc failed"); @@ -1498,7 +1546,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, current = ent->content; while (*current != 0) { *out++ = *current++; - if (out - buffer > buffer_size - 100) { + if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -1510,7 +1558,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, const xmlChar *cur = ent->name; *out++ = '&'; - if (out - buffer > buffer_size - i - 100) { + if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -1526,7 +1574,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, current = ent->content; while (*current != 0) { *out++ = *current++; - if (out - buffer > buffer_size - 100) { + if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -1537,7 +1585,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, } else { /* invalid for UTF-8 , use COPY(out); !!!!!! */ *out++ = cur; - if (out - buffer > buffer_size - 100) { + if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) { int index = out - buffer; growBuffer(buffer); @@ -2016,9 +2064,6 @@ xmlStrcat(xmlChar *cur, const xmlChar *add) { * * Is this a sequence of blank chars that one can ignore ? 
* - * TODO: Whether white space are significant has to be checked accordingly - * to DTD informations if available - * * Returns 1 if ignorable 0 otherwise. */ @@ -2284,7 +2329,7 @@ xmlChar * xmlParseQuotedString(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar c; buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); @@ -2668,7 +2713,7 @@ xmlChar * xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar c; xmlChar stop; xmlChar *ret = NULL; @@ -2823,7 +2868,7 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { /* * allocate a translation buffer. */ - buffer_size = 100; + buffer_size = XML_PARSER_BUFFER_SIZE; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { perror("xmlParseAttValue: malloc failed"); @@ -2836,7 +2881,6 @@ xmlParseAttValue(xmlParserCtxtPtr ctxt) { */ cur = CUR; while ((cur != limit) && (cur != '<')) { - if (cur == 0) break; if ((cur == '&') && (NXT(1) == '#')) { int val = xmlParseCharRef(ctxt); @@ -2925,7 +2969,7 @@ xmlChar * xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar cur; xmlChar stop; @@ -2996,7 +3040,7 @@ xmlChar * xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar cur; xmlChar stop; @@ -3064,16 +3108,11 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { - xmlChar buf[1000]; + xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE]; int nbchar = 0; xmlChar cur; SHRINK; - /* - * !!!!!!!!!!!! - * NOTE: NXT(0) is used here to avoid breaking on < or & - * entities substitutions. 
- */ cur = CUR; while ((IS_CHAR(cur)) && (cur != '<') && (cur != '&')) { @@ -3082,14 +3121,13 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { if (cdata) break; else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, + ctxt->sax->warning(ctxt->userData, "Sequence ']]>' not allowed in content\n"); ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; - ctxt->wellFormed = 0; } } buf[nbchar++] = CUR; - if (nbchar == 1000) { + if (nbchar == XML_PARSER_BIG_BUFFER_SIZE) { /* * Ok the segment is to be consumed as chars. */ @@ -3244,10 +3282,11 @@ void xmlParseComment(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar q; xmlChar r; xmlChar cur; + xmlParserInputState state; /* * Check that there is a comment right here. @@ -3255,12 +3294,14 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; + state = ctxt->instate; ctxt->instate = XML_PARSER_COMMENT; SHRINK; SKIP(4); buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); + ctxt->instate = state; return; } q = CUR; @@ -3283,6 +3324,7 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); + ctxt->instate = state; return; } } @@ -3310,6 +3352,7 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { ctxt->sax->comment(ctxt->userData, buf); xmlFree(buf); } + ctxt->instate = state; } /** @@ -3362,11 +3405,14 @@ void xmlParsePI(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar cur; xmlChar *target; + xmlParserInputState state; if ((CUR == '<') && (NXT(1) == '?')) { + state = ctxt->instate; + ctxt->instate = XML_PARSER_PI; /* * this is a Processing Instruction. 
*/ @@ -3382,6 +3428,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); + ctxt->instate = state; return; } cur = CUR; @@ -3401,6 +3448,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); + ctxt->instate = state; return; } } @@ -3440,6 +3488,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { ctxt->errNo = XML_ERR_PI_NOT_STARTED; ctxt->wellFormed = 0; } + ctxt->instate = state; } } @@ -3980,13 +4029,13 @@ xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) { * * [ VC: IDREF ] * Values of type IDREF must match the Name production, and values - * of type IDREFS must match Names; TODO each IDREF Name must match the value + * of type IDREFS must match Names; each IDREF Name must match the value * of an ID attribute on some element in the XML document; i.e. IDREF * values must match the value of some ID attribute. * * [ VC: Entity Name ] * Values of type ENTITY must match the Name production, values - * of type ENTITIES must match Names; TODO each Entity Name must match the + * of type ENTITIES must match Names; each Entity Name must match the * name of an unparsed entity declared in the DTD. * * [ VC: Name Token ] @@ -5564,6 +5613,43 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL)) ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI); + /* + * Cleanup + */ + if (URI != NULL) xmlFree(URI); + if (ExternalID != NULL) xmlFree(ExternalID); + if (name != NULL) xmlFree(name); + + /* + * Is there any internal subset declarations ? + * they are handled separately in xmlParseInternalSubset() + */ + if (CUR == '[') + return; + + /* + * We should be at the end of the DOCTYPE declaration. 
+ */ + if (CUR != '>') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n"); + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; + } + NEXT; +} + +/** + * xmlParseInternalSubset: + * @ctxt: an XML parser context + * + * parse the internal subset declaration + * + * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + */ + +void +xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { /* * Is there any DTD definition ? */ @@ -5592,7 +5678,7 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, - "xmlParseDocTypeDecl: error detected in Markup declaration\n"); + "xmlParseInternalSubset: error detected in Markup declaration\n"); ctxt->wellFormed = 0; ctxt->errNo = XML_ERR_INTERNAL_ERROR; break; @@ -5611,13 +5697,6 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) { ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED; } NEXT; - - /* - * Cleanup - */ - if (URI != NULL) xmlFree(URI); - if (ExternalID != NULL) xmlFree(ExternalID); - if (name != NULL) xmlFree(name); } /** @@ -5830,7 +5909,6 @@ failed: /** * xmlParseEndTag: * @ctxt: an XML parser context - * @tagname: the tag name as parsed in the opening tag. * * parse an end of tag * @@ -5842,8 +5920,9 @@ failed: */ void -xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlChar *tagname) { +xmlParseEndTag(xmlParserCtxtPtr ctxt) { xmlChar *name; + xmlChar *oldname; GROW; if ((CUR != '<') || (NXT(1) != '/')) { @@ -5876,10 +5955,10 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlChar *tagname) { * start-tag. 
* */ - if (xmlStrcmp(name, tagname)) { + if (xmlStrcmp(name, ctxt->name)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, - "Opening and ending tag mismatch: %s and %s\n", tagname, name); + "Opening and ending tag mismatch: %s and %s\n", ctxt->name, name); ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH; ctxt->wellFormed = 0; @@ -5893,7 +5972,13 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlChar *tagname) { if (name != NULL) xmlFree(name); - + oldname = namePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + fprintf(stderr,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } return; } @@ -5915,7 +6000,7 @@ void xmlParseCDSect(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; - int size = 100; + int size = XML_PARSER_BUFFER_SIZE; xmlChar r, s; xmlChar cur; @@ -6113,6 +6198,7 @@ void xmlParseElement(xmlParserCtxtPtr ctxt) { const xmlChar *openTag = CUR_PTR; xmlChar *name; + xmlChar *oldname; xmlParserNodeInfo node_info; xmlNodePtr ret; @@ -6127,6 +6213,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { if (name == NULL) { return; } + namePush(ctxt, name); ret = ctxt->node; /* @@ -6145,7 +6232,13 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { SKIP(2); if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, name); - xmlFree(name); + oldname = namePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + fprintf(stderr,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } return; } if (CUR == '>') { @@ -6162,7 +6255,13 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * end of parsing of this node. */ nodePop(ctxt); - xmlFree(name); + oldname = namePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + fprintf(stderr,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } /* * Capture end position and add node @@ -6192,15 +6291,20 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * end of parsing of this node. 
*/ nodePop(ctxt); - xmlFree(name); + oldname = namePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + fprintf(stderr,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } return; } /* * parse the end of tag: 'instate = XML_PARSER_DTD; + xmlParseInternalSubset(ctxt); + } ctxt->instate = XML_PARSER_PROLOG; xmlParseMisc(ctxt); } @@ -6779,23 +6887,33 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { * Time to start parsing the tree itself */ GROW; - ctxt->instate = XML_PARSER_CONTENT; - xmlParseElement(ctxt); - ctxt->instate = XML_PARSER_EPILOG; - - /* - * The Misc part at the end - */ - xmlParseMisc(ctxt); - - if (CUR != 0) { + if (CUR != '<') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, - "Extra content at the end of the document\n"); + "Start tag expect, '<' not found\n"); + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; ctxt->wellFormed = 0; - ctxt->errNo = XML_ERR_DOCUMENT_END; + ctxt->instate = XML_PARSER_EOF; + } else { + ctxt->instate = XML_PARSER_CONTENT; + xmlParseElement(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + + + /* + * The Misc part at the end + */ + xmlParseMisc(ctxt); + + if (CUR != 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_DOCUMENT_END; + } + ctxt->instate = XML_PARSER_EOF; } - ctxt->instate = XML_PARSER_EOF; /* * SAX: end of the document processing. @@ -6816,18 +6934,74 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { * xmlParseLookupSequence: * @ctxt: an XML parser context * @first: the first char to lookup - * @next: the next char to lookup + * @next: the next char to lookup or zero + * @third: the next char to lookup or zero * - * Try to find if a sequence (first, next) or just (first) if next - * is zero is available in the input stream. - * Since XML-1.0 is an LALR(2) grammar a sequence of 2 char should be - * enought. 
If this doesn't prove true this function call may change. + * Try to find if a sequence (first, next, third) or just (first next) or + * (first) is available in the input stream. + * This function has a side effect of (possibly) incrementing ctxt->checkIndex + * to avoid rescanning sequences of bytes, it DOES change the state of the + * parser, do not use liberally. * - * Returns 1 if the full sequence is available, 0 otherwise. + * Returns the index to the current parsing point if the full sequence + * is available, -1 otherwise. */ int -xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, xmlChar next) { - return(0); +xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, + xmlChar next, xmlChar third) { + int base, len; + xmlParserInputPtr in; + const xmlChar *buf; + + in = ctxt->input; + if (in == NULL) return(-1); + base = in->cur - in->base; + if (base < 0) return(-1); + if (ctxt->checkIndex > base) + base = ctxt->checkIndex; + if (in->buf == NULL) { + buf = in->base; + len = in->length; + } else { + buf = in->buf->buffer->content; + len = in->buf->buffer->use; + } + /* take into account the sequence length */ + if (third) len -= 2; + else if (next) len --; + for (;base < len;base++) { + if (buf[base] == first) { + if (third != 0) { + if ((buf[base + 1] != next) || + (buf[base + 2] != third)) continue; + } else if (next != 0) { + if (buf[base + 1] != next) continue; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + if (next == 0) + fprintf(stderr, "PP: lookup '%c' found at %d\n", + first, base); + else if (third == 0) + fprintf(stderr, "PP: lookup '%c%c' found at %d\n", + first, next, base); + else + fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n", + first, next, third, base); +#endif + return(base - (in->cur - in->base)); + } + } + ctxt->checkIndex = base; +#ifdef DEBUG_PUSH + if (next == 0) + fprintf(stderr, "PP: lookup '%c' failed\n", first); + else if (third == 0) + fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next); + else + 
fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third); +#endif + return(-1); } /** @@ -6841,23 +7015,651 @@ xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, xmlChar next) { int xmlParseTry(xmlParserCtxtPtr ctxt) { int ret = 0; + xmlParserInputPtr in; + int avail; + xmlChar cur, next; + +#ifdef DEBUG_PUSH + switch (ctxt->instate) { + case XML_PARSER_EOF: + fprintf(stderr, "PP: try EOF\n"); break; + case XML_PARSER_START: + fprintf(stderr, "PP: try START\n"); break; + case XML_PARSER_MISC: + fprintf(stderr, "PP: try MISC\n");break; + case XML_PARSER_COMMENT: + fprintf(stderr, "PP: try COMMENT\n");break; + case XML_PARSER_PROLOG: + fprintf(stderr, "PP: try PROLOG\n");break; + case XML_PARSER_START_TAG: + fprintf(stderr, "PP: try START_TAG\n");break; + case XML_PARSER_CONTENT: + fprintf(stderr, "PP: try CONTENT\n");break; + case XML_PARSER_CDATA_SECTION: + fprintf(stderr, "PP: try CDATA_SECTION\n");break; + case XML_PARSER_END_TAG: + fprintf(stderr, "PP: try END_TAG\n");break; + case XML_PARSER_ENTITY_DECL: + fprintf(stderr, "PP: try ENTITY_DECL\n");break; + case XML_PARSER_ENTITY_VALUE: + fprintf(stderr, "PP: try ENTITY_VALUE\n");break; + case XML_PARSER_ATTRIBUTE_VALUE: + fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break; + case XML_PARSER_DTD: + fprintf(stderr, "PP: try DTD\n");break; + case XML_PARSER_EPILOG: + fprintf(stderr, "PP: try EPILOG\n");break; + case XML_PARSER_PI: + fprintf(stderr, "PP: try PI\n");break; + } +#endif while (1) { + /* + * Pop-up of finished entities. + */ + while ((CUR == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + + in = ctxt->input; + if (in == NULL) break; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 1) + goto done; switch (ctxt->instate) { case XML_PARSER_EOF: - return(0); + /* + * Document parsing is done ! + */ + goto done; + case XML_PARSER_START: + /* + * Very first chars read from the document flow. 
+ */ + cur = in->cur[0]; + if (IS_BLANK(cur)) { + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra spaces at the beginning of the document are not allowed\n"); + ctxt->errNo = XML_ERR_DOCUMENT_START; + ctxt->wellFormed = 0; + SKIP_BLANKS; + ret++; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + } + if (avail < 2) + goto done; + + cur = in->cur[0]; + next = in->cur[1]; + if (cur == 0) { + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Document is empty\n"); + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + ctxt->wellFormed = 0; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + if ((cur == '<') && (next == '?')) { + /* PI or XML decl */ + if (avail < 5) return(ret); + if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0) + return(ret); + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + if ((in->cur[2] == 'x') && + (in->cur[3] == 'm') && + (in->cur[4] == 'l')) { + ret += 5; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing XML Decl\n"); +#endif + xmlParseXMLDecl(ctxt); + if ((ctxt->sax) && (ctxt->sax->startDocument)) + ctxt->sax->startDocument(ctxt->userData); + ctxt->instate = XML_PARSER_MISC; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering MISC\n"); +#endif + } else { + ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); + if ((ctxt->sax) && (ctxt->sax->startDocument)) + 
ctxt->sax->startDocument(ctxt->userData); + ctxt->instate = XML_PARSER_MISC; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering MISC\n"); +#endif + } + } else { + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, + &xmlDefaultSAXLocator); + ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION); + if ((ctxt->sax) && (ctxt->sax->startDocument)) + ctxt->sax->startDocument(ctxt->userData); + ctxt->instate = XML_PARSER_MISC; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering MISC\n"); +#endif + } + break; + case XML_PARSER_MISC: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '?')) { + if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_MISC; + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == 'D') && (in->cur[3] == 'O') && + (in->cur[4] == 'C') && (in->cur[5] == 'T') && + (in->cur[6] == 'Y') && (in->cur[7] == 'P') && + (in->cur[8] == 'E')) { + if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing internal subset\n"); +#endif + xmlParseDocTypeDecl(ctxt); + if (CUR == '[') { + ctxt->instate = XML_PARSER_DTD; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering DTD\n"); +#endif + } else { + ctxt->instate = XML_PARSER_PROLOG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering PROLOG\n"); +#endif + } + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else { + 
ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering START_TAG\n"); +#endif + } + break; case XML_PARSER_PROLOG: - case XML_PARSER_CONTENT: - case XML_PARSER_ENTITY_DECL: - case XML_PARSER_ENTITY_VALUE: - case XML_PARSER_ATTRIBUTE_VALUE: - case XML_PARSER_DTD: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '?')) { + if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + } else if ((cur == '<') && (next == '!') && + (avail < 4)) { + goto done; + } else { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering START_TAG\n"); +#endif + } + break; case XML_PARSER_EPILOG: + SKIP_BLANKS; + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else + avail = in->buf->buffer->use - (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '?')) { + if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_EPILOG; + } else if ((cur == '<') 
&& (next == '!') && + (avail < 4)) { + goto done; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_DOCUMENT_END; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + break; + case XML_PARSER_START_TAG: { + xmlChar *name, *oldname; + + if (avail < 2) + goto done; + cur = in->cur[0]; + if (cur != '<') { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Start tag expect, '<' not found\n"); + ctxt->errNo = XML_ERR_DOCUMENT_EMPTY; + ctxt->wellFormed = 0; + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) + goto done; + name = xmlParseStartTag(ctxt); + if (name == NULL) { + ctxt->instate = XML_PARSER_EOF; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering EOF\n"); +#endif + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + goto done; + } + namePush(ctxt, xmlStrdup(name)); + + /* + * [ VC: Root Element Type ] + * The Name in the document type declaration must match + * the element type of the root element. + */ + if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc && + ctxt->node && (ctxt->node == ctxt->myDoc->root)) + ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); + + /* + * Check for an Empty Element. 
+ */ + if ((CUR == '/') && (NXT(1) == '>')) { + SKIP(2); + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + xmlFree(name); + oldname = namePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + fprintf(stderr,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + if (ctxt->name == NULL) { + ctxt->instate = XML_PARSER_EPILOG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering EPILOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CONTENT\n"); +#endif + } + break; + } + if (CUR == '>') { + NEXT; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Couldn't find end of Start Tag %s\n", + name); + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_GT_REQUIRED; + + /* + * end of parsing of this node. + */ + nodePop(ctxt); + oldname = namePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG_STACK + fprintf(stderr,"Close: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } + xmlFree(name); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CONTENT\n"); +#endif + break; + } + case XML_PARSER_CONTENT: + /* + * Handle preparsed entities and charRef + */ + if (ctxt->token != 0) { + xmlChar cur[2] = { 0 , 0 } ; + + cur[0] = (xmlChar) ctxt->token; + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, cur, 1); + ctxt->token = 0; + } + if (avail < 2) + goto done; + cur = in->cur[0]; + next = in->cur[1]; + if ((cur == '<') && (next == '?')) { + if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing PI\n"); +#endif + xmlParsePI(ctxt); + } else if ((cur == '<') && (next == '!') && + (in->cur[2] == '-') && (in->cur[3] == '-')) { + if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing 
Comment\n"); +#endif + xmlParseComment(ctxt); + ctxt->instate = XML_PARSER_CONTENT; + } else if ((cur == '<') && (in->cur[1] == '!') && + (in->cur[2] == '[') && (NXT(3) == 'C') && + (in->cur[4] == 'D') && (NXT(5) == 'A') && + (in->cur[6] == 'T') && (NXT(7) == 'A') && + (in->cur[8] == '[')) { + SKIP(9); + ctxt->instate = XML_PARSER_CDATA_SECTION; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CDATA_SECTION\n"); +#endif + break; + } else if ((cur == '<') && (next == '!') && + (avail < 9)) { + goto done; + } else if ((cur == '<') && (next == '/')) { + ctxt->instate = XML_PARSER_END_TAG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering END_TAG\n"); +#endif + break; + } else if (cur == '<') { + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering START_TAG\n"); +#endif + break; + } else if (cur == '&') { + if (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0) + goto done; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing Reference\n"); +#endif + /* TODO: check generation of subtrees if noent !!! */ + xmlParseReference(ctxt); + } else { + /* TODO Avoid the extra copy, handle directly !!!!!! */ + /* + * Goal of the following test is : + * - minimize calls to the SAX 'character' callback + * when they are mergeable + * - handle an problem for isBlank when we only parse + * a sequence of blank chars and the next one is + * not available to check against '<' presence. + * - tries to homogenize the differences in SAX + * callbacks beween the push and pull versions + * of the parser. + */ + if ((ctxt->inputNr == 1) && + (avail < XML_PARSER_BIG_BUFFER_SIZE)) { + if (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0) + goto done; + } + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: Parsing char data\n"); +#endif + xmlParseCharData(ctxt, 0); + } + /* + * Pop-up of finished entities. 
+ */ + while ((CUR == 0) && (ctxt->inputNr > 1)) + xmlPopInput(ctxt); + break; + case XML_PARSER_CDATA_SECTION: { + /* + * The Push mode need to have the SAX callback for + * cdataBlock merge back contiguous callbacks. + */ + int base; + + in = ctxt->input; + base = xmlParseLookupSequence(ctxt, ']', ']', '>'); + if (base < 0) { + if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) { + if (ctxt->sax != NULL) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, in->cur, + XML_PARSER_BIG_BUFFER_SIZE); + } + SKIP(XML_PARSER_BIG_BUFFER_SIZE); + ctxt->checkIndex = 0; + } + goto done; + } else { + if ((ctxt->sax != NULL) && (base > 0)) { + if (ctxt->sax->cdataBlock != NULL) + ctxt->sax->cdataBlock(ctxt->userData, + in->cur, base); + } + SKIP(base + 3); + ctxt->checkIndex = 0; + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CONTENT\n"); +#endif + } + break; + } + case XML_PARSER_END_TAG: { + if (avail < 2) + goto done; + if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) + goto done; + xmlParseEndTag(ctxt); + if (ctxt->name == NULL) { + ctxt->instate = XML_PARSER_EPILOG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering EPILOG\n"); +#endif + } else { + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CONTENT\n"); +#endif + } + break; + } + case XML_PARSER_DTD: { + /* + * Sorry but progressive parsing of the internal subset + * is not expected to be supported. We first check that + * the full content of the internal subset is available and + * the parsing is launched only at that point. + * Internal subset ends up with "']' S? '>'" in an unescaped + * section and not in a ']]>' sequence which are conditional + * sections (whoever argued to keep that crap in XML deserve + * a place in hell !). 
+ */ + int base, i; + xmlChar *buf; + xmlChar quote = 0; + + base = in->cur - in->base; + if (base < 0) return(0); + if (ctxt->checkIndex > base) + base = ctxt->checkIndex; + buf = in->buf->buffer->content; + for (;base < in->buf->buffer->use;base++) { + if (quote != 0) { + if (buf[base] == quote) + quote = 0; + continue; + } + if (buf[base] == '"') { + quote = '"'; + continue; + } + if (buf[base] == '\'') { + quote = '\''; + continue; + } + if (buf[base] == ']') { + if (base +1 >= in->buf->buffer->use) + break; + if (buf[base + 1] == ']') { + /* conditional crap, skip both ']' ! */ + base++; + continue; + } + for (i = 0;base + i < in->buf->buffer->use;i++) { + if (buf[base + i] == '>') + goto found_end_int_subset; + } + break; + } + } + /* + * We didn't found the end of the Internal subset + */ + if (quote == 0) + ctxt->checkIndex = base; +#ifdef DEBUG_PUSH + if (next == 0) + fprintf(stderr, "PP: lookup of int subset end filed\n"); +#endif + goto done; + +found_end_int_subset: + xmlParseInternalSubset(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + ctxt->checkIndex = 0; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering PROLOG\n"); +#endif + break; + } case XML_PARSER_COMMENT: - case XML_PARSER_CDATA_SECTION: - break; + fprintf(stderr, "PP: internal error, state == COMMENT\n"); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_PI: + fprintf(stderr, "PP: internal error, state == PI\n"); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering CONTENT\n"); +#endif + break; + case XML_PARSER_ENTITY_DECL: + fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n"); + ctxt->instate = XML_PARSER_DTD; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering DTD\n"); +#endif + break; + case XML_PARSER_ENTITY_VALUE: + fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n"); + ctxt->instate = XML_PARSER_CONTENT; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: 
entering DTD\n"); +#endif + break; + case XML_PARSER_ATTRIBUTE_VALUE: + fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n"); + ctxt->instate = XML_PARSER_START_TAG; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: entering START_TAG\n"); +#endif + break; } } +done: +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: done %d\n", ret); +#endif return(ret); } @@ -6872,12 +7674,38 @@ xmlParseTry(xmlParserCtxtPtr ctxt) { * * Returns zero if no error, the xmlParserErrors otherwise. */ -xmlParserErrors +int xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL)) { + (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { + int base = ctxt->input->base - ctxt->input->buf->buffer->content; + int cur = ctxt->input->cur - ctxt->input->base; + xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + ctxt->input->base = ctxt->input->buf->buffer->content + base; + ctxt->input->cur = ctxt->input->base + cur; +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: pushed %d\n", size); +#endif + + xmlParseTry(ctxt); + } else if (ctxt->instate != XML_PARSER_EOF) + xmlParseTry(ctxt); + if (terminate) { + if ((ctxt->instate != XML_PARSER_EOF) && + (ctxt->instate != XML_PARSER_EPILOG)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Extra content at the end of the document\n"); + ctxt->wellFormed = 0; + ctxt->errNo = XML_ERR_DOCUMENT_END; + } + if (ctxt->instate != XML_PARSER_EOF) { + if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) + ctxt->sax->endDocument(ctxt->userData); + } + ctxt->instate = XML_PARSER_EOF; } return((xmlParserErrors) ctxt->errNo); } @@ -6888,6 +7716,89 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, * * ************************************************************************/ +/** + * xmlCreatePushParserCtxt : + * @sax: a SAX handler + * @user_data: The user data returned on SAX 
callbacks + * @chunk: a pointer to an array of chars + * @size: number of chars in the array + * @filename: an optional file name or URI + * + * Create a parser context for using the XML parser in push mode + * To allow content encoding detection, @size should be >= 4 + * The value of @filename is used for fetching external entities + * and error/warning reports. + * + * Returns the new parser context or NULL + */ +xmlParserCtxtPtr +xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, + const char *chunk, int size, const char *filename) { + xmlParserCtxtPtr ctxt; + xmlParserInputPtr inputStream; + xmlParserInputBufferPtr buf; + xmlCharEncoding enc = XML_CHAR_ENCODING_NONE; + + /* + * plug some encoding conversion routines here. !!! + */ + if ((chunk != NULL) && (size >= 4)) + enc = xmlDetectCharEncoding((const xmlChar *) chunk); + + buf = xmlAllocParserInputBuffer(enc); + if (buf == NULL) return(NULL); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) { + xmlFree(buf); + return(NULL); + } + if (sax != NULL) { + if (ctxt->sax != &xmlDefaultSAXHandler) + xmlFree(ctxt->sax); + ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler)); + if (ctxt->sax == NULL) { + xmlFree(buf); + xmlFree(ctxt); + return(NULL); + } + memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler)); + if (user_data != NULL) + ctxt->userData = user_data; + } + if (filename == NULL) { + ctxt->directory = NULL; + } else { + ctxt->directory = xmlParserGetDirectory(filename); + } + + inputStream = xmlNewInputStream(ctxt); + if (inputStream == NULL) { + xmlFreeParserCtxt(ctxt); + return(NULL); + } + + if (filename == NULL) + inputStream->filename = NULL; + else + inputStream->filename = xmlMemStrdup(filename); + inputStream->buf = buf; + inputStream->base = inputStream->buf->buffer->content; + inputStream->cur = inputStream->buf->buffer->content; + + inputPush(ctxt, inputStream); + + if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && + (ctxt->input->buf != NULL)) { + 
xmlParserInputBufferPush(ctxt->input->buf, size, chunk); +#ifdef DEBUG_PUSH + fprintf(stderr, "PP: pushed %d\n", size); +#endif + } + + return(ctxt); +} + /** * xmlCreateDocParserCtxt : * @cur: a pointer to an array of xmlChar @@ -7397,7 +8308,8 @@ xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data, if (ctxt->sax != &xmlDefaultSAXHandler) xmlFree(ctxt->sax); ctxt->sax = sax; - ctxt->userData = user_data; + if (user_data != NULL) + ctxt->userData = user_data; xmlParseDocument(ctxt); diff --git a/parser.h b/parser.h index 8a6443cc..196aeb3b 100644 --- a/parser.h +++ b/parser.h @@ -40,11 +40,12 @@ typedef struct xmlParserInput { const char *filename; /* The file analyzed, if any */ const char *directory; /* the directory/base of teh file */ - const xmlChar *base; /* Base of the array to parse */ - const xmlChar *cur; /* Current char being parsed */ + const xmlChar *base; /* Base of the array to parse */ + const xmlChar *cur; /* Current char being parsed */ + int length; /* length if known */ int line; /* Current line */ int col; /* Current column */ - int consumed; /* How many xmlChars were already consumed */ + int consumed; /* How many xmlChars already consumed */ xmlParserInputDeallocate free; /* function to deallocate the base */ } xmlParserInput; typedef xmlParserInput *xmlParserInputPtr; @@ -77,20 +78,25 @@ typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; /** - * The parser is not a state based parser, but we need to maintain + * The parser is not (yet) a state based parser, but we need to maintain * minimum state informations, especially for entities processing. 
*/ typedef enum { - XML_PARSER_EOF = 0, - XML_PARSER_PROLOG, - XML_PARSER_CONTENT, - XML_PARSER_ENTITY_DECL, - XML_PARSER_ENTITY_VALUE, - XML_PARSER_ATTRIBUTE_VALUE, - XML_PARSER_DTD, - XML_PARSER_EPILOG, - XML_PARSER_COMMENT, - XML_PARSER_CDATA_SECTION + XML_PARSER_EOF = -1, /* nothing is to be parsed */ + XML_PARSER_START = 0, /* nothing has been parsed */ + XML_PARSER_MISC, /* Misc* before int subset */ + XML_PARSER_PI, /* Whithin a processing instruction */ + XML_PARSER_DTD, /* within some DTD content */ + XML_PARSER_PROLOG, /* Misc* after internal subset */ + XML_PARSER_COMMENT, /* within a comment */ + XML_PARSER_START_TAG, /* within a start tag */ + XML_PARSER_CONTENT, /* within the content */ + XML_PARSER_CDATA_SECTION, /* within a CDATA section */ + XML_PARSER_END_TAG, /* within a closing tag */ + XML_PARSER_ENTITY_DECL, /* within an entity declaration */ + XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ + XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ + XML_PARSER_EPILOG /* the Misc* after the last end tag */ } xmlParserInputState; /** @@ -151,6 +157,7 @@ typedef struct _xmlParserCtxt { xmlChar * *nameTab; /* array of nodes */ long nbChars; /* number of xmlChar processed */ + long checkIndex; /* used by progressive parsing lookup */ } _xmlParserCtxt; typedef _xmlParserCtxt xmlParserCtxt; typedef xmlParserCtxt *xmlParserCtxtPtr; @@ -347,13 +354,35 @@ xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, const xmlChar *ExternalID, const xmlChar *SystemID); +/** + * SAX initialization routines + */ +void xmlDefaultSAXHandlerInit(void); +void htmlDefaultSAXHandlerInit(void); + +/** + * Parser contexts handling. 
+ */ void xmlInitParserCtxt (xmlParserCtxtPtr ctxt); void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); +void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, const xmlChar* buffer, const char* filename); -void xmlDefaultSAXHandlerInit(void); -void htmlDefaultSAXHandlerInit(void); +xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); + +/** + * Interfaces for the Push mode + */ +xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename); +int xmlParseChunk (xmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); /** * Node infos diff --git a/parserInternals.h b/parserInternals.h index 9da48462..5a7b7ffe 100644 --- a/parserInternals.h +++ b/parserInternals.h @@ -435,9 +435,10 @@ typedef unsigned char CHARVAL; * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ #define IS_CHAR(c) \ - ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ - (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \ - (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF)) + ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \ + ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ + (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \ + (((c) >= 0x10000) && ((c) <= 0x10FFFF))) /* * [85] BaseChar ::= ... long list see REC ... 
@@ -595,8 +596,7 @@ void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt, xmlChar **value); xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt); -void xmlParseEndTag (xmlParserCtxtPtr ctxt, - xmlChar *tagname); +void xmlParseEndTag (xmlParserCtxtPtr ctxt); void xmlParseCDSect (xmlParserCtxtPtr ctxt); void xmlParseContent (xmlParserCtxtPtr ctxt); void xmlParseElement (xmlParserCtxtPtr ctxt); diff --git a/testSAX.c b/testSAX.c index f33a1d6e..fd65092e 100644 --- a/testSAX.c +++ b/testSAX.c @@ -45,6 +45,7 @@ static int debug = 0; static int copy = 0; static int recovery = 0; +static int push = 0; xmlSAXHandler emptySAXHandlerStruct = { NULL, /* internalSubset */ @@ -77,29 +78,6 @@ xmlSAXHandler emptySAXHandlerStruct = { xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct; extern xmlSAXHandlerPtr debugSAXHandler; -/* - * Note: there is a couple of errors introduced on purpose. - */ -static char buffer[] = -"\n\ -\n\ -\n\ -\n\ -\n\ - \n\ - \n\ - Jim Whitehead\n\ - Roy Fielding\n\ - \n\ - \n\ - \n\ - \n\ - \n\ - \n\ -\n\ -\n\ -"; - /************************************************************************ * * * Debug Handlers * @@ -588,42 +566,76 @@ xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; void parseAndPrintFile(char *filename) { int res; - /* - * Empty callbacks for checking - */ - res = xmlSAXUserParseFile(emptySAXHandler, NULL, filename); - if (res != 0) { - fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res); - } + if (push) { + FILE *f; - /* - * Debug callback - */ - res = xmlSAXUserParseFile(debugSAXHandler, NULL, filename); - if (res != 0) { - fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res); + /* + * Empty callbacks for checking + */ + f = fopen(filename, "r"); + if (f != NULL) { + int res; + char chars[10]; + xmlParserCtxtPtr ctxt; + + res = fread(chars, 1, 4, f); + if (res > 0) { + ctxt = xmlCreatePushParserCtxt(emptySAXHandler, NULL, + chars, res, filename); + 
while ((res = fread(chars, 1, 3, f)) > 0) { + xmlParseChunk(ctxt, chars, res, 0); + } + xmlParseChunk(ctxt, chars, 0, 1); + xmlFreeParserCtxt(ctxt); + } + fclose(f); + } else { + fprintf(stderr, "Cannot read file %s\n", filename); + } + /* + * Debug callback + */ + f = fopen(filename, "r"); + if (f != NULL) { + int res; + char chars[10]; + xmlParserCtxtPtr ctxt; + + res = fread(chars, 1, 4, f); + if (res > 0) { + ctxt = xmlCreatePushParserCtxt(debugSAXHandler, NULL, + chars, res, filename); + while ((res = fread(chars, 1, 3, f)) > 0) { + xmlParseChunk(ctxt, chars, res, 0); + } + res = xmlParseChunk(ctxt, chars, 0, 1); + xmlFreeParserCtxt(ctxt); + if (res != 0) { + fprintf(stdout, + "xmlSAXUserParseFile returned error %d\n", res); + } + } + fclose(f); + } + } else { + /* + * Empty callbacks for checking + */ + res = xmlSAXUserParseFile(emptySAXHandler, NULL, filename); + if (res != 0) { + fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res); + } + + /* + * Debug callback + */ + res = xmlSAXUserParseFile(debugSAXHandler, NULL, filename); + if (res != 0) { + fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res); + } } } -void parseAndPrintBuffer(char *buf) { - int res; - - /* - * Empty callbacks for checking - */ - res = xmlSAXUserParseMemory(emptySAXHandler, NULL, buf, strlen(buf)); - if (res != 0) { - fprintf(stdout, "xmlSAXUserParseMemory returned error %d\n", res); - } - - /* - * Debug callback - */ - res = xmlSAXUserParseMemory(debugSAXHandler, NULL, buf, strlen(buf)); - if (res != 0) { - fprintf(stdout, "xmlSAXUserParseMemory returned error %d\n", res); - } -} int main(int argc, char **argv) { int i; @@ -637,6 +649,9 @@ int main(int argc, char **argv) { else if ((!strcmp(argv[i], "-recover")) || (!strcmp(argv[i], "--recover"))) recovery++; + else if ((!strcmp(argv[i], "-push")) || + (!strcmp(argv[i], "--push"))) + push++; } for (i = 1; i < argc ; i++) { if (argv[i][0] != '-') { @@ -644,10 +659,6 @@ int main(int argc, char **argv) { files ++; 
} } - if (files == 0) { - printf("\nFirst test for the parser, with errors\n"); - parseAndPrintBuffer(buffer); - } xmlCleanupParser(); xmlMemoryDump(); diff --git a/tester.c b/tester.c index 3ee565b6..8bb3e595 100644 --- a/tester.c +++ b/tester.c @@ -30,10 +30,19 @@ #ifdef HAVE_STDLIB_H #include #endif +#ifdef HAVE_LIBREADLINE +#include +#ifdef HAVE_LIBHISTORY +#include +#endif +#endif #include "xmlmemory.h" #include "parser.h" +#include "HTMLparser.h" +#include "HTMLtree.h" #include "tree.h" +#include "xpath.h" #include "debugXML.h" static int debug = 0; @@ -47,20 +56,89 @@ static int postvalid = 0; static int repeat = 0; static int insert = 0; static int compress = 0; +static int html = 0; +static int shell = 0; +static int push = 0; extern int xmlDoValidityCheckingDefaultValue; +/** + * xmlShellReadline: + * @prompt: the prompt value + * + * Read a string + * + * Returns a pointer to it or NULL on EOF the caller is expected to + * free the returned string. + */ +char * +xmlShellReadline(char *prompt) { +#ifdef HAVE_LIBREADLINE + char *line_read; + + /* Get a line from the user. */ + line_read = readline (prompt); + + /* If the line has any text in it, save it on the history. 
*/ + if (line_read && *line_read) + add_history (line_read); + + return (line_read); +#else + char line_read[501]; + + if (prompt != NULL) + fprintf(stdout, "%s", prompt); + if (!fgets(line_read, 500, stdin)) + return(NULL); + line_read[500] = 0; + return(strdup(line_read)); +#endif +} void parseAndPrintFile(char *filename) { - xmlDocPtr doc, tmp; + xmlDocPtr doc = NULL, tmp; + + if (html) { + doc = htmlParseFile(filename, NULL); + } else { + /* + * build an XML tree from a string; + */ + if (push) { + FILE *f; + + f = fopen(filename, "r"); + if (f != NULL) { + int res, size = 3; + char chars[1024]; + xmlParserCtxtPtr ctxt; + + if (repeat) + size = 1024; + res = fread(chars, 1, 4, f); + if (res > 0) { + ctxt = xmlCreatePushParserCtxt(NULL, NULL, + chars, res, filename); + while ((res = fread(chars, 1, size, f)) > 0) { + xmlParseChunk(ctxt, chars, res, 0); + } + xmlParseChunk(ctxt, chars, 0, 1); + doc = ctxt->myDoc; + xmlFreeParserCtxt(ctxt); + } + } + } else if (recovery) + doc = xmlRecoverFile(filename); + else + doc = xmlParseFile(filename); + } /* - * build an XML tree from a string; + * shell interraction */ - if (recovery) - doc = xmlRecoverFile(filename); - else - doc = xmlParseFile(filename); + if (shell) + xmlShell(doc, filename, xmlShellReadline, stdout); /* * test intermediate copy if needed. 
@@ -71,7 +149,7 @@ void parseAndPrintFile(char *filename) { xmlFreeDoc(tmp); } - if (insert) { + if ((insert) && (!html)) { const xmlChar* list[256]; int nb, i; xmlNodePtr node; @@ -116,7 +194,7 @@ void parseAndPrintFile(char *filename) { xmlValidateDocument(&cvp, doc); } - if (debugent) + if ((debugent) && (!html)) xmlDebugDumpEntities(stdout, doc); /* @@ -157,11 +235,23 @@ int main(int argc, char **argv) { else if ((!strcmp(argv[i], "-repeat")) || (!strcmp(argv[i], "--repeat"))) repeat++; + else if ((!strcmp(argv[i], "-push")) || + (!strcmp(argv[i], "--push"))) + push++; else if ((!strcmp(argv[i], "-compress")) || (!strcmp(argv[i], "--compress"))) { compress++; xmlSetCompressMode(9); } + else if ((!strcmp(argv[i], "-html")) || + (!strcmp(argv[i], "--html"))) { + html++; + } + else if ((!strcmp(argv[i], "-shell")) || + (!strcmp(argv[i], "--shell"))) { + shell++; + noout = 1; + } } if (noent != 0) xmlSubstituteEntitiesDefault(1); if (valid != 0) xmlDoValidityCheckingDefaultValue = 1; @@ -190,6 +280,9 @@ int main(int argc, char **argv) { printf("\t--repeat : repeat 100 times, for timing or profiling\n"); printf("\t--insert : ad-hoc test for valid insertions\n"); printf("\t--compress : turn on gzip compression of output\n"); + printf("\t--html : use the HTML parser\n"); + printf("\t--shell : run a navigating shell\n"); + printf("\t--push : use the push mode of the parser\n"); } xmlCleanupParser(); xmlMemoryDump(); diff --git a/tree.c b/tree.c index 3242aece..c6685eaf 100644 --- a/tree.c +++ b/tree.c @@ -2498,6 +2498,7 @@ xmlNodeGetContent(xmlNodePtr cur) { return(xmlNodeListGetString(NULL, attr->val, 1)); break; } + case XML_COMMENT_NODE: case XML_PI_NODE: if (cur->content != NULL) #ifndef XML_USE_BUFFER_CONTENT @@ -2507,8 +2508,12 @@ xmlNodeGetContent(xmlNodePtr cur) { #endif return(NULL); case XML_ENTITY_REF_NODE: + /* + * Locate the entity, and get it's content + * @@@ + */ + return(NULL); case XML_ENTITY_NODE: - case XML_COMMENT_NODE: case XML_DOCUMENT_NODE: case 
XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: @@ -2858,18 +2863,6 @@ xmlSearchNs(xmlDocPtr doc, xmlNodePtr node, const xmlChar *nameSpace) { } node = node->parent; } -#if 0 - /* Removed support for old namespaces */ - if (doc != NULL) { - cur = doc->oldNs; - while (cur != NULL) { - if ((cur->prefix != NULL) && (nameSpace != NULL) && - (!xmlStrcmp(cur->prefix, nameSpace))) - return(cur); - cur = cur->next; - } - } -#endif return(NULL); } @@ -2886,31 +2879,283 @@ xmlSearchNs(xmlDocPtr doc, xmlNodePtr node, const xmlChar *nameSpace) { xmlNsPtr xmlSearchNsByHref(xmlDocPtr doc, xmlNodePtr node, const xmlChar *href) { xmlNsPtr cur; + xmlNodePtr orig = node; if ((node == NULL) || (href == NULL)) return(NULL); while (node != NULL) { cur = node->nsDef; while (cur != NULL) { if ((cur->href != NULL) && (href != NULL) && - (!xmlStrcmp(cur->href, href))) + (!xmlStrcmp(cur->href, href))) { + /* + * Check that the prefix is not shadowed between orig and node + */ + xmlNodePtr check = orig; + xmlNsPtr tst; + + while (check != node) { + tst = check->nsDef; + while (tst != NULL) { + if ((tst->prefix == NULL) && (cur->prefix == NULL)) + goto shadowed; + if ((tst->prefix != NULL) && (cur->prefix != NULL) && + (!xmlStrcmp(tst->prefix, cur->prefix))) + goto shadowed; + tst = tst->next; + } + } return(cur); + } +shadowed: cur = cur->next; } node = node->parent; } -#if 0 - /* Removed support for old namespaces */ - if (doc != NULL) { - cur = doc->oldNs; - while (cur != NULL) { - if ((cur->href != NULL) && (href != NULL) && - (!xmlStrcmp(cur->href, href))) - return(cur); - cur = cur->next; + return(NULL); +} + +/** + * xmlNewReconciliedNs + * @doc: the document + * @tree: a node expected to hold the new namespace + * @ns: the original namespace + * + * This function tries to locate a namespace definition in a tree + * ancestors, or create a new namespace definition node similar to + * @ns trying to reuse the same prefix. 
However if the given prefix is + * null (default namespace) or reused within the subtree defined by + * @tree or on one of its ancestors then a new prefix is generated. + * Returns the (new) namespace definition or NULL in case of error + */ +xmlNsPtr +xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns) { + xmlNsPtr def; + xmlChar prefix[50]; + int counter = 1; + + if (tree == NULL) { +#ifdef DEBUG_TREE + fprintf(stderr, "xmlNewReconciliedNs : tree == NULL\n"); +#endif + return(NULL); + } + if (ns == NULL) { +#ifdef DEBUG_TREE + fprintf(stderr, "xmlNewReconciliedNs : ns == NULL\n"); +#endif + return(NULL); + } + /* + * Search an existing namespace definition inherited. + */ + def = xmlSearchNsByHref(doc, tree, ns->href); + if (def != NULL) + return(def); + + /* + * Find a close prefix which is not already in use. + * Let's strip namespace prefixes longer than 20 chars ! + */ + sprintf((char *) prefix, "%.20s", ns->prefix); + def = xmlSearchNs(doc, tree, prefix); + while (def != NULL) { + if (counter > 1000) return(NULL); + sprintf((char *) prefix, "%.20s%d", ns->prefix, counter++); + def = xmlSearchNs(doc, tree, prefix); + } + + /* + * Ok, now we are ready to create a new one. + */ + def = xmlNewNs(tree, ns->href, prefix); + return(def); +} + +/** + * xmlReconciliateNs + * @doc: the document + * @tree: a node defining the subtree to reconciliate + * + * This function checks that all the namespaces declared within the given + * tree are properly declared. This is needed for example after Copy or Cut + * and then paste operations. The subtree may still hold pointers to + * namespace declarations outside the subtree or invalid/masked. As much + * as possible the function try tu reuse the existing namespaces found in + * the new environment. If not possible the new namespaces are redeclared + * on @tree at the top of the given subtree. + * Returns the number of namespace declarations created or -1 in case of error. 
+ */ +int +xmlReconciliateNs(xmlDocPtr doc, xmlNodePtr tree) { + xmlNsPtr *oldNs = NULL; + xmlNsPtr *newNs = NULL; + int sizeCache = 0; + int nbCache = 0; + + xmlNsPtr n; + xmlNodePtr node = tree; + xmlAttrPtr attr; + int ret = 0, i; + + while (node != NULL) { + /* + * Reconciliate the node namespace + */ + if (node->ns != NULL) { + /* + * initialize the cache if needed + */ + if (sizeCache == 0) { + sizeCache = 10; + oldNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + fprintf(stderr, "xmlReconciliateNs : memory pbm\n"); + return(-1); + } + newNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + fprintf(stderr, "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + for (i = 0;i < nbCache;i++) { + if (oldNs[i] == node->ns) { + node->ns = newNs[i]; + break; + } + } + if (i == nbCache) { + /* + * Ok we need to recreate a new namespace definition + */ + n = xmlNewReconciliedNs(doc, tree, node->ns); + if (n != NULL) { /* :-( what if else ??? */ + /* + * check if we need to grow the cache buffers. + */ + if (sizeCache <= nbCache) { + sizeCache *= 2; + oldNs = (xmlNsPtr *) xmlRealloc(oldNs, sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + fprintf(stderr, "xmlReconciliateNs : memory pbm\n"); + xmlFree(newNs); + return(-1); + } + newNs = (xmlNsPtr *) xmlRealloc(newNs, sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + fprintf(stderr, "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + newNs[nbCache] = n; + oldNs[nbCache++] = node->ns; + node->ns = n; + } + } + } + /* + * now check for namespace hold by attributes on the node. 
+ */ + attr = node->properties; + while (attr != NULL) { + if (attr->ns != NULL) { + /* + * initialize the cache if needed + */ + if (sizeCache == 0) { + sizeCache = 10; + oldNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + fprintf(stderr, "xmlReconciliateNs : memory pbm\n"); + return(-1); + } + newNs = (xmlNsPtr *) xmlMalloc(sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + fprintf(stderr, "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + for (i = 0;i < nbCache;i++) { + if (oldNs[i] == attr->ns) { + node->ns = newNs[i]; + break; + } + } + if (i == nbCache) { + /* + * Ok we need to recreate a new namespace definition + */ + n = xmlNewReconciliedNs(doc, tree, attr->ns); + if (n != NULL) { /* :-( what if else ??? */ + /* + * check if we need to grow the cache buffers. + */ + if (sizeCache <= nbCache) { + sizeCache *= 2; + oldNs = (xmlNsPtr *) xmlRealloc(oldNs, sizeCache * + sizeof(xmlNsPtr)); + if (oldNs == NULL) { + fprintf(stderr, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(newNs); + return(-1); + } + newNs = (xmlNsPtr *) xmlRealloc(newNs, sizeCache * + sizeof(xmlNsPtr)); + if (newNs == NULL) { + fprintf(stderr, + "xmlReconciliateNs : memory pbm\n"); + xmlFree(oldNs); + return(-1); + } + } + newNs[nbCache] = n; + oldNs[nbCache++] = attr->ns; + attr->ns = n; + } + } + } + attr = attr->next; + } + + /* + * Browse the full subtree, deep first + */ + if (node->childs != NULL) { + /* deep first */ + node = node->childs; + } else if ((node != tree) && (node->next != NULL)) { + /* then siblings */ + node = node->next; + } else if (node != tree) { + /* go up to parents->next if needed */ + while (node != tree) { + if (node->parent != NULL) + node = node->parent; + if ((node != tree) && (node->next != NULL)) { + node = node->next; + break; + } + if (node->parent == NULL) { + node = NULL; + break; + } + } + /* exit condition */ + if (node == tree) + node = NULL; } } -#endif - return(NULL); + 
return(ret); } /** @@ -3095,8 +3340,9 @@ void xmlTextConcat(xmlNodePtr node, const xmlChar *content, int len) { if (node == NULL) return; - if (node->type != XML_TEXT_NODE) { - fprintf(stderr, "xmlTextConcat: node is not text\n"); + if ((node->type != XML_TEXT_NODE) && + (node->type != XML_CDATA_SECTION_NODE)) { + fprintf(stderr, "xmlTextConcat: node is not text nor cdata\n"); return; } #ifndef XML_USE_BUFFER_CONTENT @@ -3376,7 +3622,11 @@ xmlBufferAdd(xmlBufferPtr buf, const xmlChar *str, int len) { if (len == 0) return; /* CJN What's this for??? */ - l = xmlStrlen(str); + if (len < 0) + l = xmlStrlen(str); + else + for (l = 0;l < len;l++) + if (str[l] == 0) break; if (l < len){ len = l; printf("xmlBufferAdd bad length\n"); } /* CJN 11.18.99 okay, now I'm using the length */ @@ -3676,6 +3926,9 @@ xmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { static void xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level, int format); +void +htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur); + /** * xmlNodeListDump: * @buf: the XML buffer output @@ -3850,6 +4103,40 @@ xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level, xmlBufferWriteChar(buf, ">"); } +/** + * xmlElemDump: + * @buf: the XML buffer output + * @doc: the document + * @cur: the current node + * + * Dump an XML/HTML node, recursive behaviour,children are printed too. 
+ */ +void +xmlElemDump(FILE *f, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufferPtr buf; + + if (cur == NULL) { +#ifdef DEBUG_TREE + fprintf(stderr, "xmlElemDump : cur == NULL\n"); +#endif + return; + } + if (doc == NULL) { +#ifdef DEBUG_TREE + fprintf(stderr, "xmlElemDump : doc == NULL\n"); +#endif + } + buf = xmlBufferCreate(); + if (buf == NULL) return; + if ((doc != NULL) && + (doc->type == XML_HTML_DOCUMENT_NODE)) { + htmlNodeDump(buf, doc, cur); + } else + xmlNodeDump(buf, doc, cur, 0, 1); + xmlBufferDump(f, buf); + xmlBufferFree(buf); +} + /** * xmlDocContentDump: * @buf: the XML buffer output @@ -3937,7 +4224,7 @@ xmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { * Returns 0 (uncompressed) to 9 (max compression) */ int - xmlGetDocCompressMode (xmlDocPtr doc) { +xmlGetDocCompressMode (xmlDocPtr doc) { if (doc == NULL) return(-1); return(doc->compression); } diff --git a/tree.h b/tree.h index 3a0285bc..cce61681 100644 --- a/tree.h +++ b/tree.h @@ -526,6 +526,9 @@ void xmlDocDumpMemory (xmlDocPtr cur, int *size); void xmlDocDump (FILE *f, xmlDocPtr cur); +void xmlElemDump (FILE *f, + xmlDocPtr cur, + xmlNodePtr elem); int xmlSaveFile (const char *filename, xmlDocPtr cur); diff --git a/valid.c b/valid.c index 3af1dcd7..f592c182 100644 --- a/valid.c +++ b/valid.c @@ -25,6 +25,8 @@ #include "parser.h" #include "parserInternals.h" +/* TODO: use hash table for accesses to elem and attribute dedinitions */ + #define VERROR \ if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error @@ -494,6 +496,7 @@ xmlCopyElementTable(xmlElementTablePtr table) { else cur->name = NULL; cur->content = xmlCopyElementContent(ent->content); + /* TODO : rebuild the attribute list on the copy */ cur->attributes = NULL; } return(ret); @@ -2723,6 +2726,7 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem) { xmlElementPtr elemDecl; xmlElementContentPtr cont; + xmlAttributePtr attr; xmlNodePtr child; int ret = 1; const xmlChar *name; @@ -2869,7 +2873,69 @@ 
xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, break; } - /* TODO - [ VC: Required Attribute ] */ + /* [ VC: Required Attribute ] */ + attr = elemDecl->attributes; + while (attr != NULL) { + if (attr->def == XML_ATTRIBUTE_REQUIRED) { + xmlAttrPtr attrib; + int qualified = -1; + + attrib = elem->properties; + while (attrib != NULL) { + if (!xmlStrcmp(attrib->name, attr->name)) { + if (attr->prefix != NULL) { + xmlNsPtr nameSpace = attrib->ns; + + if (nameSpace == NULL) + nameSpace = elem->ns; + /* + * qualified names handling is problematic, having a + * different prefix should be possible but DTDs don't + * allow to define the URI instead of the prefix :-( + */ + if (nameSpace == NULL) { + if (qualified < 0) + qualified = 0; + } else if (xmlStrcmp(nameSpace->prefix, attr->prefix)) { + if (qualified < 1) + qualified = 1; + } else + goto found; + } else { + /* + * We should allow applications to define namespaces + * for their application even if the DTD doesn't + * carry one, otherwise, basically we would always + * break. 
+ */ + goto found; + } + } + attrib = attrib->next; + } + if (qualified == -1) { + if (attr->prefix == NULL) { + VERROR(ctxt->userData, + "Element %s doesn't carry attribute %s\n", + elem->name, attr->name); + } else { + VERROR(ctxt->userData, + "Element %s doesn't carry attribute %s:%s\n", + elem->name, attr->prefix,attr->name); + } + } else if (qualified == 0) { + VWARNING(ctxt->userData, + "Element %s required attribute %s:%s has no prefix\n", + elem->name, attr->prefix,attr->name); + } else if (qualified == 1) { + VWARNING(ctxt->userData, + "Element %s required attribute %s:%s has different prefix\n", + elem->name, attr->prefix,attr->name); + } + } +found: + attr = attr->next; + } return(ret); } diff --git a/xmlIO.c b/xmlIO.c index c7c9a86c..cd3ead09 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -68,6 +68,11 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) { } memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer)); ret->buffer = xmlBufferCreate(); + if (ret->buffer == NULL) { + xmlFree(ret); + return(NULL); + } + ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT; ret->encoder = xmlGetCharEncodingHandler(enc); ret->fd = -1; ret->netIO = NULL; @@ -263,32 +268,30 @@ xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) { */ int xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) { - char *buffer = NULL; int nbchars = 0; if (len < 0) return(0); if (in->encoder != NULL) { - xmlChar *buf; + xmlChar *buffer; - buf = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar)); - if (buf == NULL) { + buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar)); + if (buffer == NULL) { fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n"); xmlFree(buffer); return(-1); } - nbchars = in->encoder->input(buf, (len + 1) * 2 * sizeof(xmlChar), - BAD_CAST buffer, len); + nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar), + (xmlChar *) buf, len); /* * TODO : we really need to have something atomic or the * encoder must report the 
number of bytes read */ - buf[nbchars] = 0; - xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars); - xmlFree(buf); - } else { - nbchars = len; buffer[nbchars] = 0; xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars); + xmlFree(buffer); + } else { + nbchars = len; + xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars); } #ifdef DEBUG_INPUT fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n", @@ -401,7 +404,14 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) { int xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) { /* xmlBufferEmpty(in->buffer); */ - return(xmlParserInputBufferGrow(in, len)); + if ((in->netIO != NULL) || (in->file != NULL) || +#ifdef HAVE_ZLIB_H + (in->gzfile != NULL) || +#endif + (in->fd >= 0)) + return(xmlParserInputBufferGrow(in, len)); + else + return(0); } /* diff --git a/xmlIO.h b/xmlIO.h index 2bdba5d8..bf43de25 100644 --- a/xmlIO.h +++ b/xmlIO.h @@ -37,6 +37,9 @@ typedef xmlParserInputBuffer *xmlParserInputBufferPtr; * Interfaces */ +xmlParserInputBufferPtr + xmlAllocParserInputBuffer (xmlCharEncoding enc); + xmlParserInputBufferPtr xmlParserInputBufferCreateFilename (const char *filename, xmlCharEncoding enc); diff --git a/xmlmemory.c b/xmlmemory.c index 7076f80b..2d46f385 100644 --- a/xmlmemory.c +++ b/xmlmemory.c @@ -368,10 +368,53 @@ xmlMemUsed(void) { return(debugMemSize); } +/** + * xmlMemShow: + * @fp: a FILE descriptor used as the output file + * @nr: number of entries to dump + * + * show a show display of the memory allocated, and dump + * the @nr last allocated areas which were not freed + */ + +void +xmlMemShow(FILE *fp, int nr) +{ +#ifdef MEM_LIST + MEMHDR *p; +#endif + + if (fp != NULL) + fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n", + debugMemSize, debugMaxMemSize); +#ifdef MEM_LIST + if (nr > 0) { + fprintf(fp,"NUMBER SIZE TYPE WHERE\n"); + p = memlist; + while ((p) && nr > 0) { + fprintf(fp,"%6lu %6u ",p->mh_number,p->mh_size); + switch (p->mh_type) { + case 
STRDUP_TYPE:fprintf(fp,"strdup() in ");break; + case MALLOC_TYPE:fprintf(fp,"malloc() in ");break; + case REALLOC_TYPE:fprintf(fp,"realloc() in ");break; + default:fprintf(fp," ??? in ");break; + } + if (p->mh_file != NULL) + fprintf(fp,"%s(%d)", p->mh_file, p->mh_line); + if (p->mh_tag != MEMTAG) + fprintf(fp," INVALID"); + fprintf(fp,"\n"); + nr--; + p = p->mh_next; + } + } +#endif /* MEM_LIST */ +} + /** * xmlMemDisplay: * @fp: a FILE descriptor used as the output file, if NULL, the result is - 8 written to the file .memorylist + * written to the file .memorylist * * show in-extenso the memory blocks allocated */ diff --git a/xmlmemory.h b/xmlmemory.h index 5c1b4774..64477a1c 100644 --- a/xmlmemory.h +++ b/xmlmemory.h @@ -1,5 +1,5 @@ /* - * memory.h: interface for the memory allocation debug. + * xmlmemory.h: interface for the memory allocation debug. * * Daniel.Veillard@w3.org */ @@ -24,6 +24,7 @@ #define xmlInitMemory() #define xmlMemoryDump() #define xmlMemDisplay(x) +#define xmlMemShow(x, d) #else /* ! NO_DEBUG_MEMORY */ #include @@ -51,6 +52,7 @@ void xmlFree (void *ptr); char * xmlMemStrdup (const char *str); int xmlMemUsed (void); void xmlMemDisplay (FILE *fp); +void xmlMemShow (FILE *fp, int nr); void xmlMemoryDump (void); int xmlInitMemory (void); diff --git a/xpath.c b/xpath.c index c5ce36af..4882b04f 100644 --- a/xpath.c +++ b/xpath.c @@ -47,6 +47,10 @@ #include "xpath.h" #include "parserInternals.h" +/* #define DEBUG */ +/* #define DEBUG_STEP */ +/* #define DEBUG_EXPR */ + /* * Setup stuff for floating point * The lack of portability of this section of the libc is annoying ! 
@@ -151,10 +155,6 @@ xmlXPathInit(void) { initialized = 1; } -/* #define DEBUG */ -/* #define DEBUG_STEP */ -/* #define DEBUG_EXPR */ - FILE *xmlXPathDebug = NULL; #define TODO \ @@ -747,6 +747,22 @@ xmlXPathNewNodeSetList(xmlNodeSetPtr val) { return(ret); } +/** + * xmlXPathFreeNodeSetList: + * @obj: an existing NodeSetList object + * + * Free up the xmlXPathObjectPtr @obj but don't deallocate the objects in + * the list contrary to xmlXPathFreeObject(). + */ +void +xmlXPathFreeNodeSetList(xmlXPathObjectPtr obj) { + if (obj == NULL) return; +#ifdef DEBUG + memset(obj, 0xB , (size_t) sizeof(xmlXPathObject)); +#endif + xmlFree(obj); +} + /** * xmlXPathFreeObject: * @obj: the object to free @@ -791,6 +807,12 @@ xmlXPathNewContext(xmlDocPtr doc) { } memset(ret, 0 , (size_t) sizeof(xmlXPathContext)); ret->doc = doc; + /*********** + ret->node = (xmlNodePtr) doc; + ret->nodelist = xmlXPathNodeSetCreate(ret->node); + ***********/ + ret->node = NULL; + ret->nodelist = NULL; ret->nb_variables = 0; ret->max_variables = 0; @@ -825,6 +847,10 @@ xmlXPathFreeContext(xmlXPathContextPtr ctxt) { if (ctxt->namespaces != NULL) xmlFree(ctxt->namespaces); + /*********** + if (ctxt->nodelist != NULL) + xmlXPathFreeNodeSet(ctxt->nodelist); + ***********/ #ifdef DEBUG memset(ctxt, 0xB , (size_t) sizeof(xmlXPathContext)); #endif @@ -1467,12 +1493,13 @@ xmlXPathNextSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { xmlNodePtr xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { if (cur == NULL) { - if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc) - return(ctxt->context->doc->root); + if ((ctxt->context->node->type == XML_DOCUMENT_NODE) || + (ctxt->context->node->type == XML_HTML_DOCUMENT_NODE)) + return(((xmlDocPtr) ctxt->context->node)->root); return(ctxt->context->node->childs); } - if ((ctxt->context->node->type == XML_DOCUMENT_NODE) || - (ctxt->context->node->type == XML_HTML_DOCUMENT_NODE)) + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == 
XML_HTML_DOCUMENT_NODE)) return(NULL); return(cur->next); } @@ -1918,23 +1945,23 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, int axis, ctxt->context->nodelist->nodeNr); switch (test) { case NODE_TEST_NONE: - fprintf(xmlXPathDebug, " seaching for none !!!\n"); + fprintf(xmlXPathDebug, " searching for none !!!\n"); break; case NODE_TEST_TYPE: - fprintf(xmlXPathDebug, " seaching for type %d\n", type); + fprintf(xmlXPathDebug, " searching for type %d\n", type); break; case NODE_TEST_PI: - fprintf(xmlXPathDebug, " seaching for PI !!!\n"); + fprintf(xmlXPathDebug, " searching for PI !!!\n"); break; case NODE_TEST_ALL: - fprintf(xmlXPathDebug, " seaching for *\n"); + fprintf(xmlXPathDebug, " searching for *\n"); break; case NODE_TEST_NS: - fprintf(xmlXPathDebug, " seaching for namespace %s\n", + fprintf(xmlXPathDebug, " searching for namespace %s\n", prefix); break; case NODE_TEST_NAME: - fprintf(xmlXPathDebug, " seaching for name %s\n", name); + fprintf(xmlXPathDebug, " searching for name %s\n", name); if (prefix != NULL) fprintf(xmlXPathDebug, " with namespace %s\n", prefix); @@ -1958,7 +1985,10 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, int axis, STRANGE return(NULL); case NODE_TEST_TYPE: - if (cur->type == type) { + if ((cur->type == type) || + ((type == XML_ELEMENT_NODE) && + ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)))) { #ifdef DEBUG_STEP n++; #endif @@ -4400,6 +4430,7 @@ xmlXPathObjectPtr xmlXPathEval(const xmlChar *str, xmlXPathContextPtr ctxt) { xmlXPathParserContextPtr pctxt; xmlXPathObjectPtr res = NULL, tmp; + int stack = 0; xmlXPathInit(); @@ -4408,16 +4439,26 @@ xmlXPathEval(const xmlChar *str, xmlXPathContextPtr ctxt) { if (xmlXPathDebug == NULL) xmlXPathDebug = stderr; pctxt = xmlXPathNewParserContext(str, ctxt); + if (str[0] == '/') + xmlXPathRoot(pctxt); xmlXPathEvalLocationPath(pctxt); /* TODO: cleanup nodelist, res = valuePop(pctxt); */ do { tmp = valuePop(pctxt); - if (tmp != NULL); 
+ if (tmp != NULL) { xmlXPathFreeObject(tmp); + stack++; + } } while (tmp != NULL); - if (res == NULL) + if (stack != 0) { + fprintf(xmlXPathDebug, "xmlXPathEval: %d object left on the stack\n", + stack); + } + if (pctxt->error == XPATH_EXPRESSION_OK) res = xmlXPathNewNodeSetList(pctxt->context->nodelist); + else + res = NULL; xmlXPathFreeParserContext(pctxt); return(res); } @@ -4436,6 +4477,7 @@ xmlXPathObjectPtr xmlXPathEvalExpression(const xmlChar *str, xmlXPathContextPtr ctxt) { xmlXPathParserContextPtr pctxt; xmlXPathObjectPtr res, tmp; + int stack = 0; xmlXPathInit(); @@ -4449,9 +4491,15 @@ xmlXPathEvalExpression(const xmlChar *str, xmlXPathContextPtr ctxt) { res = valuePop(pctxt); do { tmp = valuePop(pctxt); - if (tmp != NULL); + if (tmp != NULL) { xmlXPathFreeObject(tmp); + stack++; + } } while (tmp != NULL); + if (stack != 0) { + fprintf(xmlXPathDebug, "xmlXPathEval: %d object left on the stack\n", + stack); + } xmlXPathFreeParserContext(pctxt); return(res); } diff --git a/xpath.h b/xpath.h index 149b0beb..84c83052 100644 --- a/xpath.h +++ b/xpath.h @@ -205,6 +205,9 @@ xmlXPathObjectPtr xmlXPathEval (const xmlChar *str, void xmlXPathFreeObject (xmlXPathObjectPtr obj); xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str, xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val); +void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj); +void xmlXPathFreeNodeSet (xmlNodeSetPtr obj); #ifdef __cplusplus }