From 260a68fd34302f352aa8f4c2f2901cefa3e1d2f7 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Thu, 13 Aug 1998 03:39:55 +0000 Subject: [PATCH] Release 0.2, 80% rewrite, nothing left intact ... Daniel --- ChangeLog | 8 + Makefile.am | 16 +- Makefile.win | 34 + SAX.c | 223 +++ TODO | 25 +- autogen.sh | 10 +- config.h | 62 + configure.in | 4 +- entities.c | 426 +++++ xml_entities.h => entities.h | 28 +- error.c | 93 + include/libxml/entities.h | 70 + include/libxml/parser.h | 168 ++ include/libxml/tree.h | 180 ++ parser.c | 3334 ++++++++++++++++++++++++++++++++++ parser.h | 168 ++ result/dav1 | 7 +- result/dav10 | 6 +- result/dav11 | 6 +- result/dav12 | 6 +- result/dav13 | 6 +- result/dav14 | 17 - result/dav15 | 7 +- result/dav16 | 6 +- result/dav17 | 6 +- result/dav18 | 6 +- result/dav19 | 6 +- result/dav2 | 7 +- result/dav3 | 7 +- result/dav4 | 7 +- result/dav5 | 7 +- result/dav6 | 6 +- result/dav7 | 6 +- result/dav8 | 6 +- result/dav9 | 6 +- result/dtd1 | 3 + result/ent1 | 7 + result/ent2 | 10 + result/ent3 | 7 + result/p3p | 16 +- result/rdf1 | 81 + test/dav1 | 2 +- test/dav10 | 2 +- test/dav11 | 2 +- test/dav12 | 2 +- test/dav13 | 2 +- test/dav14 | 10 - test/dav15 | 2 +- test/dav16 | 2 +- test/dav17 | 2 +- test/dav18 | 2 +- test/dav19 | 2 +- test/dav2 | 2 +- test/dav3 | 2 +- test/dav4 | 2 +- test/dav5 | 2 +- test/dav6 | 2 +- test/dav7 | 2 +- test/dav8 | 2 +- test/dav9 | 2 +- test/dtd1 | 5 + test/ent1 | 7 + test/ent2 | 11 + test/ent3 | 8 + test/p3p | 1 + test/rdf1 | 83 + tester.c | 78 +- tree.c | 1209 ++++++++++++ tree.h | 180 ++ xml_entities.c | 353 ---- xml_parser.c | 1183 ------------ xml_parser.h | 61 - xml_tree.c | 731 -------- xml_tree.h | 113 -- 74 files changed, 6510 insertions(+), 2663 deletions(-) create mode 100644 Makefile.win create mode 100644 SAX.c create mode 100644 config.h create mode 100644 entities.c rename xml_entities.h => entities.h (50%) create mode 100644 error.c create mode 100644 include/libxml/entities.h create mode 100644 
include/libxml/parser.h create mode 100644 include/libxml/tree.h create mode 100644 parser.c create mode 100644 parser.h delete mode 100644 result/dav14 create mode 100644 result/dtd1 create mode 100644 result/ent1 create mode 100644 result/ent2 create mode 100644 result/ent3 create mode 100644 result/rdf1 delete mode 100644 test/dav14 create mode 100644 test/dtd1 create mode 100644 test/ent1 create mode 100644 test/ent2 create mode 100644 test/ent3 create mode 100644 test/rdf1 create mode 100644 tree.c create mode 100644 tree.h delete mode 100644 xml_entities.c delete mode 100644 xml_parser.c delete mode 100644 xml_parser.h delete mode 100644 xml_tree.c delete mode 100644 xml_tree.h diff --git a/ChangeLog b/ChangeLog index 811b39c5..d31661ed 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Wed Aug 12 23:12:58 EDT 1998 Daniel Veillard + + * New release 0.2, removed the old xml_* files so that it's + coherent with the other CVS base (W3C), far better conformance + to standard, new namespaces, decent entities support, beginning + of a SAX-like interface. Nearly nothing left intact, even the + test examples ... + 1998-07-30 Christopher Blizzard * .cvsignore: Add .deps dir diff --git a/Makefile.am b/Makefile.am index 02a6bd89..bbc78de3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,14 +5,18 @@ noinst_PROGRAMS=tester lib_LTLIBRARIES = libxml.la libxml_la_SOURCES = \ - xml_entities.c \ - xml_parser.c \ - xml_tree.c + SAX.c \ + entities.c \ + error.c \ + parser.c \ + tester.c \ + tree.c + include_HEADERS = \ - xml_entities.h \ - xml_parser.h \ - xml_tree.h + entities.h \ + parser.h \ + tree.h DEPS = $(top_builddir)/libxml.la LDADDS = $(top_builddir)/libxml.la @Z_LIBS@ diff --git a/Makefile.win b/Makefile.win new file mode 100644 index 00000000..0d36057d --- /dev/null +++ b/Makefile.win @@ -0,0 +1,34 @@ +# This is a makefile for win32 systems (VC 5.0). +# Christopher Blizzard +# http://odin.appliedtheory.com/ + +CC = cl +CFLAGS = /c /GB /Gi /nologo /I. 
/DWIN32 /MT /Zi + +LD = link +LDFLAGS = /DEBUG /NODEFAULTLIB:libc + +AR = lib + +all: xml.lib + +test: tester.exe + +SHARED_OBJS = entities.obj parser.obj tree.obj SAX.obj + +xml.lib: $(SHARED_OBJS) + $(AR) /out:xml.lib $(SHARED_OBJS) + +tester.obj: $(SHARED_OBJS) + $(CC) $(CFLAGS) tester.c /out:tester.obj + +tester.exe: tester.obj xml.lib + $(LD) $(LDFLAGS) /out:tester.exe tester.obj xml.lib + +clean: + -del /f $(SHARED_OBJS) tester.obj + -del /f tester.exe + -del /f xml.lib + -del /f *.pdb + -del /f *.idb + -del /f *.ilk diff --git a/SAX.c b/SAX.c new file mode 100644 index 00000000..17a8277a --- /dev/null +++ b/SAX.c @@ -0,0 +1,223 @@ +/* + * SAX.c : Default SAX handler to build a tree. + */ + +#include +#include +#include "tree.h" +#include "parser.h" +#include "error.h" + +/* #define DEBUG_SAX */ + +/* + * Return the public ID e.g. "-//SGMLSOURCE//DTD DEMO//EN" + */ +const CHAR *getPublicId(xmlParserCtxtPtr ctxt) { + return(NULL); +} + +/* + * Return the system ID, basically URI or filename e.g. + * http://www.sgmlsource.com/dtds/memo.dtd + */ +const CHAR *getSystemId(xmlParserCtxtPtr ctxt) { + return(ctxt->input->filename); +} + +/* + * Return the line number of the current parsing point. + */ +int getLineNumber(xmlParserCtxtPtr ctxt) { + return(ctxt->input->line); +} +/* + * Return the column number of the current parsing point. + */ +int getColumnNumber(xmlParserCtxtPtr ctxt) { + return(ctxt->input->col); +} + +/* + * The default SAX Locator. + */ + +xmlSAXLocator xmlDefaultSAXLocator = { + getPublicId, getSystemId, getLineNumber, getColumnNumber +}; + +/* + * Special entity resolver, better left to the parser, it has + * more context than the application layer. + */ +xmlParserInputPtr resolveEntity(xmlParserCtxtPtr ctxt, + const CHAR *publicId, const CHAR *systemId) { + +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId); +#endif + return(NULL); +} + +/* + * What to do when a notation declaration has been parsed. 
+ * TODO Not handled currently. + */ +void notationDecl(xmlParserCtxtPtr ctxt, const CHAR *name, + const CHAR *publicId, const CHAR *systemId) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId); +#endif +} + +/* + * What to do when an unparsed entity declaration is parsed + * TODO Create an Entity node. + */ +void unparsedEntityDecl(xmlParserCtxtPtr ctxt, const CHAR *name, + const CHAR *publicId, const CHAR *systemId, + const CHAR *notationName) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", + name, publicId, systemId, notationName); +#endif +} + +/* + * Receive the document locator at startup, actually xmlDefaultSAXLocator + * Everything is available on the context, so this is useless in our case. + */ +void setDocumentLocator(xmlParserCtxtPtr ctxt, xmlSAXLocatorPtr loc) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.setDocumentLocator()\n"); +#endif +} + +/* + * called when the document start being processed. + */ +void startDocument(xmlParserCtxtPtr ctxt) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.startDocument()\n"); +#endif +} + +/* + * called when the document end has been detected. + */ +void endDocument(xmlParserCtxtPtr ctxt) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.endDocument()\n"); +#endif +} + +/* + * called when an opening tag has been processed. + * TODO We currently have a small pblm with the arguments ... + */ +void startElement(xmlParserCtxtPtr ctxt, const CHAR *name) { + xmlNodePtr parent; + +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.startElement(%s)\n", name); +#endif + if (ctxt->nodeNr < 2) return; + parent = ctxt->nodeTab[ctxt->nodeNr - 2]; + if (parent != NULL) + xmlAddChild(parent, ctxt->node); + +} + +/* + * called when the end of an element has been detected. + */ +void endElement(xmlParserCtxtPtr ctxt, const CHAR *name) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.endElement(%s)\n", name); +#endif +} + +/* + * receiving some chars from the parser. 
+ * Question: how much at a time ??? + */ +void characters(xmlParserCtxtPtr ctxt, const CHAR *ch, + int start, int len) { + xmlNodePtr lastChild; + +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.characters(%.30s, %d, %d)\n", ch, start, len); +#endif + /* + * Handle the data if any. If there is no child + * add it as content, otherwise if the last child is text, + * concatenate it, else create a new node of type text. + */ + + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) + xmlNodeAddContentLen(ctxt->node, &ch[start], len); + else { + if (xmlNodeIsText(lastChild)) + xmlTextConcat(lastChild, &ch[start], len); + else { + lastChild = xmlNewTextLen(&ch[start], len); + xmlAddChild(ctxt->node, lastChild); + } + } +} + +/* + * receiving some ignorable whitespaces from the parser. + * Question: how much at a time ??? + */ +void ignorableWhitespace(xmlParserCtxtPtr ctxt, const CHAR *ch, + int start, int len) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.ignorableWhitespace(%.30s, %d, %d)\n", ch, start, len); +#endif +} + +/* + * A processing instruction has beem parsed. 
+ */ +void processingInstruction(xmlParserCtxtPtr ctxt, const CHAR *target, + const CHAR *data) { +#ifdef DEBUG_SAX + fprintf(stderr, "SAX.processingInstruction(%s, %s)\n", target, data); +#endif +} + +xmlSAXHandler xmlDefaultSAXHandler = { + resolveEntity, + notationDecl, + unparsedEntityDecl, + setDocumentLocator, + startDocument, + endDocument, + startElement, + endElement, + characters, + ignorableWhitespace, + processingInstruction, + xmlParserWarning, + xmlParserError, + xmlParserError, +}; + +void xmlDefaultSAXHandlerInit(void) { + xmlDefaultSAXHandler.resolveEntity = resolveEntity; + xmlDefaultSAXHandler.notationDecl = notationDecl; + xmlDefaultSAXHandler.unparsedEntityDecl = unparsedEntityDecl; + xmlDefaultSAXHandler.setDocumentLocator = setDocumentLocator; + xmlDefaultSAXHandler.startDocument = startDocument; + xmlDefaultSAXHandler.endDocument = endDocument; + xmlDefaultSAXHandler.startElement = startElement; + xmlDefaultSAXHandler.endElement = endElement; + xmlDefaultSAXHandler.characters = characters; + xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace; + xmlDefaultSAXHandler.processingInstruction = processingInstruction; + xmlDefaultSAXHandler.warning = xmlParserWarning; + xmlDefaultSAXHandler.error = xmlParserError; + xmlDefaultSAXHandler.fatalError = xmlParserError; +} diff --git a/TODO b/TODO index 8fd623c5..d8218bc0 100644 --- a/TODO +++ b/TODO @@ -1,15 +1,11 @@ TODO for the XML parser: -- Support for UTF-8 encoding -- progressive parsing. Currently the parser uses a single - string containing the full document. The good point is - that there is no context associated with the parser, the - full state is in the stack. The bad point is that such a - recursive design is hard to make progressive ... -- Better error handling, use a dedicated, overridable error - handling function. -- Keep track of line numbers for better error reporting. +- Support for UTF-8 and UTF-16 encoding (Urgent !!!). +- progressive parsing. 
The entity support is a first step toward + asbtraction of an input stream. A large part of the context is still + located on the stack, moving to a state machine and putting everyting + in the parsing context should provide an adequate solution. - DOM support, instead of using a proprietary in memory format for the document representation, the parser should call a DOM API to actually build the resulting document. @@ -17,14 +13,17 @@ representation of the document. Even better using RPC's the parser can actually build the document in another program. -- finish the support for Entities. - Support for Comments (bad, should be in ASAP, they are parsed - but not stored). -- Support for PI. -- Support for CDATA. + but not stored), should be configurable. +- Improve the support of entities on save (+SAX). Done: - C++ support : John Ehresman - Updated code to follow more recent specs, added compatibility flag +- Better error handling, use a dedicated, overridable error + handling function. +- Support for CDATA. +- Keep track of line numbers for better error reporting. +- Support for PI (SAX one). $Id$ diff --git a/autogen.sh b/autogen.sh index 0119b5c5..69f0382f 100755 --- a/autogen.sh +++ b/autogen.sh @@ -5,7 +5,7 @@ DIE=0 (autoconf --version) < /dev/null > /dev/null 2>&1 || { echo - echo "You must have autoconf installed to compile GLIB." + echo "You must have autoconf installed to compile gnome-xml." echo "Download the appropriate package for your distribution," echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/" DIE=1 @@ -13,7 +13,7 @@ DIE=0 (libtool --version) < /dev/null > /dev/null 2>&1 || { echo - echo "You must have libtool installed to compile GLIB." + echo "You must have libtool installed to compile gnome-xml." 
echo "Get ftp://alpha.gnu.org/gnu/libtool-1.0h.tar.gz" echo "(or a newer version if it is available)" DIE=1 @@ -21,7 +21,7 @@ DIE=0 (automake --version) < /dev/null > /dev/null 2>&1 || { echo - echo "You must have automake installed to compile GLIB." + echo "You must have automake installed to compile gnome-xml." echo "Get ftp://ftp.cygnus.com/pub/home/tromey/automake-1.2d.tar.gz" echo "(or a newer version if it is available)" DIE=1 @@ -31,8 +31,8 @@ if test "$DIE" -eq 1; then exit 1 fi -test -f xml_entities.h || { - echo "You must run this script in the top-level GLIB directory" +test -f entities.h || { + echo "You must run this script in the top-level gnome-xml directory" exit 1 } diff --git a/config.h b/config.h new file mode 100644 index 00000000..758ca6dd --- /dev/null +++ b/config.h @@ -0,0 +1,62 @@ +/* config.h. Generated automatically by configure. */ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define if you have the strftime function. */ +#define HAVE_STRFTIME 1 + +/* Define if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define if you have the snprintf function. */ +#define HAVE_SNPRINTF 1 + +/* Define if you have the strdup function. */ +#define HAVE_STRDUP 1 + +/* Define if you have the strerror function. */ +#define HAVE_STRERROR 1 + +/* Define if you have the strndup function. */ +#define HAVE_STRNDUP 1 + +/* Define if you have the header file. */ +#define HAVE_CTYPE_H 1 + +/* Define if you have the header file. */ +#define HAVE_DIRENT_H 1 + +/* Define if you have the header file. */ +#define HAVE_ERRNO_H 1 + +/* Define if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define if you have the header file. */ +#define HAVE_MALLOC_H 1 + +/* Define if you have the header file. */ +/* #undef HAVE_NDIR_H */ + +/* Define if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define if you have the header file. 
*/ +/* #undef HAVE_SYS_DIR_H */ + +/* Define if you have the header file. */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define if you have the header file. */ +#define HAVE_ZLIB_H 1 diff --git a/configure.in b/configure.in index 285c00c4..4048bb92 100644 --- a/configure.in +++ b/configure.in @@ -1,8 +1,8 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.2) -AC_INIT(xml_entities.h) +AC_INIT(entities.h) AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(libxml, 0.10) +AM_INIT_AUTOMAKE(libxml, 0.20) dnl Checks for programs. AC_PROG_CC diff --git a/entities.c b/entities.c new file mode 100644 index 00000000..47b7cfa1 --- /dev/null +++ b/entities.c @@ -0,0 +1,426 @@ +/* + * entities.c : implementation for the XML entities handking + * + * See Copyright for the status of this software. + * + * $Id$ + */ + +#include +#include +#include +#include "entities.h" + +/* + * The XML predefined entities. + */ + +struct xmlPredefinedEntityValue { + const char *name; + const char *value; +}; +struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = { + { "lt", "<" }, + { "gt", ">" }, + { "apos", "'" }, + { "quot", "\"" }, + { "amp", "&" } +}; + +xmlEntitiesTablePtr xmlPredefinedEntities = NULL; + +/* + * A buffer used for converting entities to their equivalent and back. + */ +static int buffer_size = 0; +static CHAR *buffer = NULL; + +void growBuffer(void) { + buffer_size *= 2; + buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR)); + if (buffer == NULL) { + perror("realloc failed"); + exit(1); + } +} + +/* + * xmlFreeEntity : clean-up an entity record. 
+ */ + +void xmlFreeEntity(xmlEntityPtr entity) { + if (entity == NULL) return; + + if (entity->name != NULL) + free((char *) entity->name); + if (entity->ExternalID != NULL) + free((char *) entity->ExternalID); + if (entity->SystemID != NULL) + free((char *) entity->SystemID); + if (entity->content != NULL) + free((char *) entity->content); + memset(entity, -1, sizeof(xmlEntity)); +} + +/* + * xmlAddEntity : register a new entity for an entities table. + * + * TODO !!! We should check here that the combination of type + * ExternalID and SystemID is valid. + */ +static void xmlAddEntity(xmlEntitiesTablePtr table, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) { + int i; + xmlEntityPtr cur; + int len; + + for (i = 0;i < table->nb_entities;i++) { + cur = &table->table[i]; + if (!xmlStrcmp(cur->name, name)) { + /* + * The entity is already defined in this Dtd, the spec says to NOT + * override it ... Is it worth a Warning ??? !!! + */ + return; + } + } + if (table->nb_entities >= table->max_entities) { + /* + * need more elements. + */ + table->max_entities *= 2; + table->table = (xmlEntityPtr) + realloc(table->table, table->max_entities * sizeof(xmlEntity)); + if (table->table == NULL) { + perror("realloc failed"); + exit(1); + } + } + cur = &table->table[table->nb_entities]; + cur->name = xmlStrdup(name); + for (len = 0;name[0] != 0;name++)len++; + cur->len = len; + cur->type = type; + if (ExternalID != NULL) + cur->ExternalID = xmlStrdup(ExternalID); + else + cur->ExternalID = NULL; + if (SystemID != NULL) + cur->SystemID = xmlStrdup(SystemID); + else + cur->SystemID = NULL; + if (content != NULL) + cur->content = xmlStrdup(content); + else + cur->content = NULL; + table->nb_entities++; +} + +/* + * Set up xmlPredefinedEntities from xmlPredefinedEntityValues. 
+ */ +void xmlInitializePredefinedEntities(void) { + int i; + CHAR name[50]; + CHAR value[50]; + const char *in; + CHAR *out; + + if (xmlPredefinedEntities != NULL) return; + + xmlPredefinedEntities = xmlCreateEntitiesTable(); + for (i = 0;i < sizeof(xmlPredefinedEntityValues) / + sizeof(xmlPredefinedEntityValues[0]);i++) { + in = xmlPredefinedEntityValues[i].name; + out = &name[0]; + for (;(*out++ = (CHAR) *in);)in++; + in = xmlPredefinedEntityValues[i].value; + out = &value[0]; + for (;(*out++ = (CHAR) *in);)in++; + xmlAddEntity(xmlPredefinedEntities, (const CHAR *) &name[0], + XML_INTERNAL_GENERAL_ENTITY, NULL, NULL, + &value[0]); + } +} + + +/* + * xmlAddDtdEntity : register a new entity for this DTD. + */ +void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) { + xmlEntitiesTablePtr table; + + if (doc->dtd == NULL) { + fprintf(stderr, "xmlAddDtdEntity: document without Dtd !\n"); + return; + } + table = (xmlEntitiesTablePtr) doc->dtd->entities; + if (table == NULL) { + table = xmlCreateEntitiesTable(); + doc->dtd->entities = table; + } + xmlAddEntity(table, name, type, ExternalID, SystemID, content); +} + +/* + * xmlAddDocEntity : register a new entity for this document. + */ +void xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) { + xmlEntitiesTablePtr table; + + table = (xmlEntitiesTablePtr) doc->entities; + if (table == NULL) { + table = xmlCreateEntitiesTable(); + doc->entities = table; + } + xmlAddEntity(doc->entities, name, type, ExternalID, SystemID, content); +} + +/* + * xmlGetDtdEntity : do an entity lookup in the Dtd entity hash table and + * returns the corrsponding entity, if found, NULL otherwise. 
+ */ +xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name) { + int i; + xmlEntityPtr cur; + xmlEntitiesTablePtr table; + + if ((doc->dtd != NULL) && (doc->dtd->entities != NULL)) { + table = (xmlEntitiesTablePtr) doc->dtd->entities; + for (i = 0;i < table->nb_entities;i++) { + cur = &table->table[i]; + if (!xmlStrcmp(cur->name, name)) return(cur); + } + } + return(NULL); +} + +/* + * xmlGetDocEntity : do an entity lookup in the document entity hash table and + * returns the corrsponding entity, otherwise a lookup is done + * in the predefined entities too. + */ +xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) { + int i; + xmlEntityPtr cur; + xmlEntitiesTablePtr table; + + if (doc->entities != NULL) { + table = (xmlEntitiesTablePtr) doc->entities; + for (i = 0;i < table->nb_entities;i++) { + cur = &table->table[i]; + if (!xmlStrcmp(cur->name, name)) return(cur); + } + } + if (xmlPredefinedEntities == NULL) + xmlInitializePredefinedEntities(); + table = xmlPredefinedEntities; + for (i = 0;i < table->nb_entities;i++) { + cur = &table->table[i]; + if (!xmlStrcmp(cur->name, name)) return(cur); + } + + return(NULL); +} + +/* + * xmlEncodeEntities : do a global encoding of a string, replacing the + * basic content with their entities form. + * TODO !!!! rewite !!! + */ +CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { + const CHAR *cur = input; + CHAR *out = buffer; + + if (buffer == NULL) { + buffer_size = 1000; + buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR)); + if (buffer == NULL) { + perror("malloc failed"); + exit(1); + } + out = buffer; + } + while (*cur != '\0') { + if (out - buffer > buffer_size - 100) { + int index = out - buffer; + + growBuffer(); + out = &buffer[index]; + } + + /* + * By default one have to encode at least '<', '>', '"' and '&' ! + * One could try a better encoding using the entities defined and + * used as a compression code !!!. 
+ */ + if (*cur == '<') { + *out++ = '&'; + *out++ = 'l'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '>') { + *out++ = '&'; + *out++ = 'g'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '&') { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'm'; + *out++ = 'p'; + *out++ = ';'; + } else if (*cur == '"') { + *out++ = '&'; + *out++ = 'q'; + *out++ = 'u'; + *out++ = 'o'; + *out++ = 't'; + *out++ = ';'; + } else if (*cur == '\'') { + *out++ = '&'; + *out++ = 'a'; + *out++ = 'p'; + *out++ = 'o'; + *out++ = 's'; + *out++ = ';'; + } else { + /* + * default case, just copy ! + */ + *out++ = *cur; + } + cur++; + } + *out++ = 0; + return(buffer); +} + +/* + * xmlCreateEntitiesTable : create and initialize an empty hash table + */ +xmlEntitiesTablePtr xmlCreateEntitiesTable(void) { + xmlEntitiesTablePtr ret; + + ret = (xmlEntitiesTablePtr) + malloc(sizeof(xmlEntitiesTable)); + if (ret == NULL) { + fprintf(stderr, "xmlCreateEntitiesTable : malloc(%d) failed\n", + (int) sizeof(xmlEntitiesTable)); + return(NULL); + } + ret->max_entities = XML_MIN_ENTITIES_TABLE; + ret->nb_entities = 0; + ret->table = (xmlEntityPtr ) + malloc(ret->max_entities * sizeof(xmlEntity)); + if (ret->table == NULL) { + fprintf(stderr, "xmlCreateEntitiesTable : malloc(%d) failed\n", + (int) (ret->max_entities * sizeof(xmlEntity))); + free(ret); + return(NULL); + } + return(ret); +} + +/* + * xmlFreeEntitiesTable : clean up and free an entities hash table. + */ +void xmlFreeEntitiesTable(xmlEntitiesTablePtr table) { + int i; + + if (table == NULL) return; + + for (i = 0;i < table->nb_entities;i++) { + xmlFreeEntity(&table->table[i]); + } + free(table->table); + free(table); +} + +/* + * Dump the content of an entity table to the document output. 
+ */ +void xmlDumpEntitiesTable(xmlEntitiesTablePtr table) { + int i; + xmlEntityPtr cur; + + if (table == NULL) return; + + for (i = 0;i < table->nb_entities;i++) { + cur = &table->table[i]; + switch (cur->type) { + case XML_INTERNAL_GENERAL_ENTITY: + xmlBufferWriteChar("name); + xmlBufferWriteChar(" \""); + xmlBufferWriteCHAR(cur->content); + xmlBufferWriteChar("\">\n"); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + xmlBufferWriteChar("name); + if (cur->ExternalID != NULL) { + xmlBufferWriteChar(" PUBLIC \""); + xmlBufferWriteCHAR(cur->ExternalID); + xmlBufferWriteChar("\" \""); + xmlBufferWriteCHAR(cur->SystemID); + xmlBufferWriteChar("\""); + } else { + xmlBufferWriteChar(" SYSTEM \""); + xmlBufferWriteCHAR(cur->SystemID); + xmlBufferWriteChar("\""); + } + xmlBufferWriteChar(">\n"); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + xmlBufferWriteChar("name); + if (cur->ExternalID != NULL) { + xmlBufferWriteChar(" PUBLIC \""); + xmlBufferWriteCHAR(cur->ExternalID); + xmlBufferWriteChar("\" \""); + xmlBufferWriteCHAR(cur->SystemID); + xmlBufferWriteChar("\""); + } else { + xmlBufferWriteChar(" SYSTEM \""); + xmlBufferWriteCHAR(cur->SystemID); + xmlBufferWriteChar("\""); + } + if (cur->content != NULL) { /* Should be true ! 
*/ + xmlBufferWriteChar(" NDATA "); + xmlBufferWriteCHAR(cur->content); + } + xmlBufferWriteChar(">\n"); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + xmlBufferWriteChar("name); + xmlBufferWriteChar(" \""); + xmlBufferWriteCHAR(cur->content); + xmlBufferWriteChar("\">\n"); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + xmlBufferWriteChar("name); + if (cur->ExternalID != NULL) { + xmlBufferWriteChar(" PUBLIC \""); + xmlBufferWriteCHAR(cur->ExternalID); + xmlBufferWriteChar("\" \""); + xmlBufferWriteCHAR(cur->SystemID); + xmlBufferWriteChar("\""); + } else { + xmlBufferWriteChar(" SYSTEM \""); + xmlBufferWriteCHAR(cur->SystemID); + xmlBufferWriteChar("\""); + } + xmlBufferWriteChar(">\n"); + break; + default: + fprintf(stderr, + "xmlDumpEntitiesTable: internal: unknown type %d\n", + cur->type); + } + } +} diff --git a/xml_entities.h b/entities.h similarity index 50% rename from xml_entities.h rename to entities.h index a0b24ac2..0065245c 100644 --- a/xml_entities.h +++ b/entities.h @@ -8,21 +8,31 @@ #ifndef __XML_ENTITIES_H__ #define __XML_ENTITIES_H__ -#include "xml_parser.h" +#include "parser.h" #ifdef __cplusplus extern "C" { #endif +#define XML_INTERNAL_GENERAL_ENTITY 1 +#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2 +#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3 +#define XML_INTERNAL_PARAMETER_ENTITY 4 +#define XML_EXTERNAL_PARAMETER_ENTITY 5 + /* * An unit of storage for an entity, contains the string, the value * and the linkind data needed for the linking in the hash table. 
*/ typedef struct xmlEntity { - const CHAR *id; /* The entity name */ - CHAR *value; /* The entity CHAR equivalent */ + int type; /* The entity type */ + int len; /* The lenght of the name */ + const CHAR *name; /* Name of the entity */ + const CHAR *ExternalID; /* External identifier for PUBLIC Entity */ + const CHAR *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + CHAR *content; /* The entity content or ndata if unparsed */ } xmlEntity, *xmlEntityPtr; /* @@ -42,14 +52,16 @@ typedef struct xmlEntitiesTable { * External functions : */ -extern void xmlAddDocEntity(xmlDocPtr doc, CHAR *value, const CHAR *id); -extern void xmlAddDtdEntity(xmlDtdPtr dtd, CHAR *value, const CHAR *id); -extern CHAR *xmlGetEntity(xmlDocPtr doc, const CHAR *id); -extern CHAR *xmlSubstituteEntities(xmlDocPtr doc, const CHAR *input); +extern void xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content); +extern void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content); +extern xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name); +extern xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name); extern CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input); -extern CHAR *xmlDecodeEntities(xmlDocPtr doc, const CHAR *input, int len); extern xmlEntitiesTablePtr xmlCreateEntitiesTable(void); extern void xmlFreeEntitiesTable(xmlEntitiesTablePtr table); +extern void xmlDumpEntitiesTable(xmlEntitiesTablePtr table); #ifdef __cplusplus } diff --git a/error.c b/error.c new file mode 100644 index 00000000..4c4bac5b --- /dev/null +++ b/error.c @@ -0,0 +1,93 @@ +/* + * error.c: module displaying errors + */ + +#include +#include +#include "parser.h" + +/* + * Display and format error messages. + */ +void xmlParserError(xmlParserCtxtPtr ctxt, const char *msg, ...) 
{ + const CHAR *cur, *base; + va_list args; + int n; + + va_start(args, msg); + if (ctxt->input->filename) + fprintf(stderr, "%s:%d: ", ctxt->input->filename, + ctxt->input->line); + else + fprintf(stderr, "line %d: ", ctxt->input->line); + + fprintf(stderr, "error: "); + vfprintf(stderr, msg, args); + va_end(ap); + cur = ctxt->input->cur; + base = ctxt->input->base; + while ((*cur == '\n') || (*cur == '\r')) { + cur--; + base--; + } + n = 0; + while ((n++ < 60) && (cur >= base) && (*cur != '\n') && (*cur != '\r')) + cur--; + if ((*cur == '\n') || (*cur == '\r')) cur++; + base = cur; + n = 0; + while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) { + fprintf(stderr, "%c", (unsigned char) *cur++); + n++; + } + fprintf(stderr, "\n"); + cur = ctxt->input->cur; + while ((*cur == '\n') || (*cur == '\r')) + cur--; + n = 0; + while ((cur != base) && (n++ < 60)) { + fprintf(stderr, " "); + base++; + } + fprintf(stderr,"^\n"); +} + +/* + * Display and format error messages. + */ +void xmlParserWarning(xmlParserCtxtPtr ctxt, const char *msg, ...) 
{ + const CHAR *cur, *base; + va_list args; + int n; + + va_start(args, msg); + if (ctxt->input->filename) + fprintf(stderr, "%s:%d: ", ctxt->input->filename, + ctxt->input->line); + else + fprintf(stderr, "line %d: ", ctxt->input->line); + + fprintf(stderr, "warning: "); + vfprintf(stderr, msg, args); + va_end(args); + cur = ctxt->input->cur; + base = ctxt->input->base; + n = 0; + while ((n++ < 60) && (cur > base) && (*cur != '\n') && (*cur != '\r')) + cur--; + if ((*cur == '\n') || (*cur == '\r')) cur++; + base = cur; + n = 0; + while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) { + fprintf(stderr, "%c", (unsigned char) *cur++); + n++; + } + fprintf(stderr, "\n"); + cur = ctxt->input->cur; + n = 0; + while ((cur != base) && (n++ < 60)) { + fprintf(stderr, " "); + base++; + } + fprintf(stderr,"^\n"); +} diff --git a/include/libxml/entities.h b/include/libxml/entities.h new file mode 100644 index 00000000..0065245c --- /dev/null +++ b/include/libxml/entities.h @@ -0,0 +1,70 @@ +/* + * entities.h : interface for the XML entities handking + * + * See Copyright for the status of this software. + * + * $Id$ + */ + +#ifndef __XML_ENTITIES_H__ +#define __XML_ENTITIES_H__ +#include "parser.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +#define XML_INTERNAL_GENERAL_ENTITY 1 +#define XML_EXTERNAL_GENERAL_PARSED_ENTITY 2 +#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3 +#define XML_INTERNAL_PARAMETER_ENTITY 4 +#define XML_EXTERNAL_PARAMETER_ENTITY 5 + +/* + * An unit of storage for an entity, contains the string, the value + * and the linkind data needed for the linking in the hash table. 
+ */ + +typedef struct xmlEntity { + int type; /* The entity type */ + int len; /* The lenght of the name */ + const CHAR *name; /* Name of the entity */ + const CHAR *ExternalID; /* External identifier for PUBLIC Entity */ + const CHAR *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + CHAR *content; /* The entity content or ndata if unparsed */ +} xmlEntity, *xmlEntityPtr; + +/* + * ALl entities are stored in a table there is one table per DTD + * and one extra per document. + */ + +#define XML_MIN_ENTITIES_TABLE 32 + +typedef struct xmlEntitiesTable { + int nb_entities; /* number of elements stored */ + int max_entities; /* maximum number of elements */ + xmlEntityPtr table; /* the table of entities */ +} xmlEntitiesTable, *xmlEntitiesTablePtr; + +/* + * External functions : + */ + +extern void xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content); +extern void xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type, + const CHAR *ExternalID, const CHAR *SystemID, CHAR *content); +extern xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name); +extern xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name); +extern CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input); +extern xmlEntitiesTablePtr xmlCreateEntitiesTable(void); +extern void xmlFreeEntitiesTable(xmlEntitiesTablePtr table); +extern void xmlDumpEntitiesTable(xmlEntitiesTablePtr table); + +#ifdef __cplusplus +} +#endif + +# endif /* __XML_ENTITIES_H__ */ diff --git a/include/libxml/parser.h b/include/libxml/parser.h new file mode 100644 index 00000000..a25e70af --- /dev/null +++ b/include/libxml/parser.h @@ -0,0 +1,168 @@ +/* + * parser.h : constants and stuff related to the XML parser. + * + * See Copyright for the status of this software. + * + * $Id$ + */ + +#ifndef __XML_PARSER_H__ +#define __XML_PARSER_H__ + +#include "tree.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Constants. 
/* XML version string used as the default. */
#define XML_DEFAULT_VERSION	"1.0"

/*
 * One input stream: a buffer of CHARs, the read position inside it and
 * the line/column bookkeeping used for diagnostics.
 */
typedef struct xmlParserInput {
    const char *filename;             /* The file analyzed, if any */
    const CHAR *base;                 /* Base of the array to parse */
    const CHAR *cur;                  /* Current char being parsed */
    int line;                         /* Current line */
    int col;                          /* Current column */
} xmlParserInput, *xmlParserInputPtr;

/*
 * Source location record for one node.
 */
typedef struct xmlParserNodeInfo {
  const struct xmlNode* node;
  /* Position & line # that text that created the node begins & ends on */
  unsigned long begin_pos;
  unsigned long begin_line;
  unsigned long end_pos;
  unsigned long end_line;
} xmlParserNodeInfo;

/*
 * A growable sequence of xmlParserNodeInfo records.
 */
typedef struct xmlParserNodeInfoSeq {
  unsigned long maximum;              /* allocated capacity of buffer -- presumably, confirm in parser.c */
  unsigned long length;               /* number of records in use -- presumably, confirm in parser.c */
  xmlParserNodeInfo* buffer;          /* the records */
} xmlParserNodeInfoSeq, *xmlParserNodeInfoSeqPtr;

/*
 * The parser context: SAX handler, document under construction, and the
 * two stacks (inputs and nodes) the parser works on.  For each stack the
 * context keeps the current top (input / node), the element count (*Nr),
 * the allocated size (*Max) and the array itself (*Tab).
 */
typedef struct xmlParserCtxt {
    struct xmlSAXHandler *sax;        /* The SAX handler */
    xmlDocPtr doc;                    /* the document being built */

    /* Input stream stack */
    xmlParserInputPtr  input;         /* Current input stream */
    int                inputNr;       /* Number of current input streams */
    int                inputMax;      /* Max number of input streams */
    xmlParserInputPtr *inputTab;      /* stack of inputs */

    /* Node analysis stack */
    xmlNodePtr         node;          /* Current parsed Node */
    int                nodeNr;        /* Depth of the parsing stack */
    int                nodeMax;       /* Max depth of the parsing stack */
    xmlNodePtr        *nodeTab;       /* array of nodes */

    int record_info;                  /* Whether node info should be kept */
    xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */
} xmlParserCtxt, *xmlParserCtxtPtr;

/*
 * a SAX Locator: four accessors, each taking the parser context, giving
 * the public id, the system id, the line number and the column number.
 */

typedef struct xmlSAXLocator {
    const CHAR *(*getPublicId)(xmlParserCtxtPtr ctxt);
    const CHAR *(*getSystemId)(xmlParserCtxtPtr ctxt);
    int (*getLineNumber)(xmlParserCtxtPtr ctxt);
    int (*getColumnNumber)(xmlParserCtxtPtr ctxt);
} xmlSAXLocator, *xmlSAXLocatorPtr;
/*
 * Signatures of the SAX callbacks.  Every callback receives the parser
 * context as its first argument.
 */

/* Entity resolution: returns an input stream for the given identifiers. */
typedef xmlParserInputPtr (*resolveEntitySAXFunc) (xmlParserCtxtPtr ctxt,
			    const CHAR *publicId, const CHAR *systemId);
/* DTD-level declarations. */
typedef void (*notationDeclSAXFunc)(xmlParserCtxtPtr ctxt, const CHAR *name,
			    const CHAR *publicId, const CHAR *systemId);
typedef void (*unparsedEntityDeclSAXFunc)(xmlParserCtxtPtr ctxt,
                            const CHAR *name, const CHAR *publicId,
			    const CHAR *systemId, const CHAR *notationName);
/* Document-level events. */
typedef void (*setDocumentLocatorSAXFunc) (xmlParserCtxtPtr ctxt,
                            xmlSAXLocatorPtr loc);
typedef void (*startDocumentSAXFunc) (xmlParserCtxtPtr ctxt);
typedef void (*endDocumentSAXFunc) (xmlParserCtxtPtr ctxt);
/* Element and content events; character data is passed as (ch, start, len). */
typedef void (*startElementSAXFunc) (xmlParserCtxtPtr ctxt, const CHAR *name);
typedef void (*endElementSAXFunc) (xmlParserCtxtPtr ctxt, const CHAR *name);
typedef void (*charactersSAXFunc) (xmlParserCtxtPtr ctxt, const CHAR *ch,
		            int start, int len);
typedef void (*ignorableWhitespaceSAXFunc) (xmlParserCtxtPtr ctxt,
			    const CHAR *ch, int start, int len);
typedef void (*processingInstructionSAXFunc) (xmlParserCtxtPtr ctxt,
                            const CHAR *target, const CHAR *data);
/* Diagnostics: a message followed by variadic arguments. */
typedef void (*warningSAXFunc) (xmlParserCtxtPtr ctxt, const char *msg, ...);
typedef void (*errorSAXFunc) (xmlParserCtxtPtr ctxt, const char *msg, ...);
typedef void (*fatalErrorSAXFunc) (xmlParserCtxtPtr ctxt, const char *msg, ...);

/*
 * The SAX handler: one slot per callback type declared above.
 */
typedef struct xmlSAXHandler {
    resolveEntitySAXFunc resolveEntity;
    notationDeclSAXFunc notationDecl;
    unparsedEntityDeclSAXFunc unparsedEntityDecl;
    setDocumentLocatorSAXFunc setDocumentLocator;
    startDocumentSAXFunc startDocument;
    endDocumentSAXFunc endDocument;
    startElementSAXFunc startElement;
    endElementSAXFunc endElement;
    charactersSAXFunc characters;
    ignorableWhitespaceSAXFunc ignorableWhitespace;
    processingInstructionSAXFunc processingInstruction;
    warningSAXFunc warning;
    errorSAXFunc error;
    fatalErrorSAXFunc fatalError;
} xmlSAXHandler, *xmlSAXHandlerPtr;
/*
 * Global variables: the default SAX locator and handler tables.
 *
 * NOTE(review): both objects are declared here without `extern`, so every
 * translation unit that includes this header emits its own tentative
 * definition.  That only links when the toolchain merges common symbols;
 * consider `extern` here plus a single definition in one .c file.
 */
xmlSAXLocator xmlDefaultSAXLocator;
xmlSAXHandler xmlDefaultSAXHandler;

/*
 * Interfaces
 */

/* Parsing entry points. */
extern int xmlParseDocument(xmlParserCtxtPtr ctxt);
extern xmlDocPtr xmlParseDoc(CHAR *cur);
extern xmlDocPtr xmlParseMemory(char *buffer, int size);
extern xmlDocPtr xmlParseFile(const char *filename);

/* String helpers working on CHAR arrays. */
extern CHAR *xmlStrdup(const CHAR *input);
extern CHAR *xmlStrndup(const CHAR *input, int n);
extern CHAR *xmlStrchr(const CHAR *str, CHAR val);
extern int xmlStrcmp(const CHAR *str1, const CHAR *str2);
extern int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len);
extern int xmlStrlen(const CHAR *str);
extern CHAR *xmlStrcat(CHAR *cur, const CHAR *add);
extern CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len);

/* Parser context handling. */
extern void xmlInitParserCtxt(xmlParserCtxtPtr ctx);
extern void xmlClearParserCtxt(xmlParserCtxtPtr ctx);
extern void xmlSetupParserForBuffer(xmlParserCtxtPtr ctx, const CHAR* buffer,
                                    const char* filename);

extern void xmlParserError(xmlParserCtxtPtr ctxt, const char *msg, ...);

/* Node information records. */
extern const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* c,
                                                      const xmlNode* node);
extern void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq);
extern void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq);
/* NOTE(review): the only prototype in this list without `extern`. */
unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
                                         const xmlNode* node);
extern void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
                                 const xmlParserNodeInfo* info);

/* Diagnostics.  NOTE(review): xmlParserError is declared twice in this header. */
extern void xmlParserWarning(xmlParserCtxtPtr ctxt, const char *msg, ...);
extern void xmlParserError(xmlParserCtxtPtr ctxt, const char *msg, ...);
extern void xmlDefaultSAXHandlerInit(void);
#ifdef __cplusplus
}
#endif

#endif /* __XML_PARSER_H__ */
#ifndef __XML_TREE_H__
#define __XML_TREE_H__


#ifdef __cplusplus
extern "C" {
#endif

/*
 * Type definitions
 *
 * CHAR is the character unit used throughout the library: 16 bits wide
 * when UNICODE is defined, 8 bits wide otherwise.
 */
#ifdef UNICODE
typedef unsigned short CHAR;
#else
typedef unsigned char CHAR;
#endif

/*
 * a DTD Notation definition
 * TODO !!!!
 */

/*
 * a DTD Attribute definition
 * TODO !!!!
 */

/*
 * a DTD Element definition.
 */
#define XML_ELEMENT_TYPE_EMPTY		1
#define XML_ELEMENT_TYPE_ANY		2
#define XML_ELEMENT_TYPE_MIXED		3
#define XML_ELEMENT_TYPE_ELEMENT	4

typedef struct xmlElement {
    const CHAR    *name;	/* Element name */
    int            type;	/* type (too simple, to extend ...) */
    /* TODO !!! more needed */
} xmlElement, *xmlElementPtr;

/*
 * An XML namespace.
 * Note that prefix == NULL is valid, it defines the default namespace
 * within the subtree (until overridden).
 */

#define XML_GLOBAL_NAMESPACE		1 /* old style global namespace */
#define XML_LOCAL_NAMESPACE		2 /* new style local scoping */

typedef struct xmlNs {
    struct xmlNs  *next;	/* next Ns link for this node  */
    int            type;	/* global or local */
    const CHAR    *href;	/* URL for the namespace */
    const CHAR    *prefix;	/* prefix for the namespace */
} xmlNs, *xmlNsPtr;
/* Node type codes stored in xmlNode.type. */
#define XML_TYPE_TEXT		1
#define XML_TYPE_COMMENT	2
#define XML_TYPE_ENTITY		3

/*
 * A node in an XML tree.  Nodes are linked to their parent, to their next
 * sibling and to their first child; attributes hang off `properties`.
 */
typedef struct xmlNode {
    struct xmlNode *parent;	/* child->parent link */
    struct xmlNode *next;	/* next sibling link  */
    struct xmlNode *childs;	/* parent->childs link */
    struct xmlAttr *properties;	/* properties list */
    int             type;	/* type number in the DTD */
    const CHAR     *name;       /* the name of the node, or the entity */
    xmlNs          *ns;         /* pointer to the associated namespace */
    xmlNs          *nsDef;      /* namespace definitions on this node */
    CHAR           *content;    /* the content */
} xmlNode, *xmlNodePtr;

/*
 * An XML document.
 */
typedef struct xmlDoc {
    char           *name;	/* name/filename/URI of the document */
    const CHAR     *version;	/* the XML version string */
    const CHAR     *encoding;   /* encoding, if any */
    int             standalone; /* standalone document (no external refs) */
    struct xmlDtd  *dtd;	/* the document DTD if available */
    struct xmlNs   *oldNs;	/* Global namespace, the old way */
    void          *entities;    /* Hash table for general entities if any */
    struct xmlNode *root;	/* the document tree */
} xmlDoc, *xmlDocPtr;

/*
 * Variables.
 */
extern xmlNsPtr baseDTD;
extern int oldXMLWDcompatibility;/* maintain compatibility with old WD */
extern int xmlIndentTreeOutput;  /* try to indent the tree dumps */
/*
 * Functions.
 */

/* DTD and namespace objects. */
extern xmlDtdPtr xmlNewDtd(xmlDocPtr doc, const CHAR *name,
                    const CHAR *ExternalID, const CHAR *SystemID);
extern void xmlFreeDtd(xmlDtdPtr cur);
extern xmlNsPtr xmlNewGlobalNs(xmlDocPtr doc, const CHAR *href, const CHAR *AS);
extern xmlNsPtr xmlNewNs(xmlNodePtr node, const CHAR *href, const CHAR *AS);
extern void xmlFreeNs(xmlNsPtr cur);

/* Documents. */
extern xmlDocPtr xmlNewDoc(const CHAR *version);
extern void xmlFreeDoc(xmlDocPtr cur);

/* Properties (attributes) of a node. */
extern xmlAttrPtr xmlNewProp(xmlNodePtr node, const CHAR *name,
                             const CHAR *value);
extern xmlAttrPtr xmlSetProp(xmlNodePtr node, const CHAR *name,
                             const CHAR *value);
extern const CHAR *xmlGetProp(xmlNodePtr node, const CHAR *name);
extern void xmlFreePropList(xmlAttrPtr cur);
extern void xmlFreeProp(xmlAttrPtr cur);

/* Nodes: creation, linking, text handling and release. */
extern xmlNodePtr xmlNewNode(xmlNsPtr ns, const CHAR *name, CHAR *content);
extern xmlNodePtr xmlNewText(const CHAR *content);
extern xmlNodePtr xmlNewTextLen(const CHAR *content, int len);
extern xmlNodePtr xmlNewComment(CHAR *content);
extern xmlNodePtr xmlAddChild(xmlNodePtr parent, xmlNodePtr cur);
extern xmlNodePtr xmlGetLastChild(xmlNodePtr node);
extern int xmlNodeIsText(xmlNodePtr node);
extern void xmlTextConcat(xmlNodePtr node, const CHAR *content, int len);
extern void xmlFreeNodeList(xmlNodePtr cur);
extern void xmlFreeNode(xmlNodePtr cur);

/* Node content; the *Len variants take an explicit length. */
extern void xmlNodeSetContent(xmlNodePtr cur, const CHAR *content);
extern void xmlNodeSetContentLen(xmlNodePtr cur, const CHAR *content, int len);
extern void xmlNodeAddContent(xmlNodePtr cur, const CHAR *content);
extern void xmlNodeAddContentLen(xmlNodePtr cur, const CHAR *content, int len);

/* Namespace lookup and assignment. */
extern xmlNsPtr xmlSearchNs(xmlDocPtr doc, xmlNodePtr node,
                            const CHAR *nameSpace);
extern xmlNsPtr xmlSearchNsByHref(xmlDocPtr doc, xmlNodePtr node,
                                  const CHAR *href);
extern void xmlSetNs(xmlNodePtr node, xmlNsPtr ns);
extern xmlNodePtr xmlNewChild(xmlNodePtr parent, xmlNsPtr ns,
                              const CHAR *name, CHAR *content);
xmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size); +extern void xmlDocDump(FILE *f, xmlDocPtr doc); +extern void xmlBufferWriteCHAR(const CHAR *string); +extern void xmlBufferWriteChar(const char *string); + + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_TREE_H__ */ + diff --git a/parser.c b/parser.c new file mode 100644 index 00000000..bb24dd58 --- /dev/null +++ b/parser.c @@ -0,0 +1,3334 @@ +/* + * parser.c : an XML 1.0 non-verifying parser + * + * See Copyright for the status of this software. + * + * $Id$ + */ + +#ifdef WIN32 +#define HAVE_FCNTL_H +#include +#else +#include +#endif +#include +#include +#include /* for memset() only */ +#include +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_ZLIB_H +#include +#endif + +#include "tree.h" +#include "parser.h" +#include "entities.h" + +/************************************************************************ + * * + * Parser stacks related functions and macros * + * * + ************************************************************************/ +/* + * Generic function for accessing stacks in the Parser Context + */ + +#define PUSH_AND_POP(type, name) \ +int name##Push(xmlParserCtxtPtr ctxt, type value) { \ + if (ctxt->name##Nr >= ctxt->name##Max) { \ + ctxt->name##Max *= 2; \ + ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \ + ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ + if (ctxt->name##Tab == NULL) { \ + fprintf(stderr, "realloc failed !\n"); \ + exit(1); \ + } \ + } \ + ctxt->name##Tab[ctxt->name##Nr] = value; \ + ctxt->name = value; \ + return(ctxt->name##Nr++); \ +} \ +type name##Pop(xmlParserCtxtPtr ctxt) { \ + if (ctxt->name##Nr <= 0) return(0); \ + ctxt->name##Nr--; \ + ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ + return(ctxt->name); \ +} \ + +PUSH_AND_POP(xmlParserInputPtr, input) +PUSH_AND_POP(xmlNodePtr, node) + +/************* +#define CUR (*(ctxt->input->cur) ? 
/*
 * Accessors on the current input stream.  All of them expect a variable
 * named `ctxt` (the parser context) to be in scope.
 *
 * CUR       current character.
 * NEXT      when the current character is non-zero, advance by one
 *           character and keep line/col up to date (a '\n' bumps the line
 *           and resets the column to 1); when it is zero, call xmlPopInput()
 *           to fall back to the previous input stream.
 * CUR_PTR   pointer to the current character.
 * NXT(val)  character `val` positions after the current one.
 * SKIP(val) advance by `val` characters WITHOUT touching line/col.
 * SKIP_BLANKS  consume characters with NEXT while IS_BLANK holds.
 */
#define CUR (*ctxt->input->cur)
#define NEXT ((*ctxt->input->cur) ?					\
                (((*(ctxt->input->cur) == '\n') ?			\
		    (ctxt->input->line++, ctxt->input->col = 1) :	\
		    (ctxt->input->col++)), ctxt->input->cur++) :	\
		(xmlPopInput(ctxt), ctxt->input->cur))

#define CUR_PTR ctxt->input->cur

#define NXT(val) ctxt->input->cur[(val)]

#define SKIP(val) ctxt->input->cur += (val)
#define SKIP_BLANKS 							\
    while (IS_BLANK(*(ctxt->input->cur))) NEXT


/*
 * xmlPopInput: the current input pointed by ctxt->input came to an end
 *          pop it and return the next char.
 *
 * Returns 0 without popping when only one input stream is left, otherwise
 * the current character of the stream that becomes current.
 *
 * TODO A deallocation of the popped Input structure is needed
 */
CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
    if (ctxt->inputNr == 1) return(0); /* End of main Input */
    inputPop(ctxt);
    return(CUR);
}

/*
 * xmlPushInput: switch to a new input stream which is stacked on top
 *               of the previous one(s).  A NULL input is ignored.
 */
void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
    if (input == NULL) return;
    inputPush(ctxt, input);
}

/*
 * Create a new input stream based on a memory buffer: the content of
 * `entity`.  The stream is allocated with malloc(), positioned at line 1,
 * column 1 of the entity content and pushed on the input stack.  On a NULL
 * entity, NULL content or allocation failure an error is reported through
 * xmlParserError() and nothing is pushed.
 */
void xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    xmlParserInputPtr input;

    if (entity == NULL) {
        xmlParserError(ctxt,
	      "internal: xmlNewEntityInputStream entity = NULL\n");
	return;
    }
    /* NOTE(review): the message below says entity->input but the test is on entity->content. */
    if (entity->content == NULL) {
        xmlParserError(ctxt,
	      "internal: xmlNewEntityInputStream entity->input = NULL\n");
	return;
    }
    input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
    if (input == NULL) {
	xmlParserError(ctxt, "malloc: couldn't allocate a new input stream\n");
	return;
    }
    /* filename is a `const char *` while SystemID is a `const CHAR *`. */
    input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
    input->base = entity->content;
    input->cur = entity->content;
    input->line = 1;
    input->col = 1;
    xmlPushInput(ctxt, input);
}
((c) <= 0x03D6)) || \ + ((c) == 0x03DA) || \ + ((c) == 0x03DC) || \ + ((c) == 0x03DE) || \ + ((c) == 0x03E0) || \ + (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \ + (((c) >= 0x0401) && ((c) <= 0x040C)) || \ + (((c) >= 0x040E) && ((c) <= 0x044F)) || \ + (((c) >= 0x0451) && ((c) <= 0x045C)) || \ + (((c) >= 0x045E) && ((c) <= 0x0481)) || \ + (((c) >= 0x0490) && ((c) <= 0x04C4)) || \ + (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \ + (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \ + (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \ + (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \ + (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \ + (((c) >= 0x0531) && ((c) <= 0x0556)) || \ + ((c) == 0x0559) || \ + (((c) >= 0x0561) && ((c) <= 0x0586)) || \ + (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \ + (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \ + (((c) >= 0x0621) && ((c) <= 0x063A)) || \ + (((c) >= 0x0641) && ((c) <= 0x064A)) || \ + (((c) >= 0x0671) && ((c) <= 0x06B7)) || \ + (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \ + (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \ + (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \ + ((c) == 0x06D5) || \ + (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \ + (((c) >= 0x0905) && ((c) <= 0x0939)) || \ + ((c) == 0x093D) || \ + (((c) >= 0x0958) && ((c) <= 0x0961)) || \ + (((c) >= 0x0985) && ((c) <= 0x098C)) || \ + (((c) >= 0x098F) && ((c) <= 0x0990)) || \ + (((c) >= 0x0993) && ((c) <= 0x09A8)) || \ + (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \ + ((c) == 0x09B2) || \ + (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \ + (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \ + (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \ + (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \ + (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \ + (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \ + (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \ + (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \ + (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \ + (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \ + (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \ + (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \ + ((c) == 0x0A5E) || \ + (((c) >= 
0x0A72) && ((c) <= 0x0A74)) || \ + (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \ + ((c) == 0x0A8D) || \ + (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \ + (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \ + (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \ + (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \ + (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \ + ((c) == 0x0ABD) || \ + ((c) == 0x0AE0) || \ + (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \ + (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \ + (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \ + (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \ + (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \ + (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \ + ((c) == 0x0B3D) || \ + (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \ + (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \ + (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \ + (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \ + (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \ + (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \ + ((c) == 0x0B9C) || \ + (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \ + (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \ + (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \ + (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \ + (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \ + (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \ + (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \ + (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \ + (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \ + (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \ + (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \ + (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \ + (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \ + (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \ + (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \ + (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \ + ((c) == 0x0CDE) || \ + (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \ + (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \ + (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \ + (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \ + (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \ + (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \ + (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \ + ((c) == 0x0E30) || \ + 
(((c) >= 0x0E32) && ((c) <= 0x0E33)) || \ + (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \ + (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \ + ((c) == 0x0E84) || \ + (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \ + ((c) == 0x0E8A) || \ + ((c) == 0x0E8D) || \ + (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \ + (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \ + (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \ + ((c) == 0x0EA5) || \ + ((c) == 0x0EA7) || \ + (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \ + (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \ + ((c) == 0x0EB0) || \ + (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \ + ((c) == 0x0EBD) || \ + (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \ + (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \ + (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \ + (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \ + (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \ + ((c) == 0x1100) || \ + (((c) >= 0x1102) && ((c) <= 0x1103)) || \ + (((c) >= 0x1105) && ((c) <= 0x1107)) || \ + ((c) == 0x1109) || \ + (((c) >= 0x110B) && ((c) <= 0x110C)) || \ + (((c) >= 0x110E) && ((c) <= 0x1112)) || \ + ((c) == 0x113C) || \ + ((c) == 0x113E) || \ + ((c) == 0x1140) || \ + ((c) == 0x114C) || \ + ((c) == 0x114E) || \ + ((c) == 0x1150) || \ + (((c) >= 0x1154) && ((c) <= 0x1155)) || \ + ((c) == 0x1159) || \ + (((c) >= 0x115F) && ((c) <= 0x1161)) || \ + ((c) == 0x1163) || \ + ((c) == 0x1165) || \ + ((c) == 0x1167) || \ + ((c) == 0x1169) || \ + (((c) >= 0x116D) && ((c) <= 0x116E)) || \ + (((c) >= 0x1172) && ((c) <= 0x1173)) || \ + ((c) == 0x1175) || \ + ((c) == 0x119E) || \ + ((c) == 0x11A8) || \ + ((c) == 0x11AB) || \ + (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \ + (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \ + ((c) == 0x11BA) || \ + (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \ + ((c) == 0x11EB) || \ + ((c) == 0x11F0) || \ + ((c) == 0x11F9) || \ + (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \ + (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \ + (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \ + (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \ + (((c) >= 0x1F20) && ((c) <= 
0x1F45)) || \ + (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \ + (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \ + ((c) == 0x1F59) || \ + ((c) == 0x1F5B) || \ + ((c) == 0x1F5D) || \ + (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \ + (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \ + (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \ + ((c) == 0x1FBE) || \ + (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \ + (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \ + (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \ + (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \ + (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \ + (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \ + (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \ + ((c) == 0x2126) || \ + (((c) >= 0x212A) && ((c) <= 0x212B)) || \ + ((c) == 0x212E) || \ + (((c) >= 0x2180) && ((c) <= 0x2182)) || \ + (((c) >= 0x3041) && ((c) <= 0x3094)) || \ + (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \ + (((c) >= 0x3105) && ((c) <= 0x312C)) || \ + (((c) >= 0xAC00) && ((c) <= 0xD7A3))) + +/* + * [88] Digit ::= ... long list see REC ... + */ +#define IS_DIGIT(c) \ + ((((c) >= 0x0030) && ((c) <= 0x0039)) || \ + (((c) >= 0x0660) && ((c) <= 0x0669)) || \ + (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \ + (((c) >= 0x0966) && ((c) <= 0x096F)) || \ + (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \ + (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \ + (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \ + (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \ + (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \ + (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \ + (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \ + (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \ + (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \ + (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \ + (((c) >= 0x0F20) && ((c) <= 0x0F29))) + +/* + * [87] CombiningChar ::= ... long list see REC ... 
+ */ +#define IS_COMBINING(c) \ + ((((c) >= 0x0300) && ((c) <= 0x0345)) || \ + (((c) >= 0x0360) && ((c) <= 0x0361)) || \ + (((c) >= 0x0483) && ((c) <= 0x0486)) || \ + (((c) >= 0x0591) && ((c) <= 0x05A1)) || \ + (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \ + (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \ + ((c) == 0x05BF) || \ + (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \ + ((c) == 0x05C4) || \ + (((c) >= 0x064B) && ((c) <= 0x0652)) || \ + ((c) == 0x0670) || \ + (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \ + (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \ + (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \ + (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \ + (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \ + (((c) >= 0x0901) && ((c) <= 0x0903)) || \ + ((c) == 0x093C) || \ + (((c) >= 0x093E) && ((c) <= 0x094C)) || \ + ((c) == 0x094D) || \ + (((c) >= 0x0951) && ((c) <= 0x0954)) || \ + (((c) >= 0x0962) && ((c) <= 0x0963)) || \ + (((c) >= 0x0981) && ((c) <= 0x0983)) || \ + ((c) == 0x09BC) || \ + ((c) == 0x09BE) || \ + ((c) == 0x09BF) || \ + (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \ + (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \ + (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \ + ((c) == 0x09D7) || \ + (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \ + ((c) == 0x0A02) || \ + ((c) == 0x0A3C) || \ + ((c) == 0x0A3E) || \ + ((c) == 0x0A3F) || \ + (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \ + (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \ + (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \ + (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \ + (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \ + ((c) == 0x0ABC) || \ + (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \ + (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \ + (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \ + (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \ + ((c) == 0x0B3C) || \ + (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \ + (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \ + (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \ + (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \ + (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \ + (((c) >= 0x0BBE) && ((c) <= 
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * The macro evaluates its argument several times: pass a side-effect
 * free expression.
 */
#define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))

/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * NOTE(review): the 0xf900-0xfa2d range below is not part of the
 * production quoted above; it is kept as found -- confirm it is wanted.
 */
#define IS_IDEOGRAPHIC(c)						\
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||				\
     (((c) >= 0xf900) && ((c) <= 0xfa2d)) ||				\
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||				\
      ((c) == 0x3007))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
/*
 * Blank chars.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||	\
                     ((c) == 0x0D))

/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 */
#define IS_PUBIDCHAR(c)							\
    (((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||			\
     (((c) >= 'a') && ((c) <= 'z')) ||					\
     (((c) >= 'A') && ((c) <= 'Z')) ||					\
     (((c) >= '0') && ((c) <= '9')) ||					\
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||	\
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||	\
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||	\
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||	\
     ((c) == '$') || ((c) == '_') || ((c) == '%'))

/*
 * SKIP_EOL(p): move the pointer p past ONE end of line, if there is one
 * at p.  CR, CR LF, LF and LF CR are each taken as a single line end.
 * CR is 0x0D and LF is 0x0A (13 and 10 are their decimal values).
 *
 * The macro expands to an if/else-if statement and evaluates p several
 * times: use it as a full statement with a plain pointer variable.
 */
#define SKIP_EOL(p) 							\
    if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; }		\
    else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; }

/*
 * MOVETO_ENDTAG(p) / MOVETO_STARTTAG(p): advance p while it points to a
 * valid character (IS_CHAR) that is not '>' (respectively '<').
 */
#define MOVETO_ENDTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++

#define MOVETO_STARTTAG(p)						\
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++
failed\n", + (len + 1) * sizeof(CHAR)); + return(NULL); + } + for (i = 0;i < len;i++) + ret[i] = (CHAR) cur[i]; + ret[len] = 0; + return(ret); +} + +/* + * xmlCharStrdup : a strdup for char's to CHAR's + */ + +CHAR *xmlCharStrdup(const char *cur) { + const char *p = cur; + + while (*p != '\0') p++; + return(xmlCharStrndup(cur, p - cur)); +} + +/* + * xmlStrcmp : a strcmp for CHAR's + */ + +int xmlStrcmp(const CHAR *str1, const CHAR *str2) { + register int tmp; + + do { + tmp = *str1++ - *str2++; + if (tmp != 0) return(tmp); + } while ((*str1 != 0) && (*str2 != 0)); + return (*str1 - *str2); +} + +/* + * xmlStrncmp : a strncmp for CHAR's + */ + +int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) { + register int tmp; + + if (len <= 0) return(0); + do { + tmp = *str1++ - *str2++; + if (tmp != 0) return(tmp); + len--; + if (len <= 0) return(0); + } while ((*str1 != 0) && (*str2 != 0)); + return (*str1 - *str2); +} + +/* + * xmlStrchr : a strchr for CHAR's + */ + +CHAR *xmlStrchr(const CHAR *str, CHAR val) { + while (*str != 0) { + if (*str == val) return((CHAR *) str); + str++; + } + return(NULL); +} + +/* + * xmlStrlen : lenght of a CHAR's string + */ + +int xmlStrlen(const CHAR *str) { + int len = 0; + + if (str == NULL) return(0); + while (*str != 0) { + str++; + len++; + } + return(len); +} + +/* + * xmlStrncat : a strncat for array of CHAR's + */ + +CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len) { + int size; + CHAR *ret; + + if ((add == NULL) || (len == 0)) + return(cur); + if (cur == NULL) + return(xmlStrndup(add, len)); + + size = xmlStrlen(cur); + ret = realloc(cur, (size + len + 1) * sizeof(CHAR)); + if (ret == NULL) { + fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n", + (size + len + 1) * sizeof(CHAR)); + return(cur); + } + memcpy(&ret[size], add, len * sizeof(CHAR)); + ret[size + len] = 0; + return(ret); +} + +/* + * xmlStrcat : a strcat for CHAR's + */ + +CHAR *xmlStrcat(CHAR *cur, const CHAR *add) { + const CHAR *p = add; + + if 
(add == NULL) return(cur); + if (cur == NULL) + return(xmlStrdup(add)); + + while (IS_CHAR(*p)) p++; + return(xmlStrncat(cur, add, p - add)); +} + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? * + * * + ************************************************************************/ + +/* + * Is this a sequence of blank chars that one can ignore ? + */ + +static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) { + int i; + xmlNodePtr lastChild; + + for (i = 0;i < len;i++) + if (!(IS_BLANK(str[i]))) return(0); + + if (CUR != '<') return(0); + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if (ctxt->node->content != NULL) return(0); + } else if (xmlNodeIsText(lastChild)) + return(0); + return(1); +} + +/* + * Handling of defined entities, when should we define a new input + * stream ? When do we just handle that as a set of chars ? + */ + +void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { + int len; + + if (entity->content == NULL) { + xmlParserError(ctxt, "xmlHandleEntity %s: content == NULL\n", + entity->name); + return; + } + len = xmlStrlen(entity->content); + if (len <= 2) goto handle_as_char; + + /* + * Redefine its content as an input stream. + */ + xmlNewEntityInputStream(ctxt, entity); + return; + +handle_as_char: + /* + * Just handle the content as a set of chars. + */ + if (ctxt->sax != NULL) + ctxt->sax->characters(ctxt, entity->content, 0, len); + +} + +/* + * Forward definition for recusive behaviour. 
+ */ +xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt); +CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine); +CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine); + +/************************************************************************ + * * + * Extra stuff for namespace support * + * Relates to http://www.w3.org/TR/WD-xml-names * + * * + ************************************************************************/ + +/* + * xmlNamespaceParseNCName : parse an XML namespace name. + * + * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* + * + * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + */ + +CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { + const CHAR *q; + CHAR *ret = NULL; + + if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL); + q = NEXT; + + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) + NEXT; + + ret = xmlStrndup(q, CUR_PTR - q); + + return(ret); +} + +/* + * xmlNamespaceParseQName : parse an XML qualified name + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + */ + +CHAR *xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) { + CHAR *ret = NULL; + + *prefix = NULL; + ret = xmlNamespaceParseNCName(ctxt); + if (CUR == ':') { + *prefix = ret; + NEXT; + ret = xmlNamespaceParseNCName(ctxt); + } + + return(ret); +} + +/* + * xmlNamespaceParseNSDef : parse a namespace prefix declaration + * + * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral + * + * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? 
+ */ + +CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) { + CHAR *name = NULL; + + if ((CUR == 'x') && (NXT(1) == 'm') && + (NXT(2) == 'l') && (NXT(3) == 'n') && + (NXT(4) == 's')) { + SKIP(5); + if (CUR == ':') { + NEXT; + name = xmlNamespaceParseNCName(ctxt); + } + } + return(name); +} + +/* + * [OLD] Parse and return a string between quotes or doublequotes + */ +CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) { + CHAR *ret = NULL; + const CHAR *q; + + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while (IS_CHAR(CUR) && (CUR != '"')) NEXT; + if (CUR != '"') + xmlParserError(ctxt, "String not closed\"%.50s\n", q); + else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\''){ + NEXT; + q = CUR_PTR; + while (IS_CHAR(CUR) && (CUR != '\'')) NEXT; + if (CUR != '\'') + xmlParserError(ctxt, "String not closed\"%.50s\n", q); + else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } + return(ret); +} + +/* + * [OLD] xmlParseNamespace: parse specific PI '')) { + /* + * We can have "ns" or "prefix" attributes + * Old encoding as 'href' or 'AS' attributes is still supported + */ + if ((CUR == 'n') && (NXT(1) == 's')) { + garbage = 0; + SKIP(2); + SKIP_BLANKS; + + if (CUR != '=') continue; + NEXT; + SKIP_BLANKS; + + href = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((CUR == 'h') && (NXT(1) == 'r') && + (NXT(2) == 'e') && (NXT(3) == 'f')) { + garbage = 0; + SKIP(4); + SKIP_BLANKS; + + if (CUR != '=') continue; + NEXT; + SKIP_BLANKS; + + href = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((CUR == 'p') && (NXT(1) == 'r') && + (NXT(2) == 'e') && (NXT(3) == 'f') && + (NXT(4) == 'i') && (NXT(5) == 'x')) { + garbage = 0; + SKIP(6); + SKIP_BLANKS; + + if (CUR != '=') continue; + NEXT; + SKIP_BLANKS; + + prefix = xmlParseQuotedString(ctxt); + SKIP_BLANKS; + } else if ((CUR == 'A') && (NXT(1) == 'S')) { + garbage = 0; + SKIP(2); + SKIP_BLANKS; + + if (CUR != '=') continue; + NEXT; + SKIP_BLANKS; + + prefix = xmlParseQuotedString(ctxt); 
+ SKIP_BLANKS; + } else if ((CUR == '?') && (NXT(1) == '>')) { + garbage = 0; + CUR_PTR ++; + } else { + /* + * Found garbage when parsing the namespace + */ + if (!garbage) + xmlParserError(ctxt, "xmlParseNamespace found garbage\n"); + NEXT; + } + } + + MOVETO_ENDTAG(CUR_PTR); + NEXT; + + /* + * Register the DTD. + */ + if (href != NULL) + xmlNewGlobalNs(ctxt->doc, href, prefix); + + if (prefix != NULL) free(prefix); + if (href != NULL) free(href); +} + +/************************************************************************ + * * + * The parser itself * + * Relates to http://www.w3.org/TR/REC-xml * + * * + ************************************************************************/ + +/* + * xmlParseName : parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + */ + +CHAR *xmlParseName(xmlParserCtxtPtr ctxt) { + const CHAR *q; + CHAR *ret = NULL; + + if (!IS_LETTER(CUR) && (CUR != '_') && + (CUR != ':')) return(NULL); + q = NEXT; + + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || (CUR == ':') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) + NEXT; + + ret = xmlStrndup(q, CUR_PTR - q); + + return(ret); +} + +/* + * xmlParseNmtoken : parse an XML Nmtoken. + * + * [7] Nmtoken ::= (NameChar)+ + * + * [8] Nmtokens ::= Nmtoken (S Nmtoken)* + */ + +CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) { + const CHAR *q; + CHAR *ret = NULL; + + q = NEXT; + + while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) || + (CUR == '.') || (CUR == '-') || + (CUR == '_') || (CUR == ':') || + (IS_COMBINING(CUR)) || + (IS_EXTENDER(CUR))) + NEXT; + + ret = xmlStrndup(q, CUR_PTR - q); + + return(ret); +} + +/* + * xmlParseEntityValue : parse a value for ENTITY decl. 
+ * + * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | + * "'" ([^%&'] | PEReference | Reference)* "'" + */ + +CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) { + CHAR *ret = NULL, *cur; + const CHAR *q; + + if (CUR == '"') { + NEXT; + + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) { + if (CUR == '%') { + ret = xmlStrncat(ret, q, CUR_PTR - q); + cur = xmlParsePEReference(ctxt, 1); + ret = xmlStrcat(ret, cur); + q = CUR_PTR; + } else if (CUR == '&') { + ret = xmlStrncat(ret, q, CUR_PTR - q); + cur = xmlParseReference(ctxt, 1); + ret = xmlStrcat(ret, cur); + q = CUR_PTR; + } else + NEXT; + } + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Unfinished EntityValue\n"); + } else { + ret = xmlStrncat(ret, q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) { + if (CUR == '%') { + ret = xmlStrncat(ret, q, CUR_PTR - q); + cur = xmlParsePEReference(ctxt, 1); + ret = xmlStrcat(ret, cur); + q = CUR_PTR; + } else if (CUR == '&') { + ret = xmlStrncat(ret, q, CUR_PTR - q); + cur = xmlParseReference(ctxt, 1); + ret = xmlStrcat(ret, cur); + q = CUR_PTR; + } else + NEXT; + } + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Unfinished EntityValue\n"); + } else { + ret = xmlStrncat(ret, q, CUR_PTR - q); + NEXT; + } + } else { + xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n"); + } + + return(ret); +} + +/* + * xmlParseAttValue : parse a value for an attribute + * + * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | + * "'" ([^<&'] | Reference)* "'" + */ + +CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) { + CHAR *ret = NULL, *cur; + const CHAR *q; + + if (CUR == '"') { + NEXT; + + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) { + if (CUR == '&') { + ret = xmlStrncat(ret, q, CUR_PTR - q); + cur = xmlParseReference(ctxt, 1); + ret = xmlStrcat(ret, cur); + q = CUR_PTR; + } else + NEXT; + } + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Unfinished AttValue\n"); + } else { + 
ret = xmlStrncat(ret, q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) { + if (CUR == '&') { + ret = xmlStrncat(ret, q, CUR_PTR - q); + cur = xmlParseReference(ctxt, 1); + ret = xmlStrcat(ret, cur); + q = CUR_PTR; + } else + NEXT; + } + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Unfinished AttValue\n"); + } else { + ret = xmlStrncat(ret, q, CUR_PTR - q); + NEXT; + } + } else { + xmlParserError(ctxt, "AttValue: \" or ' expected\n"); + } + + return(ret); +} + +/* + * xmlParseSystemLiteral : parse an XML Literal + * + * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") + */ + +CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { + const CHAR *q; + CHAR *ret = NULL; + + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '"')) + NEXT; + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Unfinished SystemLiteral\n"); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Unfinished SystemLiteral\n"); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + xmlParserError(ctxt, "SystemLiteral \" or ' expected\n"); + } + + return(ret); +} + +/* + * xmlParsePubidLiteral: parse an XML public literal + * + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + */ + +CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { + const CHAR *q; + CHAR *ret = NULL; + /* + * Name ::= (Letter | '_') (NameChar)* + */ + if (CUR == '"') { + NEXT; + q = CUR_PTR; + while (IS_PUBIDCHAR(CUR)) NEXT; + if (CUR != '"') { + xmlParserError(ctxt, "Unfinished PubidLiteral\n"); + } else { + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else if (CUR == '\'') { + NEXT; + q = CUR_PTR; + while ((IS_LETTER(CUR)) && (CUR != '\'')) + NEXT; + if (!IS_LETTER(CUR)) { + xmlParserError(ctxt, "Unfinished PubidLiteral\n"); + } else 
{ + ret = xmlStrndup(q, CUR_PTR - q); + NEXT; + } + } else { + xmlParserError(ctxt, "SystemLiteral \" or ' expected\n"); + } + + return(ret); +} + +/* + * xmlParseCharData: parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. + * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { + const CHAR *q; + + q = CUR_PTR; + while ((IS_CHAR(CUR)) && (CUR != '<') && + (CUR != '&')) { + NEXT; + if ((cdata) && (CUR == ']') && (NXT(1) == ']') && + (NXT(2) == '>')) break; + } + if (q == CUR_PTR) return; + + /* + * Ok the segment [q CUR_PTR] is to be consumed as chars. + */ + if (ctxt->sax != NULL) { + if (areBlanks(ctxt, q, CUR_PTR - q)) + ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q); + else + ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q); + } +} + +/* + * xmlParseExternalID: Parse an External ID + * + * [75] ExternalID ::= 'SYSTEM' S SystemLiteral + * | 'PUBLIC' S PubidLiteral S SystemLiteral + */ + +CHAR *xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) { + CHAR *URI = NULL; + + if ((CUR == 'S') && (NXT(1) == 'Y') && + (NXT(2) == 'S') && (NXT(3) == 'T') && + (NXT(4) == 'E') && (NXT(5) == 'M')) { + SKIP(6); + SKIP_BLANKS; + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) + xmlParserError(ctxt, + "xmlParseExternalID: SYSTEM, no URI\n"); + } else if ((CUR == 'P') && (NXT(1) == 'U') && + (NXT(2) == 'B') && (NXT(3) == 'L') && + (NXT(4) == 'I') && (NXT(5) == 'C')) { + SKIP(6); + SKIP_BLANKS; + *publicID = xmlParsePubidLiteral(ctxt); + if (*publicID == NULL) + xmlParserError(ctxt, + "xmlParseExternalID: PUBLIC, no Public Identifier\n"); + SKIP_BLANKS; + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) + xmlParserError(ctxt, + "xmlParseExternalID: PUBLIC, no URI\n"); + } + return(URI); +} + +/* + * Skip an XML (SGML) comment + * This may or may not create a node (depending on the context) + * The spec says that "For compatibility, the string "--" 
(double-hyphen) + * must not occur within comments. " + * + * [15] Comment ::= '' + */ +xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) { + xmlNodePtr ret = NULL; + const CHAR *q, *start; + const CHAR *r; + CHAR *val; + + /* + * Check that there is a comment right here. + */ + if ((CUR != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return(NULL); + + SKIP(4); + start = q = CUR_PTR; + NEXT; + r = CUR_PTR; + NEXT; + while (IS_CHAR(CUR) && + ((CUR == ':') || (CUR != '>') || + (*r != '-') || (*q != '-'))) { + if ((*r == '-') && (*q == '-')) + xmlParserError(ctxt, + "Comment must not contain '--' (double-hyphen)`\n"); + NEXT;r++;q++; + } + if (!IS_CHAR(CUR)) { + xmlParserError(ctxt, "Comment not terminated \n"); + } + return; + } + if (xmlIndentTreeOutput) + for (i = 0;i < level;i++) + xmlBufferWriteChar(" "); + + xmlBufferWriteChar("<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlBufferWriteCHAR(cur->ns->prefix); + xmlBufferWriteChar(":"); + } + + xmlBufferWriteCHAR(cur->name); + if (cur->nsDef) + xmlNsListDump(cur->nsDef); + if (cur->properties != NULL) + xmlAttrListDump(doc, cur->properties); + + if ((cur->content == NULL) && (cur->childs == NULL)) { + xmlBufferWriteChar("/>\n"); + return; + } + xmlBufferWriteChar(">"); + if (cur->content != NULL) + xmlBufferWriteCHAR(xmlEncodeEntities(doc, cur->content)); + if (cur->childs != NULL) { + xmlBufferWriteChar("\n"); + xmlNodeListDump(doc, cur->childs, level + 1); + if (xmlIndentTreeOutput) + for (i = 0;i < level;i++) + xmlBufferWriteChar(" "); + } + xmlBufferWriteChar("ns != NULL) && (cur->ns->prefix != NULL)) { + xmlBufferWriteCHAR(cur->ns->prefix); + xmlBufferWriteChar(":"); + } + + xmlBufferWriteCHAR(cur->name); + xmlBufferWriteChar(">\n"); +} + +/* + * Dump an XML document + */ +static void xmlDocContentDump(xmlDocPtr cur) { + if (oldXMLWDcompatibility) + xmlBufferWriteChar("version); + xmlBufferWriteChar("\""); + if (cur->encoding != NULL) { + xmlBufferWriteChar(" 
encoding=\""); + xmlBufferWriteCHAR(cur->encoding); + xmlBufferWriteChar("\""); + } + switch (cur->standalone) { + case 0: + xmlBufferWriteChar(" standalone=\"no\""); + break; + case 1: + xmlBufferWriteChar(" standalone=\"yes\""); + break; + } + xmlBufferWriteChar("?>\n"); + if ((cur->dtd != NULL) || (cur->entities != NULL)) + xmlDtdDump(cur); + if (cur->root != NULL) { + /* global namespace definitions, the old way */ + if (oldXMLWDcompatibility) + xmlGlobalNsListDump(cur->oldNs); + else + xmlUpgradeOldNs(cur); + xmlNodeDump(cur, cur->root, 0); + } +} + +/* + * Dump an XML document to memory. + */ + +void xmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) { + if (cur == NULL) { + fprintf(stderr, "xmlDocDump : document == NULL\n"); + *mem = NULL; + *size = 0; + return; + } + buffer_index = 0; + xmlDocContentDump(cur); + + *mem = buffer; + *size = buffer_index; +} + +/* + * Dump an XML document to the given FD + */ + +void xmlDocDump(FILE *f, xmlDocPtr cur) { + if (cur == NULL) { + fprintf(stderr, "xmlDocDump : document == NULL\n"); + return; + } + buffer_index = 0; + xmlDocContentDump(cur); + + fwrite(buffer, sizeof(CHAR), buffer_index, f); +} + +/************************************************************************ + * * + * Debug * + * * + ************************************************************************/ + +#ifdef STANDALONE +int main(void) { + xmlDocPtr doc; + xmlNodePtr tree, subtree; + xmlNsPtr ns1; + xmlNsPtr ns2; + + /* + * build a fake XML document + */ + doc = xmlNewDoc("1.0"); + ns1 = xmlNewNs(doc, "http://www.ietf.org/standards/dav/", "D"); + ns2 = xmlNewNs(doc, "http://www.w3.com/standards/z39.50/", "Z"); + doc->root = xmlNewNode(ns1, "multistatus", NULL); + tree = xmlNewChild(doc->root, NULL, "response", NULL); + subtree = xmlNewChild(tree, NULL, "prop", NULL); + xmlNewChild(subtree, ns2, "Authors", NULL); + subtree = xmlNewChild(tree, NULL, "status", "HTTP/1.1 420 Method Failure"); + tree = xmlNewChild(doc->root, NULL, "response", NULL); 
+ subtree = xmlNewChild(tree, NULL, "prop", NULL); + xmlNewChild(subtree, ns2, "Copyright-Owner", NULL); + subtree = xmlNewChild(tree, NULL, "status", "HTTP/1.1 409 Conflict"); + tree = xmlNewChild(doc->root, NULL, "responsedescription", + "Copyright Owner can not be deleted or altered"); + + /* + * print it. + */ + xmlDocDump(stdout, doc); + + /* + * free it. + */ + xmlFreeDoc(doc); + return(0); +} +#endif diff --git a/tree.h b/tree.h new file mode 100644 index 00000000..7a48a9bd --- /dev/null +++ b/tree.h @@ -0,0 +1,180 @@ +/* + * tree.h : describes the structures found in an tree resulting + * from an XML parsing. + * + * See Copyright for the status of this software. + * + * $Id$ + */ + +#ifndef __XML_TREE_H__ +#define __XML_TREE_H__ + + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Type definitions + */ +#ifdef UNICODE +typedef unsigned short CHAR; +#else +typedef unsigned char CHAR; +#endif + +/* + * a DTD Notation definition + * TODO !!!! + */ + +/* + * a DTD Attribute definition + * TODO !!!! + */ + +/* + * a DTD Element definition. + */ +#define XML_ELEMENT_TYPE_EMPTY 1 +#define XML_ELEMENT_TYPE_ANY 2 +#define XML_ELEMENT_TYPE_MIXED 3 +#define XML_ELEMENT_TYPE_ELEMENT 4 + +typedef struct xmlElement { + const CHAR *name; /* Element name */ + int type; /* type (too simple, to extend ...) */ + /* TODO !!! more needed */ +} xmlElement, *xmlElementPtr; + +/* + * An XML namespace. + * Note that prefix == NULL is valid, it defines the default namespace + * within the subtree (until overriden). 
+ */ + +#define XML_GLOBAL_NAMESPACE 1 /* old style global namespace */ +#define XML_LOCAL_NAMESPACE 2 /* new style local scoping */ + +typedef struct xmlNs { + struct xmlNs *next; /* next Ns link for this node */ + int type; /* global or local */ + const CHAR *href; /* URL for the namespace */ + const CHAR *prefix; /* prefix for the namespace */ +} xmlNs, *xmlNsPtr; + +/* + * An XML DtD, as defined by node link */ + struct xmlAttr *next; /* parent->childs link */ + const CHAR *name; /* the name of the property */ + const CHAR *value; /* the value of the property */ +} xmlAttr, *xmlAttrPtr; + +/* + * A node in an XML tree. + */ +#define XML_TYPE_TEXT 1 +#define XML_TYPE_COMMENT 2 +#define XML_TYPE_ENTITY 3 + +typedef struct xmlNode { + struct xmlNode *parent; /* child->parent link */ + struct xmlNode *next; /* next sibling link */ + struct xmlNode *childs; /* parent->childs link */ + struct xmlAttr *properties; /* properties list */ + int type; /* type number in the DTD */ + const CHAR *name; /* the name of the node, or the entity */ + xmlNs *ns; /* pointer to the associated namespace */ + xmlNs *nsDef; /* namespace definitions on this node */ + CHAR *content; /* the content */ +} xmlNode, *xmlNodePtr; + +/* + * An XML document. + */ +typedef struct xmlDoc { + char *name; /* name/filename/URI of the document */ + const CHAR *version; /* the XML version string */ + const CHAR *encoding; /* encoding, if any */ + int standalone; /* standalone document (no external refs) */ + struct xmlDtd *dtd; /* the document DTD if available */ + struct xmlNs *oldNs; /* Global namespace, the old way */ + void *entities; /* Hash table for general entities if any */ + struct xmlNode *root; /* the document tree */ +} xmlDoc, *xmlDocPtr; + +/* + * Variables. + */ +extern xmlNsPtr baseDTD; +extern int oldXMLWDcompatibility;/* maintain compatibility with old WD */ +extern int xmlIndentTreeOutput; /* try to indent the tree dumps */ + +/* + * Functions. 
+ */ +extern xmlDtdPtr xmlNewDtd(xmlDocPtr doc, const CHAR *name, + const CHAR *ExternalID, const CHAR *SystemID); +extern void xmlFreeDtd(xmlDtdPtr cur); +extern xmlNsPtr xmlNewGlobalNs(xmlDocPtr doc, const CHAR *href, const CHAR *AS); +extern xmlNsPtr xmlNewNs(xmlNodePtr node, const CHAR *href, const CHAR *AS); +extern void xmlFreeNs(xmlNsPtr cur); +extern xmlDocPtr xmlNewDoc(const CHAR *version); +extern void xmlFreeDoc(xmlDocPtr cur); +extern xmlAttrPtr xmlNewProp(xmlNodePtr node, const CHAR *name, + const CHAR *value); +extern xmlAttrPtr xmlSetProp(xmlNodePtr node, const CHAR *name, + const CHAR *value); +extern const CHAR *xmlGetProp(xmlNodePtr node, const CHAR *name); +extern void xmlFreePropList(xmlAttrPtr cur); +extern void xmlFreeProp(xmlAttrPtr cur); +extern xmlNodePtr xmlNewNode(xmlNsPtr ns, const CHAR *name, CHAR *content); +extern xmlNodePtr xmlNewText(const CHAR *content); +extern xmlNodePtr xmlNewTextLen(const CHAR *content, int len); +extern xmlNodePtr xmlNewComment(CHAR *content); +extern xmlNodePtr xmlAddChild(xmlNodePtr parent, xmlNodePtr cur); +extern xmlNodePtr xmlGetLastChild(xmlNodePtr node); +extern int xmlNodeIsText(xmlNodePtr node); +extern void xmlTextConcat(xmlNodePtr node, const CHAR *content, int len); +extern void xmlFreeNodeList(xmlNodePtr cur); +extern void xmlFreeNode(xmlNodePtr cur); +extern void xmlNodeSetContent(xmlNodePtr cur, const CHAR *content); +extern void xmlNodeSetContentLen(xmlNodePtr cur, const CHAR *content, int len); +extern void xmlNodeAddContent(xmlNodePtr cur, const CHAR *content); +extern void xmlNodeAddContentLen(xmlNodePtr cur, const CHAR *content, int len); +extern xmlNsPtr xmlSearchNs(xmlDocPtr doc, xmlNodePtr node, + const CHAR *nameSpace); +extern xmlNsPtr xmlSearchNsByHref(xmlDocPtr doc, xmlNodePtr node, + const CHAR *href); +extern void xmlSetNs(xmlNodePtr node, xmlNsPtr ns); +extern xmlNodePtr xmlNewChild(xmlNodePtr parent, xmlNsPtr ns, + const CHAR *name, CHAR *content); + +extern void 
xmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size); +extern void xmlDocDump(FILE *f, xmlDocPtr doc); +extern void xmlBufferWriteCHAR(const CHAR *string); +extern void xmlBufferWriteChar(const char *string); + + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_TREE_H__ */ + diff --git a/xml_entities.c b/xml_entities.c deleted file mode 100644 index 3c9d55cd..00000000 --- a/xml_entities.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * entities.c : implementation for the XML entities handking - * - * See Copyright for the status of this software. - * - * $Id$ - */ - -#include -#include -#include -#include "xml_entities.h" - -/* - * A buffer used for converting entities to their equivalent and back. - */ -static CHAR *buffer = NULL; -static int buffer_size = 0; - -void growBuffer(void) { - buffer_size *= 2; - buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR)); - if (buffer == NULL) { - perror("realloc failed"); - exit(1); - } -} - -/* - * xmlFreeEntity : clean-up an entity record. - */ - -void xmlFreeEntity(xmlEntityPtr entity) { - if (entity == NULL) return; - - if (entity->value != NULL) free(entity->value); - entity->value = NULL; - if (entity->id != NULL) - free((char *) entity->id); -} - -/* - * xmlAddDocEntity : register a new entity for an entities table. - */ -static void xmlAddEntity(xmlEntitiesTablePtr table, CHAR *value, - const CHAR *id) { - int i; - xmlEntityPtr cur; - - for (i = 0;i < table->nb_entities;i++) { - cur = &table->table[i]; - if (!xmlStrcmp(cur->id, id)) { - free(cur->value); - cur->value = xmlStrdup(value); - } - } - if (table->nb_entities >= table->max_entities) { - /* - * need more elements. 
- */ - table->max_entities *= 2; - table->table = (xmlEntityPtr) - realloc(table->table, table->max_entities * sizeof(xmlEntity)); - if (table->table) { - perror("realloc failed"); - exit(1); - } - } - cur = &table->table[table->nb_entities]; - cur->value = xmlStrdup(value); - cur->id = xmlStrdup(id); - table->nb_entities++; -} - - -/* - * xmlAddDtdEntity : register a new entity for this document. - */ -void xmlAddDtdEntity(xmlDtdPtr dtd, CHAR *value, const CHAR *id) { - xmlEntitiesTablePtr table; - - table = (xmlEntitiesTablePtr) dtd->entities; - if (table == NULL) { - table = xmlCreateEntitiesTable(); - dtd->entities = table; - } - xmlAddEntity(table, value, id); -} - -/* - * xmlAddDocEntity : register a new entity for this document. - */ -void xmlAddDocEntity(xmlDocPtr doc, CHAR *value, const CHAR *id) { - xmlEntitiesTablePtr table; - - table = (xmlEntitiesTablePtr) doc->entities; - if (table == NULL) { - table = xmlCreateEntitiesTable(); - doc->entities = table; - } - xmlAddEntity(table, value, id); -} - -/* - * xmlGetEntity : do an entity lookup in the hash table and - * returns the corrsponding CHAR *, if found, zero otherwise. - */ -CHAR *xmlGetEntity(xmlDocPtr doc, const CHAR *id) { - int i; - xmlEntityPtr cur; - xmlEntitiesTablePtr table; - - if (doc->entities == NULL) return(0); - table = (xmlEntitiesTablePtr) doc->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = &table->table[i]; - if (!xmlStrcmp(cur->id, id)) return(cur->value); - } - return(NULL); -} - -/* - * xmlReadEntities : read an entity. - */ -const CHAR *xmlReadEntity(xmlDocPtr doc, const CHAR **input) { - static CHAR *entity = NULL; - static int entity_size = 100; - const CHAR *cur = *input; - - if (entity == NULL) { - entity = (CHAR *) malloc(entity_size * sizeof(CHAR)); - if (entity == NULL) { - fprintf(stderr, "xmlReadEntity : cannot allocate %d bytes\n", - entity_size * sizeof(CHAR)); - return(NULL); - } - } - if (*cur == '&') { - cur++; - if (*cur == '#') { - /* TODO !!!! 
- fprintf(stderr, "Character reference not yet implemented\n"); */ - } else { - /* TODO !!!! - fprintf(stderr, "Entity search not yet implemented\n"); */ - } - } - - /* - * The few predefined entities. - */ - if ((cur[0] == 'a') && (cur[1] == 'm') && (cur[2] == 'p') && - (cur[3] == ';')) { - entity[0] = '%'; - entity[1] = 0; - cur += 3; - *input = cur; - return(entity); - } else if ((cur[0] == 'q') && (cur[1] == 'u') && (cur[2] == 'o') && - (cur[3] == 't') && (cur[4] == ';')) { - entity[0] = '"'; - entity[1] = 0; - cur += 4; - *input = cur; - return(entity); - } else if ((cur[0] == 'a') && (cur[1] == 'p') && (cur[2] == 'o') && - (cur[3] == 's') && (cur[4] == ';')) { - entity[0] = '\''; - entity[1] = 0; - cur += 4; - *input = cur; - return(entity); - } else if ((cur[0] == 'l') && (cur[1] == 't') && (cur[2] == ';')) { - entity[0] = '<'; - entity[1] = 0; - cur += 2; - *input = cur; - return(entity); - } else if ((cur[0] == 'g') && (cur[1] == 't') && (cur[2] == ';')) { - entity[0] = '>'; - entity[1] = 0; - cur += 2; - *input = cur; - return(entity); - } - - return(NULL); -} - -/* - * xmlDecodeEntities : do a global entities lookup on a input string - * and returns a duplicate after the entities substitution. - */ -CHAR *xmlDecodeEntities(xmlDocPtr doc, const CHAR *input, int len) { - const CHAR *cur = input; - CHAR *out = buffer; - int i; - - if (buffer == NULL) { - buffer_size = 1000; - buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR)); - if (buffer == NULL) { - perror("malloc failed"); - exit(1); - } - out = buffer; - } - for (i = 0;(*cur != 0) && (cur - input < len);cur++) { - if (*cur == '&') { - const CHAR *entity = xmlReadEntity(doc, &cur); - if (entity != NULL) - while (*entity != 0) { - *out++ = *entity++; - i++; - if (i + 10 > buffer_size) { - int index = out - buffer; - - growBuffer(); - out = &buffer[index]; - } - } - } else if (*cur == '%') { - /* TODO !!!!! 
- fprintf(stderr, " \n"); */ - } else { - *out++ = *cur; - i++; - } - - if (i + 10 > buffer_size) { - int index = out - buffer; - - growBuffer(); - out = &buffer[index]; - } - } - *out++ = 0; - return(buffer); -} - -/* - * xmlEncodeEntities : do a global encoding of a string, replacing the - * basic values with their entities form. - */ -CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) { - const CHAR *cur = input; - CHAR *out = buffer; - - if (buffer == NULL) { - buffer_size = 1000; - buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR)); - if (buffer == NULL) { - perror("malloc failed"); - exit(1); - } - out = buffer; - } - while (*cur != '\0') { - if (out - buffer > buffer_size - 100) { - int index = out - buffer; - - growBuffer(); - out = &buffer[index]; - } - - /* - * By default one have to encode at least '<', '>', '"' and '&' ! - * One could try a better encoding using the entities defined and - * used as a compression code !!!. - */ - if (*cur == '<') { - *out++ = '&'; - *out++ = 'l'; - *out++ = 't'; - *out++ = ';'; - } else if (*cur == '>') { - *out++ = '&'; - *out++ = 'g'; - *out++ = 't'; - *out++ = ';'; - } else if (*cur == '&') { - *out++ = '&'; - *out++ = 'a'; - *out++ = 'm'; - *out++ = 'p'; - *out++ = ';'; - } else if (*cur == '"') { - *out++ = '&'; - *out++ = 'q'; - *out++ = 'u'; - *out++ = 'o'; - *out++ = 't'; - *out++ = ';'; - } else if (*cur == '\'') { - *out++ = '&'; - *out++ = 'a'; - *out++ = 'p'; - *out++ = 'o'; - *out++ = 's'; - *out++ = ';'; - } else { - /* - * default case, just copy ! 
- */ - *out++ = *cur; - } - cur++; - } - *out++ = 0; - return(buffer); -} - -/* - * xmlCreateEntitiesTable : create and initialize an enmpty hash table - */ -xmlEntitiesTablePtr xmlCreateEntitiesTable(void) { - xmlEntitiesTablePtr ret; - - ret = (xmlEntitiesTablePtr) - malloc(sizeof(xmlEntitiesTable)); - if (ret == NULL) { - fprintf(stderr, "xmlCreateEntitiesTable : malloc(%d) failed\n", - sizeof(xmlEntitiesTable)); - return(NULL); - } - ret->max_entities = XML_MIN_ENTITIES_TABLE; - ret->nb_entities = 0; - ret->table = (xmlEntityPtr ) - malloc(ret->max_entities * sizeof(xmlEntity)); - if (ret == NULL) { - fprintf(stderr, "xmlCreateEntitiesTable : malloc(%d) failed\n", - ret->max_entities * sizeof(xmlEntity)); - free(ret); - return(NULL); - } - return(ret); -} - -/* - * xmlFreeEntitiesTable : clean up and free an entities hash table. - */ -void xmlFreeEntitiesTable(xmlEntitiesTablePtr table) { - int i; - - if (table == NULL) return; - - for (i = 0;i < table->nb_entities;i++) { - xmlFreeEntity(&table->table[i]); - } - free(table->table); - free(table); -} - diff --git a/xml_parser.c b/xml_parser.c deleted file mode 100644 index dfec5a78..00000000 --- a/xml_parser.c +++ /dev/null @@ -1,1183 +0,0 @@ -/* - * parser.c : an XML 1.0 non-verifying parser - * - * See Copyright for the status of this software. - * - * $Id$ - */ - -#include -#include -#include -#include /* for memset() only */ -#include -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_ZLIB_H -#include -#endif - -#include "xml_tree.h" -#include "xml_parser.h" -#include "xml_entities.h" - -/* - * A few macros needed to help building the parser. - */ - -#ifdef UNICODE -/* - * UNICODE version of the macros. Incomplete now TODO !!!! 
- */ -#define IS_CHAR(c) \ - (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \ - (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) - -#define SKIP_BLANKS(p) \ - while ((*(p) == 0x20) || (*(p) == 0x09) || (*(p) == 0xa) || \ - (*(p) == 0x3000)) (p)++; - -/* I'm too lazy to complete this one TODO !!!! */ -#define IS_BASECHAR(c) \ - ((((c) >= 0x41) && ((c) <= 0x5a)) || \ - (((c) >= 0x61) && ((c) <= 0x7a)) || \ - (((c) >= 0xaa) && ((c) <= 0x5b)) || \ - (((c) >= 0xc0) && ((c) <= 0xd6)) || \ - (((c) >= 0xd8) && ((c) <= 0xf6)) || \ - (((c) >= 0xf8) && ((c) <= 0xff)) || \ - ((c) == 0xba)) - -/* I'm too lazy to complete this one TODO !!!! */ -#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39)) - -/* I'm too lazy to complete this one TODO !!!! */ -#define IS_COMBINING(c) 0 - -#define IS_IGNORABLE(c) \ - ((((c) >= 0x200c) && ((c) <= 0x200f)) || \ - (((c) >= 0x202a) && ((c) <= 0x202e)) || \ - (((c) >= 0x206a) && ((c) <= 0x206f)) || \ - ((c) == 0xfeff)) - -#define IS_EXTENDER(c) \ - (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \ - ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \ - ((c) == 0xec6) || ((c) == 0x3005) \ - (((c) >= 0x3031) && ((c) <= 0x3035)) || \ - (((c) >= 0x309b) && ((c) <= 0x309e)) || \ - (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \ - (((c) >= 0xff70) && ((c) <= 0xff9e)) || \ - ((c) == 0xff9f)) - -#define IS_IDEOGRAPHIC(c) \ - ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \ - (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \ - (((c) >= 0x3021) && ((c) <= 0x3029)) || \ - ((c) == 0x3007)) - -#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) - -/* I'm too lazy to complete this one ! */ -#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa)) -#else -/* - * 8bits / ASCII version of the macros. 
- */ -#define IS_CHAR(c) \ - (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20)) - -#define IS_BASECHAR(c) \ - ((((c) >= 0x41) && ((c) <= 0x5a)) || \ - (((c) >= 0x61) && ((c) <= 0x7a)) || \ - (((c) >= 0xaa) && ((c) <= 0x5b)) || \ - (((c) >= 0xc0) && ((c) <= 0xd6)) || \ - (((c) >= 0xd8) && ((c) <= 0xf6)) || \ - (((c) >= 0xf8) && ((c) <= 0xff)) || \ - ((c) == 0xba)) - -#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39)) - -#define IS_LETTER(c) IS_BASECHAR(c) - -#define IS_COMBINING(c) 0 - -#define IS_IGNORABLE(c) 0 - -#define IS_EXTENDER(c) ((c) == 0xb7) - -#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa)) -#endif - - -#define SKIP_EOL(p) \ - if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \ - if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } - -#define SKIP_BLANKS(p) \ - while (IS_BLANK(*(p))) (p)++; - -#define MOVETO_ENDTAG(p) \ - while (IS_CHAR(*p) && (*(p) != '>')) (p)++; - -#define MOVETO_STARTTAG(p) \ - while (IS_CHAR(*p) && (*(p) != '<')) (p)++; - -/* - * Forward definition for recusive behaviour. - */ -xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt); - -/* - * xmlHandleData : this routine represent's the specific application - * behaviour when reading a piece of text. 
- * - * For example in WebDav, any piece made only of blanks is eliminated - */ - -CHAR *xmlHandleData(CHAR *in) { - CHAR *cur; - - if (in == NULL) return(NULL); - cur = in; - while (IS_CHAR(*cur)) { - if (!IS_BLANK(*cur)) goto not_blank; - cur++; - } - free(in); - return(NULL); - -not_blank: - return(in); -} - -/* - * xmlStrndup : a strdup for array of CHAR's - */ - -CHAR *xmlStrndup(const CHAR *cur, int len) { - CHAR *ret = malloc((len + 1) * sizeof(CHAR)); - - if (ret == NULL) { - fprintf(stderr, "malloc of %d byte failed\n", - (len + 1) * sizeof(CHAR)); - return(NULL); - } - memcpy(ret, cur, len * sizeof(CHAR)); - ret[len] = 0; - return(ret); -} - -/* - * xmlStrdup : a strdup for CHAR's - */ - -CHAR *xmlStrdup(const CHAR *cur) { - const CHAR *p = cur; - - while (IS_CHAR(*p)) p++; - return(xmlStrndup(cur, p - cur)); -} - -/* - * xmlStrcmp : a strcmp for CHAR's - */ - -int xmlStrcmp(const CHAR *str1, const CHAR *str2) { - register int tmp; - - do { - tmp = *str1++ - *str2++; - if (tmp != 0) return(tmp); - } while ((*str1 != 0) && (*str2 != 0)); - return (*str1 - *str2); -} - -/* - * xmlStrncmp : a strncmp for CHAR's - */ - -int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) { - register int tmp; - - if (len <= 0) return(0); - do { - tmp = *str1++ - *str2++; - if (tmp != 0) return(tmp); - len--; - if (len <= 0) return(0); - } while ((*str1 != 0) && (*str2 != 0)); - return (*str1 - *str2); -} - -/* - * xmlStrchr : a strchr for CHAR's - */ - -CHAR *xmlStrchr(const CHAR *str, CHAR val) { - while (*str != 0) { - if (*str == val) return((CHAR *) str); - str++; - } - return(NULL); -} - -/* - * xmlParseName : parse an XML name. 
- */ - -CHAR *xmlParseName(xmlParserCtxtPtr ctxt) { - const CHAR *q; - CHAR *ret = NULL; - - /* - * Name ::= (Letter | '_') (NameChar)* - */ - if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL); - q = ctxt->cur++; - while ((IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) || - (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') || (ctxt->cur[0] == '_') || - (ctxt->cur[0] == ':') || - (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) || - (IS_EXTENDER(ctxt->cur[0]))) - ctxt->cur++; - - ret = xmlStrndup(q, ctxt->cur - q); - - return(ret); -} - -/* - * Parse and return a string between quotes or doublequotes - */ -CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) { - CHAR *ret = NULL; - const CHAR *q; - - if (ctxt->cur[0] == '"') { - ctxt->cur++; - q = ctxt->cur; - while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++; - if (ctxt->cur[0] != '"') - fprintf(stderr, "String not closed \"%.50s\n", q); - else { - ret = xmlStrndup(q, ctxt->cur - q); - ctxt->cur++; - } - } else if (ctxt->cur[0] == '\''){ - ctxt->cur++; - q = ctxt->cur; - while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++; - if (ctxt->cur[0] != '\'') - fprintf(stderr, "String not closed '%.50s\n", q); - else { - ret = xmlStrndup(q, ctxt->cur - q); - ctxt->cur++; - } - } - return(ret); -} - -/* - * Skip an XML (SGML) comment - * - * TODO !!!! Save the comment in the tree !!! - */ -void xmlParserSkipComment(xmlParserCtxtPtr ctxt) { - const CHAR *q, *start; - const CHAR *r; - - /* - * An extra check may avoid errors and isn't that costly ! 
- */ - if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') || - (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return; - - ctxt->cur += 4; - start = q = ctxt->cur; - ctxt->cur++; - r = ctxt->cur; - ctxt->cur++; - while (IS_CHAR(ctxt->cur[0]) && - ((ctxt->cur[0] == ':') || (ctxt->cur[0] != '>') || - (*r != '-') || (*q != '-'))) { - ctxt->cur++;r++;q++; - } - if (!IS_CHAR(ctxt->cur[0])) { - fprintf(stderr, "Comment not terminated