diff --git a/ChangeLog b/ChangeLog index 7173b003..0f747186 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,28 +1,3 @@ -Thu Feb 23 02:03:56 CET 2001 Tomasz Kłoczko - - * *.c *.h libxml files: moved to libxml directory - this allow - simplify automake/autoconf. Now isn't neccessary hack on - am/ac level for make and remove libxml symlink (modified for this - also configure.in and main Makefile.am). Now automake abilities - are used in best way (like in many other projects with libraries). - * include/win32config.h: moved to libxml directory (now include - directory isn't neccessary). - * Makefile.am, examples/Makefile.am, libxml/Makefile.am: - added empty DEFS and in INCLUDES rest only -I$(top_builddir) - - this allow minimize parameters count passed to libtool script - (now compilation is also slyghtly more quiet). - * configure.in: simplifies libzdetestion - prepare separated - variables for keep libz name and path to libz header files isn't - realy neccessary (if someone have libz installed in non standard - prefix path to header files ald library can be passed as: - $ CFALGS="-I" LDFLAGS="-L" ./configure - * autogen.sh: check now for libxml/entities.h. - - After above building libxml pass correctly and also pass - "make install DESTDIR=" from tar ball generated by - "make dist". Seems ac/am reorganization is finished. This changes - not touches any other things on *.{c,h} files level. - Thu Feb 22 07:52:27 CET 2001 Daniel Veillard * xpath.c: finally implemented xmlXPathCompareNodeSets diff --git a/HTMLparser.c b/HTMLparser.c new file mode 100644 index 00000000..4fb40df8 --- /dev/null +++ b/HTMLparser.c @@ -0,0 +1,4966 @@ +/* + * HTMLparser.c : an HTML 4.0 non-verifying parser + * + * See Copyright for the status of this software. 
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+#ifdef WIN32
+#include "win32config.h"
+#else
+#include "config.h"
+#endif
+
+#include
+#ifdef LIBXML_HTML_ENABLED
+#include
+#include
+#ifdef HAVE_CTYPE_H
+#include
+#endif
+#ifdef HAVE_STDLIB_H
+#include
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include
+#endif
+#ifdef HAVE_FCNTL_H
+#include
+#endif
+#ifdef HAVE_UNISTD_H
+#include
+#endif
+#ifdef HAVE_ZLIB_H
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define HTML_MAX_NAMELEN 1000
+#define HTML_PARSER_BIG_BUFFER_SIZE 1000
+#define HTML_PARSER_BUFFER_SIZE 100
+
+/* #define DEBUG */
+/* #define DEBUG_PUSH */
+
+int htmlOmittedDefaultValue = 1;
+
+/************************************************************************
+ *                                                                      *
+ *          Parser stacks related functions and macros                  *
+ *                                                                      *
+ ************************************************************************/
+
+/*
+ * Generic function for accessing stacks in the Parser Context
+ *
+ * PUSH_AND_POP(scope, type, name) generates two functions working on the
+ * ctxt->nameTab / ctxt->nameNr / ctxt->nameMax / ctxt->name fields:
+ *
+ * html<name>Push(ctxt, value): stores value on top of the stack, makes it
+ *     the current ctxt->name and returns the index it was stored at.
+ *     The table is doubled when full; if that reallocation fails an error
+ *     is reported, 0 is returned and the stack is left exactly as it was
+ *     (the previous table is neither lost nor leaked).
+ *
+ * html<name>Pop(ctxt): removes and returns the top value, updating
+ *     ctxt->name to the new top (NULL when the stack becomes empty).
+ *     Popping an empty stack returns 0 and leaves the counter at 0, so
+ *     that a later push can never write before the start of the table.
+ */
+
+#define PUSH_AND_POP(scope, type, name)					\
+scope int html##name##Push(htmlParserCtxtPtr ctxt, type value) {	\
+    if (ctxt->name##Nr >= ctxt->name##Max) {				\
+        /* grow through a temporary: keep the old table on failure */	\
+        type *tmp = (type *) xmlRealloc(ctxt->name##Tab,		\
+            ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));		\
+        if (tmp == NULL) {						\
+            xmlGenericError(xmlGenericErrorContext,			\
+                "realloc failed !\n");					\
+            return(0);							\
+        }								\
+        ctxt->name##Tab = tmp;						\
+        ctxt->name##Max *= 2;						\
+    }									\
+    ctxt->name##Tab[ctxt->name##Nr] = value;				\
+    ctxt->name = value;							\
+    return(ctxt->name##Nr++);						\
+}									\
+scope type html##name##Pop(htmlParserCtxtPtr ctxt) {			\
+    type ret;								\
+    /* nothing to pop: do not let the counter go negative */		\
+    if (ctxt->name##Nr <= 0) return(0);					\
+    ctxt->name##Nr--;							\
+    if (ctxt->name##Nr > 0)						\
+        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
+    else								\
+        ctxt->name = NULL;						\
+    ret = ctxt->name##Tab[ctxt->name##Nr];				\
+    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
+    return(ret);							\
+}									\
+
+PUSH_AND_POP(extern, xmlNodePtr, node)
+PUSH_AND_POP(extern, xmlChar*, name)
+
+/*
+ * Macros for accessing the content. Those should be used only by the parser,
+ * and not exported.
+ *
+ * Dirty macros, i.e. one needs to make assumptions on the context to use them
+ *
+ * CUR_PTR return the current pointer to the xmlChar to be parsed.
+ * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
+ * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
+ * in UNICODE mode. This should be used internally by the parser
+ * only to compare to ASCII values otherwise it would break when
+ * running with UTF-8 encoding.
+ * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
+ * to compare on ASCII based substring.
+ * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
+ * it should be used only to compare on ASCII based substring.
+ * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
+ * strings within the parser.
+ *
+ * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
+ *
+ * CURRENT Returns the current char value, with the full decoding of
+ * UTF-8 if we are using this mode. It returns an int.
+ * NEXT Skip to the next character, this does the proper decoding
+ * in UTF-8 mode. It also pop-up unfinished entities on the fly.
+ * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
+ *
+ * All of these expand to expressions using a variable named ctxt, which
+ * must be in scope at the point of use.
+ */
+
+#define UPPER (toupper(*ctxt->input->cur))
+
+#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
+
+#define NXT(val) ctxt->input->cur[(val)]
+
+#define UPP(val) (toupper(ctxt->input->cur[(val)]))
+
+#define CUR_PTR ctxt->input->cur
+
+#define SHRINK xmlParserInputShrink(ctxt->input)
+
+#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
+
+#define CURRENT ((int) (*ctxt->input->cur))
+
+#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
+
+/* Imported from XML */
+
+/* #define CUR (ctxt->token ?
ctxt->token : (int) (*ctxt->input->cur)) */
+#define CUR ((int) (*ctxt->input->cur))
+#define NEXT xmlNextChar(ctxt),ctxt->nbChars++
+
+#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
+#define NXT(val) ctxt->input->cur[(val)]
+#define CUR_PTR ctxt->input->cur
+
+
+#define NEXTL(l) do {							\
+    if (*(ctxt->input->cur) == '\n') {					\
+        ctxt->input->line++; ctxt->input->col = 1;			\
+    } else ctxt->input->col++;						\
+    ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;		\
+  } while (0)
+
+/************
+ \
+ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
+ if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
+ ************/
+
+#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
+#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
+
+/*
+ * NOTE(review): COPY_BUF expands to a bare if/else, so it must only be
+ * used as a complete statement, never as the body of an unbraced if.
+ */
+#define COPY_BUF(l,b,i,v)						\
+    if (l == 1) b[i++] = (xmlChar) v;					\
+    else i += xmlCopyChar(l,&b[i],v)
+
+/**
+ * htmlCurrentChar:
+ * @ctxt: the HTML parser context
+ * @len: pointer to the length of the char read
+ *
+ * The current char value, if using UTF-8 this may actually span multiple
+ * bytes in the input buffer. Implement the end of line normalization:
+ * 2.11 End-of-Line Handling
+ * If the encoding is unspecified, in the case we find an ISO-Latin-1
+ * char, then the encoding converter is plugged in automatically.
+ *
+ * When a pending ctxt->token exists it is returned with *len set to 0.
+ * When the parser is already in the XML_PARSER_EOF state, 0 is returned
+ * and *len is left untouched.
+ *
+ * Returns the current char value and its length
+ */
+
+int
+htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
+    if (ctxt->instate == XML_PARSER_EOF)
+        return(0);
+
+    if (ctxt->token != 0) {
+        *len = 0;
+        return(ctxt->token);
+    }
+    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
+        /*
+         * We are supposed to handle UTF8, check it's valid
+         * From rfc2044: encoding of the Unicode values on UTF-8:
+         *
+         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+         * 0000 0000-0000 007F   0xxxxxxx
+         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+         *
+         * Check for the 0x110000 limit too
+         */
+        const unsigned char *cur = ctxt->input->cur;
+        unsigned char c;
+        unsigned int val;
+
+        c = *cur;
+        if (c & 0x80) {
+            /*
+             * Growing the input may relocate the underlying buffer, so
+             * the local cursor has to be reloaded from the input after
+             * every call to xmlParserInputGrow().
+             */
+            if (cur[1] == 0) {
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                cur = ctxt->input->cur;
+            }
+            if ((cur[1] & 0xc0) != 0x80)
+                goto encoding_error;
+            if ((c & 0xe0) == 0xe0) {
+
+                if (cur[2] == 0) {
+                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                    cur = ctxt->input->cur;
+                }
+                if ((cur[2] & 0xc0) != 0x80)
+                    goto encoding_error;
+                if ((c & 0xf0) == 0xf0) {
+                    if (cur[3] == 0) {
+                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+                        cur = ctxt->input->cur;
+                    }
+                    if (((c & 0xf8) != 0xf0) ||
+                        ((cur[3] & 0xc0) != 0x80))
+                        goto encoding_error;
+                    /* 4-byte code */
+                    *len = 4;
+                    val = (cur[0] & 0x7) << 18;
+                    val |= (cur[1] & 0x3f) << 12;
+                    val |= (cur[2] & 0x3f) << 6;
+                    val |= cur[3] & 0x3f;
+                } else {
+                    /* 3-byte code */
+                    *len = 3;
+                    val = (cur[0] & 0xf) << 12;
+                    val |= (cur[1] & 0x3f) << 6;
+                    val |= cur[2] & 0x3f;
+                }
+            } else {
+                /* 2-byte code */
+                *len = 2;
+                val = (cur[0] & 0x1f) << 6;
+                val |= cur[1] & 0x3f;
+            }
+            if (!IS_CHAR(val)) {
+                /* decoded fine but not an allowed char: report, keep going */
+                ctxt->errNo = XML_ERR_INVALID_ENCODING;
+                if ((ctxt->sax != NULL) &&
+                    (ctxt->sax->error != NULL))
+                    ctxt->sax->error(ctxt->userData,
+                                     "Char 0x%X out of allowed range\n", val);
+                ctxt->wellFormed = 0;
+                ctxt->disableSAX = 1;
+            }
+            return(val);
+        } else {
+            /* 1-byte code */
+            *len = 1;
+            return((int) *ctxt->input->cur);
+        }
+    }
+    /*
+     * Assume it's a fixed length encoding (1) with
+     * a compatible encoding for the ASCII set, since
+     * XML constructs only use < 128 chars
+     */
+    *len = 1;
+    if ((int) *ctxt->input->cur < 0x80)
+        return((int) *ctxt->input->cur);
+
+    /*
+     * Humm this is bad, do an automatic flow conversion
+     */
+    xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    return(xmlCurrentChar(ctxt, len));
+
+encoding_error:
+    /*
+     * If we detect an UTF8 error that probably mean that the
+     * input encoding didn't get properly advertized in the
+     * declaration header. Report the error and switch the encoding
+     * to ISO-Latin-1 (if you don't like this policy, just declare the
+     * encoding !)
+     */
+    ctxt->errNo = XML_ERR_INVALID_ENCODING;
+    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
+        ctxt->sax->error(ctxt->userData,
+                         "Input is not proper UTF-8, indicate encoding !\n");
+        ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                         ctxt->input->cur[0], ctxt->input->cur[1],
+                         ctxt->input->cur[2], ctxt->input->cur[3]);
+    }
+
+    ctxt->charset = XML_CHAR_ENCODING_8859_1;
+    *len = 1;
+    return((int) *ctxt->input->cur);
+}
+
+/**
+ * htmlNextChar:
+ * @ctxt: the HTML parser context
+ *
+ * Skip to the next input char, keeping the line/column counters and
+ * ctxt->nbChars up to date. When the current byte is 0 and the input
+ * cannot be grown any further, the current input is popped instead.
+ */
+
+void
+htmlNextChar(htmlParserCtxtPtr ctxt) {
+    if (ctxt->instate == XML_PARSER_EOF)
+        return;
+    if ((*ctxt->input->cur == 0) &&
+        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+        xmlPopInput(ctxt);
+    } else {
+        if (*(ctxt->input->cur) == '\n') {
+            ctxt->input->line++; ctxt->input->col = 1;
+        } else ctxt->input->col++;
+        ctxt->input->cur++;
+        ctxt->nbChars++;
+        /* reached the end of the buffered data: try to read ahead */
+        if (*ctxt->input->cur == 0)
+            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+    }
+}
+
+/**
+ * htmlSkipBlankChars:
+ * @ctxt: the HTML parser context
+ *
+ * skip all blanks character found at that point in the input streams.
+ *
+ * Returns the number of space chars skipped
+ */
+
+int
+htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
+    int res = 0; /* number of blank chars consumed so far */
+
+    while (IS_BLANK(*(ctxt->input->cur))) {
+        if ((*ctxt->input->cur == 0) && /* NOTE(review): only reachable if
+                                         * IS_BLANK() accepts 0 - confirm */
+            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+            xmlPopInput(ctxt);
+        } else {
+            if (*(ctxt->input->cur) == '\n') { /* keep line/col in sync */
+                ctxt->input->line++; ctxt->input->col = 1;
+            } else ctxt->input->col++;
+            ctxt->input->cur++;
+            ctxt->nbChars++;
+            if (*ctxt->input->cur == 0) /* end of buffered data: read ahead */
+                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+        }
+        res++;
+    }
+    return(res);
+}
+
+
+
+/************************************************************************
+ * *
+ * The list of HTML elements and their properties *
+ * *
+ ************************************************************************/
+
+/*
+ * Start Tag: 1 means the start tag can be omitted
+ * End Tag: 1 means the end tag can be omitted
+ * 2 means it's forbidden (empty elements)
+ * Depr: this element is deprecated
+ * DTD: 1 means that this element is valid only in the Loose DTD
+ * 2 means that this element is valid only in the Frameset DTD
+ *
+ * Name,Start Tag,End Tag,Save End, Empty, Depr., DTD, Description
+ *
+ * The table is searched linearly by htmlTagLookup(), which compares the
+ * tag against the Name column; names are stored in lowercase.
+ * NOTE(review): the meaning of the "Save End" and "Empty" columns is not
+ * described here - confirm against the htmlElemDesc declaration.
+ */
+htmlElemDesc html40ElementTable[] = {
+{ "a", 0, 0, 0, 0, 0, 0, "anchor " },
+{ "abbr", 0, 0, 0, 0, 0, 0, "abbreviated form" },
+{ "acronym", 0, 0, 0, 0, 0, 0, "" },
+{ "address", 0, 0, 0, 0, 0, 0, "information on author " },
+{ "applet", 0, 0, 0, 0, 1, 1, "java applet " },
+{ "area", 0, 2, 2, 1, 0, 0, "client-side image map area " },
+{ "b", 0, 0, 0, 0, 0, 0, "bold text style" },
+{ "base", 0, 2, 2, 1, 0, 0, "document base uri " },
+{ "basefont", 0, 2, 2, 1, 1, 1, "base font size " },
+{ "bdo", 0, 0, 0, 0, 0, 0, "i18n bidi over-ride " },
+{ "big", 0, 0, 0, 0, 0, 0, "large text style" },
+{ "blockquote", 0, 0, 0, 0, 0, 0, "long quotation " },
+{ "body", 1, 1, 0, 0, 0, 0, "document body " },
+{ "br", 0, 2, 2, 1, 0, 0, "forced line break " },
+{ "button", 0, 0, 0, 0, 0, 0, "push button " },
+{ "caption", 0, 0, 0, 0, 0, 0, "table caption " },
+{ "center", 0, 0, 0, 0, 1, 1, "shorthand for div align=center " },
+{ "cite", 0, 0, 0, 0, 0, 0, "citation" },
+{ "code", 0, 0, 0, 0, 0, 0, "computer code fragment" },
+{ "col", 0, 2, 2, 1, 0, 0, "table column " },
+{ "colgroup", 0, 1, 0, 0, 0, 0, "table column group " },
+{ "dd", 0, 1, 0, 0, 0, 0, "definition description " },
+{ "del", 0, 0, 0, 0, 0, 0, "deleted text " },
+{ "dfn", 0, 0, 0, 0, 0, 0, "instance definition" },
+{ "dir", 0, 0, 0, 0, 1, 1, "directory list" },
+{ "div", 0, 0, 0, 0, 0, 0, "generic language/style container"},
+{ "dl", 0, 0, 0, 0, 0, 0, "definition list " },
+{ "dt", 0, 1, 0, 0, 0, 0, "definition term " },
+{ "em", 0, 0, 0, 0, 0, 0, "emphasis" },
+{ "fieldset", 0, 0, 0, 0, 0, 0, "form control group " },
+{ "font", 0, 0, 0, 0, 1, 1, "local change to font " },
+{ "form", 0, 0, 0, 0, 0, 0, "interactive form " },
+{ "frame", 0, 2, 2, 1, 0, 2, "subwindow " },
+{ "frameset", 0, 0, 0, 0, 0, 2, "window subdivision" },
+{ "h1", 0, 0, 0, 0, 0, 0, "heading " },
+{ "h2", 0, 0, 0, 0, 0, 0, "heading " },
+{ "h3", 0, 0, 0, 0, 0, 0, "heading " },
+{ "h4", 0, 0, 0, 0, 0, 0, "heading " },
+{ "h5", 0, 0, 0, 0, 0, 0, "heading " },
+{ "h6", 0, 0, 0, 0, 0, 0, "heading " },
+{ "head", 1, 1, 0, 0, 0, 0, "document head " },
+{ "hr", 0, 2, 2, 1, 0, 0, "horizontal rule " },
+{ "html", 1, 1, 0, 0, 0, 0, "document root element " },
+{ "i", 0, 0, 0, 0, 0, 0, "italic text style" },
+{ "iframe", 0, 0, 0, 0, 0, 1, "inline subwindow " },
+{ "img", 0, 2, 2, 1, 0, 0, "embedded image " },
+{ "input", 0, 2, 2, 1, 0, 0, "form control " },
+{ "ins", 0, 0, 0, 0, 0, 0, "inserted text" },
+{ "isindex", 0, 2, 2, 1, 1, 1, "single line prompt " },
+{ "kbd", 0, 0, 0, 0, 0, 0, "text to be entered by the user" },
+{ "label", 0, 0, 0, 0, 0, 0, "form field label text " },
+{ "legend", 0, 0, 0, 0, 0, 0, "fieldset legend " },
+{ "li", 0, 1, 1, 0, 0, 0, "list item " },
+{ "link", 0, 2, 2, 1, 0, 0, "a media-independent link " },
+{ "map", 0, 0, 0, 0, 0, 0, "client-side image map " },
+{ "menu", 0, 0, 0, 0, 1, 1, "menu list " },
+{ "meta", 0, 2, 2, 1, 0, 0, "generic metainformation " },
+{ "noframes", 0, 0, 0, 0, 0, 2, "alternate content container for non frame-based rendering " },
+{ "noscript", 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering " },
+{ "object", 0, 0, 0, 0, 0, 0, "generic embedded object " },
+{ "ol", 0, 0, 0, 0, 0, 0, "ordered list " },
+{ "optgroup", 0, 0, 0, 0, 0, 0, "option group " },
+{ "option", 0, 1, 0, 0, 0, 0, "selectable choice " },
+{ "p", 0, 1, 1, 0, 0, 0, "paragraph " },
+{ "param", 0, 2, 2, 1, 0, 0, "named property value " },
+{ "pre", 0, 0, 0, 0, 0, 0, "preformatted text " },
+{ "q", 0, 0, 0, 0, 0, 0, "short inline quotation " },
+{ "s", 0, 0, 0, 0, 1, 1, "strike-through text style" },
+{ "samp", 0, 0, 0, 0, 0, 0, "sample program output, scripts, etc." },
+{ "script", 0, 0, 0, 0, 0, 0, "script statements " },
+{ "select", 0, 0, 0, 0, 0, 0, "option selector " },
+{ "small", 0, 0, 0, 0, 0, 0, "small text style" },
+{ "span", 0, 0, 0, 0, 0, 0, "generic language/style container " },
+{ "strike", 0, 0, 0, 0, 1, 1, "strike-through text" },
+{ "strong", 0, 0, 0, 0, 0, 0, "strong emphasis" },
+{ "style", 0, 0, 0, 0, 0, 0, "style info " },
+{ "sub", 0, 0, 0, 0, 0, 0, "subscript" },
+{ "sup", 0, 0, 0, 0, 0, 0, "superscript " },
+{ "table", 0, 0, 0, 0, 0, 0, " " },
+{ "tbody", 1, 0, 0, 0, 0, 0, "table body " },
+{ "td", 0, 0, 0, 0, 0, 0, "table data cell" },
+{ "textarea", 0, 0, 0, 0, 0, 0, "multi-line text field " },
+{ "tfoot", 0, 1, 0, 0, 0, 0, "table footer " },
+{ "th", 0, 1, 0, 0, 0, 0, "table header cell" },
+{ "thead", 0, 1, 0, 0, 0, 0, "table header " },
+{ "title", 0, 0, 0, 0, 0, 0, "document title " },
+{ "tr", 0, 1, 0, 0, 0, 0, "table row " },
+{ "tt", 0, 0, 0, 0, 0, 0, "teletype or monospaced text style" },
+{ "u", 0, 0, 0, 0, 1, 1, "underlined text style" },
+{ "ul", 0, 0, 0, 0, 0, 0, "unordered list " },
+{ "var", 0, 0, 0, 0, 0, 0, "instance of a variable or program argument" },
+};
+
+/*
+ * start tags that imply the end of a current element
+ * any tag of each line implies the end of the current element if the type of
+ * that element is in the same line
+ *
+ * Layout: NULL-terminated groups, the whole list being closed by an
+ * additional NULL.
+ */
+char *htmlEquEnd[] = {
+"dt", "dd", "li", "option", NULL,
+"h1", "h2", "h3", "h4", "h5", "h6", NULL,
+"ol", "menu", "dir", "address", "pre", "listing", "xmp", NULL,
+NULL
+};
+/*
+ * According to the HTML DTD, HR should be added to the 2nd line above, as it
+ * is not allowed within a H1, H2, H3, etc. But we should tolerate that case
+ * because many documents contain rules in headings...
+ */
+
+/*
+ * start tags that imply the end of current element
+ *
+ * Layout: NULL-terminated groups, the whole list being closed by an
+ * additional NULL. In each group the first name is the newly opened tag
+ * and the following names are the currently open elements it closes
+ * (this is how htmlCheckAutoClose() reads the groups).
+ */
+char *htmlStartClose[] = {
+"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
+ "dl", "ul", "ol", "menu", "dir", "address", "pre",
+ "listing", "xmp", "head", NULL,
+"head", "p", NULL,
+"title", "p", NULL,
+"body", "head", "style", "link", "title", "p", NULL,
+"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
+ "pre", "listing", "xmp", "head", "li", NULL,
+"hr", "p", "head", NULL,
+"h1", "p", "head", NULL,
+"h2", "p", "head", NULL,
+"h3", "p", "head", NULL,
+"h4", "p", "head", NULL,
+"h5", "p", "head", NULL,
+"h6", "p", "head", NULL,
+"dir", "p", "head", NULL,
+"address", "p", "head", "ul", NULL,
+"pre", "p", "head", "ul", NULL,
+"listing", "p", "head", NULL,
+"xmp", "p", "head", NULL,
+"blockquote", "p", "head", NULL,
+"dl", "p", "dt", "menu", "dir", "address", "pre", "listing",
+ "xmp", "head", NULL,
+"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",
+ "head", "dd", NULL,
+"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",
+ "head", "dt", NULL,
+"ul", "p", "head", "ol", "menu", "dir", "address", "pre",
+ "listing", "xmp", NULL,
+"ol", "p", "head", "ul", NULL,
+"menu", "p", "head", "ul", NULL,
+"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,
+"div", "p", "head", NULL,
+"noscript", "p", "head", NULL,
+"center", "font", "b", "i", "p", "head", NULL,
+"a", "a",
NULL,
+"caption", "p", NULL,
+"colgroup", "caption", "colgroup", "col", "p", NULL,
+"col", "caption", "col", "p", NULL,
+"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
+ "listing", "xmp", "a", NULL,
+"th", "th", "td", NULL,
+"td", "th", "td", "p", NULL,
+"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
+"thead", "caption", "col", "colgroup", NULL,
+"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",
+ "tbody", "p", NULL,
+"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",
+ "tfoot", "tbody", "p", NULL,
+"optgroup", "option", NULL,
+"option", "option", NULL,
+"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
+ "pre", "listing", "xmp", "a", NULL,
+NULL
+};
+
+/*
+ * The list of HTML elements which are supposed not to have
+ * CDATA content and where a p element will be implied
+ * (NULL-terminated).
+ *
+ * TODO: extend that list by reading the HTML SGML DtD on
+ * implied paragraph
+ */
+static char *htmlNoContentElements[] = {
+    "html",
+    "head",
+    "body",
+    NULL
+};
+
+/*
+ * The list of HTML attributes which are of content %Script;
+ * NOTE: when adding ones, check htmlIsScriptAttribute() since
+ * it assumes the name starts with 'on'
+ *
+ * The array is not NULL-terminated: it is walked using its sizeof.
+ */
+static char *htmlScriptAttributes[] = {
+    "onclick",
+    "ondblclick",
+    "onmousedown",
+    "onmouseup",
+    "onmouseover",
+    "onmousemove",
+    "onmouseout",
+    "onkeypress",
+    "onkeydown",
+    "onkeyup",
+    "onload",
+    "onunload",
+    "onfocus",
+    "onblur",
+    "onsubmit",
+    "onreset",		/* was misspelled "onrest", so never matched */
+    "onchange",
+    "onselect"
+};
+
+
+/*
+ * htmlStartCloseIndex[n] points at the first entry of the n'th group of
+ * htmlStartClose; unused slots are NULL. Filled by htmlInitAutoClose().
+ */
+static char** htmlStartCloseIndex[100];
+static int htmlStartCloseIndexinitialized = 0;
+
+/************************************************************************
+ *                                                                      *
+ *              functions to handle HTML specific data                  *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * htmlInitAutoClose:
+ *
+ * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
+ * This is not reentrant.
Call xmlInitParser() once before processing in
+ * case of use in multithreaded programs.
+ */
+void
+htmlInitAutoClose(void) {
+    char **group = htmlStartClose;
+    int slot;
+
+    if (htmlStartCloseIndexinitialized)
+        return;
+
+    /* start from an empty index */
+    for (slot = 0; slot < 100; slot++)
+        htmlStartCloseIndex[slot] = NULL;
+
+    /* record the head of every group, always keeping a trailing NULL slot */
+    slot = 0;
+    while ((*group != NULL) && (slot < 100 - 1)) {
+        htmlStartCloseIndex[slot++] = group;
+        while (*group != NULL)
+            group++;
+        group++;		/* step over the group terminator */
+    }
+    htmlStartCloseIndexinitialized = 1;
+}
+
+/**
+ * htmlTagLookup:
+ * @tag: The tag name in lowercase
+ *
+ * Scan html40ElementTable for an entry whose name equals @tag.
+ *
+ * Returns the matching htmlElemDescPtr or NULL if there is none.
+ */
+htmlElemDescPtr
+htmlTagLookup(const xmlChar *tag) {
+    htmlElemDesc *entry = html40ElementTable;
+    htmlElemDesc *end = html40ElementTable +
+        (sizeof(html40ElementTable) / sizeof(html40ElementTable[0]));
+
+    for (; entry < end; entry++) {
+        if (xmlStrEqual(tag, BAD_CAST entry->name))
+            return(entry);
+    }
+    return(NULL);
+}
+
+/**
+ * htmlCheckAutoClose:
+ * @newtag: The new tag name
+ * @oldtag: The old tag name
+ *
+ * Tells whether opening @newtag implies closing @oldtag, i.e. whether
+ * @oldtag is listed in the htmlStartClose group headed by @newtag.
+ * The lookup index is built on first use.
+ *
+ * Returns 0 if no, 1 if yes.
+ */
+int
+htmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
+    char **entry = NULL;
+    int slot;
+
+    if (htmlStartCloseIndexinitialized == 0)
+        htmlInitAutoClose();
+
+    /* inefficient, but not a big deal: find the group headed by newtag */
+    for (slot = 0; slot < 100; slot++) {
+        entry = htmlStartCloseIndex[slot];
+        if (entry == NULL)
+            return(0);
+        if (xmlStrEqual(BAD_CAST *entry, newtag))
+            break;
+    }
+
+    /* walk the remaining members of that group looking for oldtag */
+    for (entry++; *entry != NULL; entry++) {
+        if (xmlStrEqual(BAD_CAST *entry, oldtag))
+            return(1);
+    }
+    return(0);
+}
+
+/**
+ * htmlAutoCloseOnClose:
+ * @ctxt: an HTML parser context
+ * @newtag: The new tag name
+ *
+ * The HTmL DtD allows an ending tag to implicitely close other tags.
+ */
+void
+htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
+    htmlElemDescPtr desc;
+    xmlChar *popped;
+    int depth;
+
+#ifdef DEBUG
+    xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
+    for (depth = 0; depth < ctxt->nameNr; depth++)
+        xmlGenericError(xmlGenericErrorContext,"%d : %s\n", depth, ctxt->nameTab[depth]);
+#endif
+
+    /* nothing to do unless newtag is open somewhere on the name stack */
+    depth = ctxt->nameNr;
+    while (--depth >= 0) {
+        if (xmlStrEqual(newtag, ctxt->nameTab[depth]))
+            break;
+    }
+    if (depth < 0)
+        return;
+
+    /* close everything opened above newtag; newtag itself is left open */
+    while (!xmlStrEqual(newtag, ctxt->name)) {
+        desc = htmlTagLookup(ctxt->name);
+        if ((desc != NULL) && (desc->endTag != 1)) {
+            /* this element's end tag is not optional: report a mismatch */
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt->userData,
+                                 "Opening and ending tag mismatch: %s and %s\n",
+                                 newtag, ctxt->name);
+            ctxt->wellFormed = 0;
+        } else {
+#ifdef DEBUG
+            xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
+#endif
+        }
+
+        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
+            ctxt->sax->endElement(ctxt->userData, ctxt->name);
+
+        popped = htmlnamePop(ctxt);
+        if (popped == NULL)
+            continue;
+#ifdef DEBUG
+        xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnClose: popped %s\n", popped);
+#endif
+        xmlFree(popped);
+    }
+}
+
+/**
+ * htmlAutoClose:
+ * @ctxt: an HTML parser context
+ * @newtag: The new tag name or NULL
+ *
+ * The HTmL DtD allows a tag to implicitely close other tags.
+ * The list is kept in htmlStartClose array. This function is
+ * called when a new tag has been detected and generates the
+ * appropriates closes if possible/needed.
+ * If newtag is NULL this mean we are at the end of the resource
+ * and we should check
+ */
+void
+htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
+    xmlChar *popped;
+
+    if (newtag != NULL) {
+        /* a new tag: close every open element it implicitly terminates */
+        while ((ctxt->name != NULL) &&
+               (htmlCheckAutoClose(newtag, ctxt->name))) {
+#ifdef DEBUG
+            xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: %s closes %s\n", newtag, ctxt->name);
+#endif
+            if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
+                ctxt->sax->endElement(ctxt->userData, ctxt->name);
+            popped = htmlnamePop(ctxt);
+            if (popped != NULL) {
+#ifdef DEBUG
+                xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: popped %s\n", popped);
+#endif
+                xmlFree(popped);
+            }
+        }
+        return;
+    }
+
+    /* end of the resource: unwind down to head, body and html ... */
+    htmlAutoCloseOnClose(ctxt, BAD_CAST"head");
+    htmlAutoCloseOnClose(ctxt, BAD_CAST"body");
+    htmlAutoCloseOnClose(ctxt, BAD_CAST"html");
+
+    /* ... then close those structural elements themselves */
+    for (;;) {
+        if (ctxt->name == NULL)
+            break;
+        if (!(xmlStrEqual(ctxt->name, BAD_CAST"head")) &&
+            !(xmlStrEqual(ctxt->name, BAD_CAST"body")) &&
+            !(xmlStrEqual(ctxt->name, BAD_CAST"html")))
+            break;
+#ifdef DEBUG
+        xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: EOF closes %s\n", ctxt->name);
+#endif
+        if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
+            ctxt->sax->endElement(ctxt->userData, ctxt->name);
+        popped = htmlnamePop(ctxt);
+        if (popped != NULL) {
+#ifdef DEBUG
+            xmlGenericError(xmlGenericErrorContext,"htmlAutoClose: popped %s\n", popped);
+#endif
+            xmlFree(popped);
+        }
+    }
+
+}
+
+/**
+ * htmlAutoCloseTag:
+ * @doc: the HTML document
+ * @name: The tag name
+ * @elem: the HTML element
+ *
+ * The HTmL DtD allows a tag to implicitely close other tags.
+ * The list is kept in htmlStartClose array. This function checks
+ * if the element or one of it's children would autoclose the
+ * given tag.
+ *
+ * Returns 1 if autoclose, 0 otherwise
+ */
+int
+htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
+    htmlNodePtr kid;
+
+    if (elem == NULL)
+        return(1);
+    if (xmlStrEqual(name, elem->name))
+        return(0);
+    if (htmlCheckAutoClose(elem->name, name))
+        return(1);
+
+    /* depth-first search through the subtree */
+    for (kid = elem->children; kid != NULL; kid = kid->next) {
+        if (htmlAutoCloseTag(doc, name, kid))
+            return(1);
+    }
+    return(0);
+}
+
+/**
+ * htmlIsAutoClosed:
+ * @doc: the HTML document
+ * @elem: the HTML element
+ *
+ * Checks whether any descendant of @elem would, according to the
+ * htmlStartClose list, implicitly close @elem.
+ *
+ * Returns 1 if autoclosed (or if @elem is NULL), 0 otherwise
+ */
+int
+htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
+    htmlNodePtr kid;
+
+    if (elem == NULL)
+        return(1);
+
+    for (kid = elem->children; kid != NULL; kid = kid->next) {
+        if (htmlAutoCloseTag(doc, elem->name, kid))
+            return(1);
+    }
+    return(0);
+}
+
+/**
+ * htmlCheckImplied:
+ * @ctxt: an HTML parser context
+ * @newtag: The new tag name
+ *
+ * Called when a new tag has been detected: pushes, and reports through
+ * the SAX startElement callback, the html, head or body elements that the
+ * document left implicit. Does nothing when htmlOmittedDefaultValue is 0.
+ */
+void
+htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
+    int headContent;
+    int pos;
+
+    if (!htmlOmittedDefaultValue)
+        return;
+    if (xmlStrEqual(newtag, BAD_CAST"html"))
+        return;
+
+    /* empty stack: the root element was omitted */
+    if (ctxt->nameNr <= 0) {
+#ifdef DEBUG
+        xmlGenericError(xmlGenericErrorContext,"Implied element html: pushed html\n");
+#endif
+        htmlnamePush(ctxt, xmlStrdup(BAD_CAST"html"));
+        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
+            ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
+    }
+
+    if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
+        return;
+
+    /*
+     * dropped OBJECT ... i you put it first BODY will be
+     * assumed !
+     */
+    headContent = (xmlStrEqual(newtag, BAD_CAST"script")) ||
+                  (xmlStrEqual(newtag, BAD_CAST"style")) ||
+                  (xmlStrEqual(newtag, BAD_CAST"meta")) ||
+                  (xmlStrEqual(newtag, BAD_CAST"link")) ||
+                  (xmlStrEqual(newtag, BAD_CAST"title")) ||
+                  (xmlStrEqual(newtag, BAD_CAST"base"));
+
+    if ((ctxt->nameNr <= 1) && headContent) {
+#ifdef DEBUG
+        xmlGenericError(xmlGenericErrorContext,"Implied element head: pushed head\n");
+#endif
+        htmlnamePush(ctxt, xmlStrdup(BAD_CAST"head"));
+        if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
+            ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
+        return;
+    }
+
+    /* frame related tags never imply a body */
+    if ((xmlStrEqual(newtag, BAD_CAST"noframes")) ||
+        (xmlStrEqual(newtag, BAD_CAST"frame")) ||
+        (xmlStrEqual(newtag, BAD_CAST"frameset")))
+        return;
+
+    /* a body or head already open means there is nothing to imply */
+    for (pos = 0; pos < ctxt->nameNr; pos++) {
+        if (xmlStrEqual(ctxt->nameTab[pos], BAD_CAST"body"))
+            return;
+        if (xmlStrEqual(ctxt->nameTab[pos], BAD_CAST"head"))
+            return;
+    }
+
+#ifdef DEBUG
+    xmlGenericError(xmlGenericErrorContext,"Implied element body: pushed body\n");
+#endif
+    htmlnamePush(ctxt, xmlStrdup(BAD_CAST"body"));
+    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
+        ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
+}
+
+/**
+ * htmlCheckParagraph
+ * @ctxt: an HTML parser context
+ *
+ * Check whether a p element need to be implied before inserting
+ * characters in the current element.
+ *
+ * Returns 1 if a paragraph has been inserted, 0 if not and -1
+ * in case of error.
+ */
+
+int
+htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
+    const xmlChar *current;
+    int implied = 0;
+    int idx;
+
+    if (ctxt == NULL)
+        return(-1);
+
+    current = ctxt->name;
+    if (current == NULL) {
+        /* no open element at all: a paragraph is always implied */
+        implied = 1;
+    } else {
+        if (!htmlOmittedDefaultValue)
+            return(0);
+        /* implied only inside elements that cannot hold text directly */
+        for (idx = 0; htmlNoContentElements[idx] != NULL; idx++) {
+            if (xmlStrEqual(current, BAD_CAST htmlNoContentElements[idx])) {
+#ifdef DEBUG
+                xmlGenericError(xmlGenericErrorContext,"Implied element paragraph\n");
+#endif
+                implied = 1;
+                break;
+            }
+        }
+    }
+    if (!implied)
+        return(0);
+
+    /* open the implied p exactly as an explicit start tag would */
+    htmlAutoClose(ctxt, BAD_CAST"p");
+    htmlCheckImplied(ctxt, BAD_CAST"p");
+    htmlnamePush(ctxt, xmlStrdup(BAD_CAST"p"));
+    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
+        ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
+    return(1);
+}
+
+/**
+ * htmlIsScriptAttribute:
+ * @name: an attribute name
+ *
+ * Tells whether @name is one of the attributes listed in
+ * htmlScriptAttributes, i.e. an attribute of content type Script.
+ *
+ * Returns 1 if the attribute is a script one, 0 otherwise (including
+ * when @name is NULL)
+ */
+int
+htmlIsScriptAttribute(const xmlChar *name) {
+    unsigned int count = sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
+    unsigned int idx;
+
+    if (name == NULL)
+        return(0);
+
+    /*
+     * cheap rejection first: all script attributes start with 'on'
+     */
+    if (name[0] != 'o')
+        return(0);
+    if (name[1] != 'n')
+        return(0);
+
+    for (idx = 0; idx < count; idx++) {
+        if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[idx]))
+            return(1);
+    }
+    return(0);
+}
+
+/************************************************************************
+ *                                                                      *
+ *              The list of HTML predefined entities                    *
+ *                                                                      *
+ ************************************************************************/
+
+
+htmlEntityDesc html40EntitiesTable[] = {
+/*
+ * the 4 absolute ones, plus apostrophe.
+ */ +{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" }, +{ 38, "amp", "ampersand, U+0026 ISOnum" }, +{ 39, "apos", "single quote" }, +{ 60, "lt", "less-than sign, U+003C ISOnum" }, +{ 62, "gt", "greater-than sign, U+003E ISOnum" }, + +/* + * A bunch still in the 128-255 range + * Replacing them depend really on the charset used. + */ +{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" }, +{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" }, +{ 162, "cent", "cent sign, U+00A2 ISOnum" }, +{ 163, "pound","pound sign, U+00A3 ISOnum" }, +{ 164, "curren","currency sign, U+00A4 ISOnum" }, +{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" }, +{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" }, +{ 167, "sect", "section sign, U+00A7 ISOnum" }, +{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" }, +{ 169, "copy", "copyright sign, U+00A9 ISOnum" }, +{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" }, +{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" }, +{ 172, "not", "not sign, U+00AC ISOnum" }, +{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" }, +{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" }, +{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" }, +{ 176, "deg", "degree sign, U+00B0 ISOnum" }, +{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" }, +{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" }, +{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" }, +{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" }, +{ 181, "micro","micro sign, U+00B5 ISOnum" }, +{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" }, +{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" }, +{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" }, +{ 185, "sup1", 
"superscript one = superscript digit one, U+00B9 ISOnum" }, +{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" }, +{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" }, +{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" }, +{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" }, +{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" }, +{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" }, +{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" }, +{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" }, +{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" }, +{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" }, +{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" }, +{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" }, +{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" }, +{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" }, +{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" }, +{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" }, +{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" }, +{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" }, +{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" }, +{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" }, +{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" }, +{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" }, +{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" }, +{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" }, +{ 210, "Ograve","latin capital 
letter O with grave, U+00D2 ISOlat1" }, +{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" }, +{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" }, +{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" }, +{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" }, +{ 215, "times","multiplication sign, U+00D7 ISOnum" }, +{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" }, +{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" }, +{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" }, +{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" }, +{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" }, +{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" }, +{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" }, +{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" }, +{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" }, +{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" }, +{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" }, +{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" }, +{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" }, +{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" }, +{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" }, +{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" }, +{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" }, +{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" }, +{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" }, +{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" }, +{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" }, 
+{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" }, +{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" }, +{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" }, +{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" }, +{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" }, +{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" }, +{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" }, +{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" }, +{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" }, +{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" }, +{ 247, "divide","division sign, U+00F7 ISOnum" }, +{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" }, +{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" }, +{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" }, +{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" }, +{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" }, +{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" }, +{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" }, +{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" }, + +{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" }, +{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" }, +{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" }, +{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" }, +{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" }, + +/* + * Anything below should really be kept as entities references + */ +{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" }, + +{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" }, +{ 732, "tilde","small tilde, U+02DC ISOdia" }, + +{ 913, "Alpha","greek 
capital letter alpha, U+0391" }, +{ 914, "Beta", "greek capital letter beta, U+0392" }, +{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" }, +{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" }, +{ 917, "Epsilon","greek capital letter epsilon, U+0395" }, +{ 918, "Zeta", "greek capital letter zeta, U+0396" }, +{ 919, "Eta", "greek capital letter eta, U+0397" }, +{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" }, +{ 921, "Iota", "greek capital letter iota, U+0399" }, +{ 922, "Kappa","greek capital letter kappa, U+039A" }, +{ 923, "Lambda""greek capital letter lambda, U+039B ISOgrk3" }, +{ 924, "Mu", "greek capital letter mu, U+039C" }, +{ 925, "Nu", "greek capital letter nu, U+039D" }, +{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" }, +{ 927, "Omicron","greek capital letter omicron, U+039F" }, +{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" }, +{ 929, "Rho", "greek capital letter rho, U+03A1" }, +{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" }, +{ 932, "Tau", "greek capital letter tau, U+03A4" }, +{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" }, +{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" }, +{ 935, "Chi", "greek capital letter chi, U+03A7" }, +{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" }, +{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" }, + +{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" }, +{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" }, +{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" }, +{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" }, +{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" }, +{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" }, +{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" }, +{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" }, +{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" }, +{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" 
}, +{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" }, +{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" }, +{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" }, +{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" }, +{ 959, "omicron","greek small letter omicron, U+03BF NEW" }, +{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" }, +{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" }, +{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" }, +{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" }, +{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" }, +{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" }, +{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" }, +{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" }, +{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" }, +{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" }, +{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" }, +{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" }, +{ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" }, + +{ 8194, "ensp", "en space, U+2002 ISOpub" }, +{ 8195, "emsp", "em space, U+2003 ISOpub" }, +{ 8201, "thinsp","thin space, U+2009 ISOpub" }, +{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" }, +{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" }, +{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" }, +{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" }, +{ 8211, "ndash","en dash, U+2013 ISOpub" }, +{ 8212, "mdash","em dash, U+2014 ISOpub" }, +{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" }, +{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" }, +{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" }, +{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" }, +{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" }, +{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" }, +{ 8224, "dagger","dagger, U+2020 ISOpub" 
}, +{ 8225, "Dagger","double dagger, U+2021 ISOpub" }, + +{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" }, +{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" }, + +{ 8240, "permil","per mille sign, U+2030 ISOtech" }, + +{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" }, +{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" }, + +{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" }, +{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" }, + +{ 8254, "oline","overline = spacing overscore, U+203E NEW" }, +{ 8260, "frasl","fraction slash, U+2044 NEW" }, + +{ 8364, "euro", "euro sign, U+20AC NEW" }, + +{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" }, +{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" }, +{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" }, +{ 8482, "trade","trade mark sign, U+2122 ISOnum" }, +{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" }, +{ 8592, "larr", "leftwards arrow, U+2190 ISOnum" }, +{ 8593, "uarr", "upwards arrow, U+2191 ISOnum" }, +{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" }, +{ 8595, "darr", "downwards arrow, U+2193 ISOnum" }, +{ 8596, "harr", "left right arrow, U+2194 ISOamsa" }, +{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" }, +{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" }, +{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" }, +{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" }, +{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" }, +{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" }, + +{ 8704, "forall","for all, U+2200 ISOtech" }, +{ 8706, "part", "partial differential, U+2202 ISOtech" }, +{ 8707, "exist","there exists, U+2203 ISOtech" }, +{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" }, +{ 8711, 
"nabla","nabla = backward difference, U+2207 ISOtech" }, +{ 8712, "isin", "element of, U+2208 ISOtech" }, +{ 8713, "notin","not an element of, U+2209 ISOtech" }, +{ 8715, "ni", "contains as member, U+220B ISOtech" }, +{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" }, +{ 8721, "sum", "n-ary sumation, U+2211 ISOamsb" }, +{ 8722, "minus","minus sign, U+2212 ISOtech" }, +{ 8727, "lowast","asterisk operator, U+2217 ISOtech" }, +{ 8730, "radic","square root = radical sign, U+221A ISOtech" }, +{ 8733, "prop", "proportional to, U+221D ISOtech" }, +{ 8734, "infin","infinity, U+221E ISOtech" }, +{ 8736, "ang", "angle, U+2220 ISOamso" }, +{ 8743, "and", "logical and = wedge, U+2227 ISOtech" }, +{ 8744, "or", "logical or = vee, U+2228 ISOtech" }, +{ 8745, "cap", "intersection = cap, U+2229 ISOtech" }, +{ 8746, "cup", "union = cup, U+222A ISOtech" }, +{ 8747, "int", "integral, U+222B ISOtech" }, +{ 8756, "there4","therefore, U+2234 ISOtech" }, +{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" }, +{ 8773, "cong", "approximately equal to, U+2245 ISOtech" }, +{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" }, +{ 8800, "ne", "not equal to, U+2260 ISOtech" }, +{ 8801, "equiv","identical to, U+2261 ISOtech" }, +{ 8804, "le", "less-than or equal to, U+2264 ISOtech" }, +{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" }, +{ 8834, "sub", "subset of, U+2282 ISOtech" }, +{ 8835, "sup", "superset of, U+2283 ISOtech" }, +{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" }, +{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" }, +{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" }, +{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" }, +{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" }, +{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" }, +{ 8901, "sdot", "dot operator, U+22C5 ISOamsb" }, +{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" }, +{ 8969, 
"rceil","right ceiling, U+2309 ISOamsc" }, +{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" }, +{ 8971, "rfloor","right floor, U+230B ISOamsc" }, +{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" }, +{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" }, +{ 9674, "loz", "lozenge, U+25CA ISOpub" }, + +{ 9824, "spades","black spade suit, U+2660 ISOpub" }, +{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" }, +{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" }, +{ 9830, "diams","black diamond suit, U+2666 ISOpub" }, + +}; + +/************************************************************************ + * * + * Commodity functions to handle entities * + * * + ************************************************************************/ + +/* + * Macro used to grow the current buffer. + */ +#define growBuffer(buffer) { \ + buffer##_size *= 2; \ + buffer = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + if (buffer == NULL) { \ + perror("realloc failed"); \ + return(NULL); \ + } \ +} + +/** + * htmlEntityLookup: + * @name: the entity name + * + * Lookup the given entity in EntitiesTable + * + * TODO: the linear scan is really ugly, an hash table is really needed. + * + * Returns the associated htmlEntityDescPtr if found, NULL otherwise. + */ +htmlEntityDescPtr +htmlEntityLookup(const xmlChar *name) { + int i; + + for (i = 0;i < (sizeof(html40EntitiesTable)/ + sizeof(html40EntitiesTable[0]));i++) { + if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", name); +#endif + return(&html40EntitiesTable[i]); + } + } + return(NULL); +} + +/** + * htmlEntityValueLookup: + * @value: the entity's unicode value + * + * Lookup the given entity in EntitiesTable + * + * TODO: the linear scan is really ugly, an hash table is really needed. + * + * Returns the associated htmlEntityDescPtr if found, NULL otherwise. 
+ */ +htmlEntityDescPtr +htmlEntityValueLookup(int value) { + int i; +#ifdef DEBUG + int lv = 0; +#endif + + for (i = 0;i < (sizeof(html40EntitiesTable)/ + sizeof(html40EntitiesTable[0]));i++) { + if ((unsigned int) html40EntitiesTable[i].value >= value) { + if ((unsigned int) html40EntitiesTable[i].value > value) + break; +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Found entity %s\n", html40EntitiesTable[i].name); +#endif + return(&html40EntitiesTable[i]); + } +#ifdef DEBUG + if (lv > html40EntitiesTable[i].value) { + xmlGenericError(xmlGenericErrorContext, + "html40EntitiesTable[] is not sorted (%d > %d)!\n", + lv, html40EntitiesTable[i].value); + } + lv = html40EntitiesTable[i].value; +#endif + } + return(NULL); +} + +/** + * UTF8ToHtml: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * plus HTML entities block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictiable. + * The value of @outlen after return is the number of octets consumed. 
+ */ +int +UTF8ToHtml(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen) { + const unsigned char* processed = in; + const unsigned char* outend; + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend; + unsigned int c, d; + int trailing; + + if (in == NULL) { + /* + * initialization nothing to do + */ + *outlen = 0; + *inlen = 0; + return(0); + } + inend = in + (*inlen); + outend = out + (*outlen); + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) + break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x80) { + if (out + 1 >= outend) + break; + *out++ = c; + } else { + int len; + htmlEntityDescPtr ent; + + /* + * Try to lookup a predefined HTML entity for it + */ + + ent = htmlEntityValueLookup(c); + if (ent == NULL) { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + len = strlen(ent->name); + if (out + 2 + len >= outend) + break; + *out++ = '&'; + memcpy(out, ent->name, len); + out += len; + *out++ = ';'; + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + +/** + * htmlEncodeEntities: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * @quoteChar: 
the quote character to escape (' or ") or zero. + * + * Take a block of UTF-8 chars in and try to convert it to an ASCII + * plus HTML entities block of chars out. + * + * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictiable. + * The value of @outlen after return is the number of octets consumed. + */ +int +htmlEncodeEntities(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen, int quoteChar) { + const unsigned char* processed = in; + const unsigned char* outend = out + (*outlen); + const unsigned char* outstart = out; + const unsigned char* instart = in; + const unsigned char* inend = in + (*inlen); + unsigned int c, d; + int trailing; + + while (in < inend) { + d = *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in Ascii */ + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + + if (inend - in < trailing) + break; + + while (trailing--) { + if (((d= *in++) & 0xC0) != 0x80) { + *outlen = out - outstart; + *inlen = processed - instart; + return(-2); + } + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') { + if (out >= outend) + break; + *out++ = c; + } else { + htmlEntityDescPtr ent; + const char *cp; + char nbuf[16]; + int len; + + /* + * Try to lookup a predefined HTML entity for it + */ + ent = htmlEntityValueLookup(c); + if (ent == NULL) { + sprintf(nbuf, "#%u", c); + cp = nbuf; + } + else + cp = ent->name; + len = strlen(cp); + if (out + 2 + len > outend) + 
break; + *out++ = '&'; + memcpy(out, cp, len); + out += len; + *out++ = ';'; + } + processed = in; + } + *outlen = out - outstart; + *inlen = processed - instart; + return(0); +} + +/** + * htmlDecodeEntities: + * @ctxt: the parser context + * @len: the len to decode (in bytes !), -1 for no size limit + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * Subtitute the HTML entities by their value + * + * DEPRECATED !!!! + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, + xmlChar end, xmlChar end2, xmlChar end3) { + xmlChar *name = NULL; + xmlChar *buffer = NULL; + unsigned int buffer_size = 0; + unsigned int nbchars = 0; + htmlEntityDescPtr ent; + unsigned int max = (unsigned int) len; + int c,l; + + if (ctxt->depth > 40) { + ctxt->errNo = XML_ERR_ENTITY_LOOP; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Detected entity reference loop\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return(NULL); + } + + /* + * allocate a translation buffer. + */ + buffer_size = HTML_PARSER_BIG_BUFFER_SIZE; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + perror("xmlDecodeEntities: malloc failed"); + return(NULL); + } + + /* + * Ok loop until we reach one of the ending char or a size limit. 
+ */ + c = CUR_CHAR(l); + while ((nbchars < max) && (c != end) && + (c != end2) && (c != end3)) { + + if (c == 0) break; + if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) { + int val = htmlParseCharRef(ctxt); + COPY_BUF(0,buffer,nbchars,val); + NEXTL(l); + } else if ((c == '&') && (ctxt->token != '&')) { + ent = htmlParseEntityRef(ctxt, &name); + if (name != NULL) { + if (ent != NULL) { + int val = ent->value; + COPY_BUF(0,buffer,nbchars,val); + NEXTL(l); + } else { + const xmlChar *cur = name; + + buffer[nbchars++] = '&'; + if (nbchars > buffer_size - HTML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + while (*cur != 0) { + buffer[nbchars++] = *cur++; + } + buffer[nbchars++] = ';'; + } + } + } else { + COPY_BUF(l,buffer,nbchars,c); + NEXTL(l); + if (nbchars > buffer_size - HTML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + c = CUR_CHAR(l); + } + buffer[nbchars++] = 0; + return(buffer); +} + +/************************************************************************ + * * + * Commodity functions to handle streams * + * * + ************************************************************************/ + +/** + * htmlFreeInputStream: + * @input: an htmlParserInputPtr + * + * Free up an input stream. 
+ */ +void +htmlFreeInputStream(htmlParserInputPtr input) { + if (input == NULL) return; + + if (input->filename != NULL) xmlFree((char *) input->filename); + if (input->directory != NULL) xmlFree((char *) input->directory); + if ((input->free != NULL) && (input->base != NULL)) + input->free((xmlChar *) input->base); + if (input->buf != NULL) + xmlFreeParserInputBuffer(input->buf); + memset(input, -1, sizeof(htmlParserInput)); + xmlFree(input); +} + +/** + * htmlNewInputStream: + * @ctxt: an HTML parser context + * + * Create a new input stream structure + * Returns the new input stream or NULL + */ +htmlParserInputPtr +htmlNewInputStream(htmlParserCtxtPtr ctxt) { + htmlParserInputPtr input; + + input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput)); + if (input == NULL) { + ctxt->errNo = XML_ERR_NO_MEMORY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "malloc: couldn't allocate a new input stream\n"); + return(NULL); + } + memset(input, 0, sizeof(htmlParserInput)); + input->filename = NULL; + input->directory = NULL; + input->base = NULL; + input->cur = NULL; + input->buf = NULL; + input->line = 1; + input->col = 1; + input->buf = NULL; + input->free = NULL; + input->version = NULL; + input->consumed = 0; + input->length = 0; + return(input); +} + + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? * + * * + ************************************************************************/ + +/** + * areBlanks: + * @ctxt: an HTML parser context + * @str: a xmlChar * + * @len: the size of @str + * + * Is this a sequence of blank chars that one can ignore ? + * + * Returns 1 if ignorable 0 otherwise. 
+ */ + +static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) { + int i; + xmlNodePtr lastChild; + + for (i = 0;i < len;i++) + if (!(IS_BLANK(str[i]))) return(0); + + if (CUR == 0) return(1); + if (CUR != '<') return(0); + if (ctxt->name == NULL) + return(1); + if (xmlStrEqual(ctxt->name, BAD_CAST"html")) + return(1); + if (xmlStrEqual(ctxt->name, BAD_CAST"head")) + return(1); + if (xmlStrEqual(ctxt->name, BAD_CAST"body")) + return(1); + if (ctxt->node == NULL) return(0); + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if (ctxt->node->content != NULL) return(0); + } else if (xmlNodeIsText(lastChild)) { + return(0); + } else if (xmlStrEqual(lastChild->name, BAD_CAST"b")) { + return(0); + } else if (xmlStrEqual(lastChild->name, BAD_CAST"bold")) { + return(0); + } else if (xmlStrEqual(lastChild->name, BAD_CAST"em")) { + return(0); + } + return(1); +} + +/** + * htmlHandleEntity: + * @ctxt: an HTML parser context + * @entity: an XML entity pointer. + * + * Default handling of an HTML entity, call the parser with the + * substitution string + */ + +void +htmlHandleEntity(htmlParserCtxtPtr ctxt, xmlEntityPtr entity) { + int len; + + if (entity->content == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "htmlHandleEntity %s: content == NULL\n", + entity->name); + ctxt->wellFormed = 0; + return; + } + len = xmlStrlen(entity->content); + + /* + * Just handle the content as a set of chars. 
+ */ + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, entity->content, len); + +} + +/** + * htmlNewDocNoDtD: + * @URI: URI for the dtd, or NULL + * @ExternalID: the external ID of the DTD, or NULL + * + * Returns a new document, do not intialize the DTD if not provided + */ +htmlDocPtr +htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) { + xmlDocPtr cur; + + /* + * Allocate a new document and fill the fields. + */ + cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewDoc : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlDoc)); + + cur->type = XML_HTML_DOCUMENT_NODE; + cur->version = NULL; + cur->intSubset = NULL; + if ((ExternalID != NULL) || + (URI != NULL)) + xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI); + cur->doc = cur; + cur->name = NULL; + cur->children = NULL; + cur->extSubset = NULL; + cur->oldNs = NULL; + cur->encoding = NULL; + cur->standalone = 1; + cur->compression = 0; + cur->ids = NULL; + cur->refs = NULL; +#ifndef XML_WITHOUT_CORBA + cur->_private = NULL; +#endif + return(cur); +} + +/** + * htmlNewDoc: + * @URI: URI for the dtd, or NULL + * @ExternalID: the external ID of the DTD, or NULL + * + * Returns a new document + */ +htmlDocPtr +htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) { + if ((URI == NULL) && (ExternalID == NULL)) + return(htmlNewDocNoDtD( + BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", + BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd")); + + return(htmlNewDocNoDtD(URI, ExternalID)); +} + + +/************************************************************************ + * * + * The parser itself * + * Relates to http://www.w3.org/TR/html40 * + * * + ************************************************************************/ + +/************************************************************************ + * * + * The parser itself * + * 
*
+ ************************************************************************/
+
+/**
+ * htmlParseHTMLName:
+ * @ctxt:  an HTML parser context
+ *
+ * parse an HTML tag or attribute name, note that we convert it to lowercase
+ * since HTML names are not case-sensitive.
+ * At most HTML_PARSER_BUFFER_SIZE bytes are collected; parsing stops there
+ * and any remaining name characters are left in the input.
+ *
+ * Returns the Tag Name parsed (a copy made with xmlStrndup) or NULL
+ */
+
+xmlChar *
+htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
+    xmlChar *ret = NULL;
+    int i = 0;
+    xmlChar loc[HTML_PARSER_BUFFER_SIZE];
+
+    /* a name has to start with a letter, '_' or ':' */
+    if (!IS_LETTER(CUR) && (CUR != '_') &&
+        (CUR != ':')) return(NULL);
+
+    while ((i < HTML_PARSER_BUFFER_SIZE) &&
+           ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+            (CUR == ':') || (CUR == '-') || (CUR == '_'))) {
+        /* fold ASCII upper case to lower case */
+        if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
+        else loc[i] = CUR;
+        i++;
+
+        NEXT;
+    }
+
+    ret = xmlStrndup(loc, i);
+
+    return(ret);
+}
+
+/**
+ * htmlParseName:
+ * @ctxt:  an HTML parser context
+ *
+ * parse an HTML name, this routine is case sensitive.
+ * Names longer than HTML_MAX_NAMELEN are truncated: an error is reported
+ * and the rest of the name is skipped in the input.
+ *
+ * Returns the Name parsed (a copy made with xmlStrndup) or NULL
+ */
+
+xmlChar *
+htmlParseName(htmlParserCtxtPtr ctxt) {
+    xmlChar buf[HTML_MAX_NAMELEN];
+    int len = 0;
+
+    GROW;
+    if (!IS_LETTER(CUR) && (CUR != '_')) {
+        return(NULL);
+    }
+
+    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+           (CUR == '.') || (CUR == '-') ||
+           (CUR == '_') || (CUR == ':') ||
+           (IS_COMBINING(CUR)) ||
+           (IS_EXTENDER(CUR))) {
+        buf[len++] = CUR;
+        NEXT;
+        if (len >= HTML_MAX_NAMELEN) {
+            xmlGenericError(xmlGenericErrorContext,
+               "htmlParseName: reached HTML_MAX_NAMELEN limit\n");
+            /* buffer is full: swallow the remaining name characters */
+            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+                   (CUR == '.') || (CUR == '-') ||
+                   (CUR == '_') || (CUR == ':') ||
+                   (IS_COMBINING(CUR)) ||
+                   (IS_EXTENDER(CUR)))
+                 NEXT;
+            break;
+        }
+    }
+    return(xmlStrndup(buf, len));
+}
+
+/**
+ * htmlAttrPutUTF8:
+ * @out:  output position, at least 4 bytes must be writable there
+ * @c:  the value to serialize
+ *
+ * Write @c at @out using the 1 to 4 byte UTF-8 layout.
+ *
+ * Returns the position just after the last byte written
+ */
+static xmlChar *
+htmlAttrPutUTF8(xmlChar *out, unsigned int c) {
+    int bits;
+
+    /* lead byte; bits is the shift of the first continuation byte */
+    if      (c <    0x80)
+        { *out++ =  c;                        bits = -6; }
+    else if (c <   0x800)
+        { *out++ = ((c >>  6) & 0x1F) | 0xC0; bits =  0; }
+    else if (c < 0x10000)
+        { *out++ = ((c >> 12) & 0x0F) | 0xE0; bits =  6; }
+    else
+        { *out++ = ((c >> 18) & 0x07) | 0xF0; bits = 12; }
+
+    for ( ; bits >= 0; bits -= 6) {
+        *out++ = ((c >> bits) & 0x3F) | 0x80;
+    }
+    return(out);
+}
+
+/**
+ * htmlParseHTMLAttribute:
+ * @ctxt:  an HTML parser context
+ * @stop:  a char stop value
+ *
+ * parse an HTML attribute value till the stop (quote), if
+ * stop is 0 then it stops at the first space. Parsing also stops at '>'
+ * and at the end of the input. Character references and known entities
+ * are replaced by their UTF-8 encoding, unknown entity references are
+ * copied verbatim.
+ *
+ * Returns the attribute parsed, in a buffer allocated with xmlMalloc,
+ *         or NULL if the allocation failed
+ */
+
+xmlChar *
+htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
+    xmlChar *buffer = NULL;
+    int buffer_size = 0;
+    xmlChar *out = NULL;
+    xmlChar *name = NULL;
+
+    xmlChar *cur = NULL;
+    htmlEntityDescPtr ent;
+
+    /*
+     * allocate a translation buffer.
+     */
+    buffer_size = HTML_PARSER_BUFFER_SIZE;
+    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
+    if (buffer == NULL) {
+        perror("htmlParseHTMLAttribute: malloc failed");
+        return(NULL);
+    }
+    out = buffer;
+
+    /*
+     * Ok loop until we reach one of the ending chars
+     */
+    while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {
+        if ((stop == 0) && (IS_BLANK(CUR))) break;
+
+        /*
+         * Make sure there is headroom before anything is written in this
+         * round. This used to be missing on the character reference path,
+         * so a long run of "&#...;" could write past the buffer.
+         */
+        if (out - buffer > buffer_size - 100) {
+            int used = out - buffer;
+
+            growBuffer(buffer);
+            out = &buffer[used];
+        }
+
+        if (CUR == '&') {
+            if (NXT(1) == '#') {
+                out = htmlAttrPutUTF8(out,
+                                      (unsigned int) htmlParseCharRef(ctxt));
+            } else {
+                ent = htmlParseEntityRef(ctxt, &name);
+                if (name == NULL) {
+                    /* a lone '&', keep it as is */
+                    *out++ = '&';
+                } else if (ent == NULL) {
+                    /* unknown entity: copy "&name" unchanged */
+                    *out++ = '&';
+                    cur = name;
+                    while (*cur != 0) {
+                        if (out - buffer > buffer_size - 100) {
+                            int used = out - buffer;
+
+                            growBuffer(buffer);
+                            out = &buffer[used];
+                        }
+                        *out++ = *cur++;
+                    }
+                    xmlFree(name);
+                } else {
+                    /*
+                     * Use the full entity value: casting it to xmlChar
+                     * dropped everything above 0xFF.
+                     */
+                    out = htmlAttrPutUTF8(out, (unsigned int) ent->value);
+                    xmlFree(name);
+                }
+            }
+        } else {
+            unsigned int c;
+            int l;
+
+            c = CUR_CHAR(l);
+            out = htmlAttrPutUTF8(out, c);
+            NEXT;
+        }
+    }
+    *out++ = 0;
+    return(buffer);
+}
+
+/**
+ * htmlParseNmtoken:
+ * @ctxt:  an HTML parser context
+ *
+ * parse an HTML Nmtoken.
+ * Tokens longer than HTML_MAX_NAMELEN are truncated: an error is reported
+ * and the rest of the token is skipped in the input.
+ *
+ * Returns the Nmtoken parsed (a copy made with xmlStrndup) or NULL
+ */
+
+xmlChar *
+htmlParseNmtoken(htmlParserCtxtPtr ctxt) {
+    xmlChar buf[HTML_MAX_NAMELEN];
+    int len = 0;
+
+    GROW;
+    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+           (CUR == '.') || (CUR == '-') ||
+           (CUR == '_') || (CUR == ':') ||
+           (IS_COMBINING(CUR)) ||
+           (IS_EXTENDER(CUR))) {
+        buf[len++] = CUR;
+        NEXT;
+        if (len >= HTML_MAX_NAMELEN) {
+            xmlGenericError(xmlGenericErrorContext,
+               "htmlParseNmtoken: reached HTML_MAX_NAMELEN limit\n");
+            /* buffer is full: swallow the remaining token characters */
+            while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+                   (CUR == '.') || (CUR == '-') ||
+                   (CUR == '_') || (CUR == ':') ||
+                   (IS_COMBINING(CUR)) ||
+                   (IS_EXTENDER(CUR)))
+                 NEXT;
+            break;
+        }
+    }
+    return(xmlStrndup(buf, len));
+}
+
+/**
+ * htmlParseEntityRef:
+ * @ctxt:  an HTML parser context
+ * @str:  location to store the entity name
+ *
+ * parse an HTML ENTITY references
+ *
+ * [68] EntityRef ::= '&' Name ';'
+ *
+ * Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
+ *         if non-NULL *str will have to be freed by the caller.
+ */
+htmlEntityDescPtr
+htmlParseEntityRef(htmlParserCtxtPtr ctxt, xmlChar **str) {
+    xmlChar *name;
+    htmlEntityDescPtr ent = NULL;
+
+    /* the name is optional for the caller, never write through NULL */
+    if (str != NULL)
+        *str = NULL;
+
+    /* nothing is consumed unless we are sitting on a '&' */
+    if (CUR != '&')
+        return(NULL);
+
+    NEXT;
+    name = htmlParseName(ctxt);
+    if (name == NULL) {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData, "htmlParseEntityRef: no name\n");
+        ctxt->wellFormed = 0;
+        return(NULL);
+    }
+
+    GROW;
+    if (CUR == ';') {
+        /*
+         * Lookup the entity in the table.
+         * The ';' is only consumed for a known entity, otherwise it is
+         * left in the input for the caller to handle as plain text.
+         */
+        ent = htmlEntityLookup(name);
+        if (ent != NULL) /* OK that's ugly !!! */
+            NEXT;
+    } else {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData,
+                             "htmlParseEntityRef: expecting ';'\n");
+        /* an error was reported, flag the document like the other path */
+        ctxt->wellFormed = 0;
+    }
+
+    /* hand the name over to the caller, or release it if unwanted */
+    if (str != NULL)
+        *str = name;
+    else
+        xmlFree(name);
+    return(ent);
+}
+
+/**
+ * htmlParseAttValue:
+ * @ctxt:  an HTML parser context
+ *
+ * parse a value for an attribute
+ * Note: the parser won't do substitution of entities here, this
+ * will be handled later in xmlStringGetNodeList, unless it was
+ * asked for ctxt->replaceEntities != 0
+ *
+ * Returns the AttValue parsed or NULL.
+ */
+
+xmlChar *
+htmlParseAttValue(htmlParserCtxtPtr ctxt) {
+    xmlChar *ret = NULL;
+
+    if (CUR == '"') {
+        NEXT;
+        ret = htmlParseHTMLAttribute(ctxt, '"');
+        if (CUR != '"') {
+            /* the message used to name the wrong quote character */
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt->userData, "AttValue: \" expected\n");
+            ctxt->wellFormed = 0;
+        } else
+            NEXT;
+    } else if (CUR == '\'') {
+        NEXT;
+        ret = htmlParseHTMLAttribute(ctxt, '\'');
+        if (CUR != '\'') {
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
+            ctxt->wellFormed = 0;
+        } else
+            NEXT;
+    } else {
+        /*
+         * That's an HTMLism, the attribute value may not be quoted
+         */
+        ret = htmlParseHTMLAttribute(ctxt, 0);
+        if (ret == NULL) {
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");
+            ctxt->wellFormed = 0;
+        }
+    }
+    return(ret);
+}
+
+/**
+ * htmlParseSystemLiteral:
+ * @ctxt:  an HTML parser context
+ *
+ * parse an HTML Literal
+ *
+ * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
+ *
+ * Returns the SystemLiteral parsed (without the quotes, copied with
+ *         xmlStrndup) or NULL if the literal is missing or unterminated
+ */
+
+xmlChar *
+htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
+    const xmlChar *q;
+    xmlChar *ret = NULL;
+    xmlChar quote;
+
+    if ((CUR != '"') && (CUR != '\'')) {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData,
+                             "SystemLiteral \" or ' expected\n");
+        ctxt->wellFormed = 0;
+        return(NULL);
+    }
+
+    /* remember which quote opened the literal, the same one closes it */
+    quote = CUR;
+    NEXT;
+    q = CUR_PTR;
+    while ((IS_CHAR(CUR)) && (CUR != quote))
+        NEXT;
+    if (!IS_CHAR(CUR)) {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
+        ctxt->wellFormed = 0;
+    } else {
+        ret = xmlStrndup(q, CUR_PTR - q);
+        NEXT;
+    }
+
+    return(ret);
+}
+
+/**
+ * htmlParsePubidLiteral:
+ * @ctxt:  an HTML parser context
+ *
+ * parse an HTML public literal
+ *
+ * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+ *
+ * Returns the PubidLiteral parsed (without the quotes, copied with
+ *         xmlStrndup) or NULL if the literal is missing or unterminated
+ */
+
+xmlChar *
+htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
+    const xmlChar *q;
+    xmlChar *ret = NULL;
+    xmlChar quote;
+
+    if ((CUR != '"') && (CUR != '\'')) {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData,
+                             "PubidLiteral \" or ' expected\n");
+        ctxt->wellFormed = 0;
+        return(NULL);
+    }
+
+    quote = CUR;
+    NEXT;
+    q = CUR_PTR;
+    /*
+     * Both forms accept PubidChar. The single quoted form used to scan
+     * letters only and then reject its own closing quote, so it could
+     * never succeed. The explicit quote test is what excludes "'" from
+     * PubidChar in the single quoted form.
+     */
+    while ((IS_PUBIDCHAR(CUR)) && (CUR != quote))
+        NEXT;
+    if (CUR != quote) {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
+        ctxt->wellFormed = 0;
+    } else {
+        ret = xmlStrndup(q, CUR_PTR - q);
+        NEXT;
+    }
+
+    return(ret);
+}
+
+/**
+ * htmlParseScript:
+ * @ctxt:  an HTML parser context
+ *
+ * parse the content of an HTML SCRIPT or STYLE element
+ * http://www.w3.org/TR/html4/sgml/dtd.html#Script
+ * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
+ * http://www.w3.org/TR/html4/types.html#type-script
+ * http://www.w3.org/TR/html4/types.html#h-6.15
+ * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
+ *
+ * Script data ( %Script; in the DTD) can be the content of the SCRIPT
+ * element and the value of intrinsic event attributes. User agents must
+ * not evaluate script data as HTML markup but instead must pass it on as
+ * data to a script engine.
+ * NOTES: + * - The content is passed like CDATA + * - the attributes for style and scripting "onXXX" are also described + * as CDATA but SGML allows entities references in attributes so their + * processing is identical as other attributes + */ +void +htmlParseScript(htmlParserCtxtPtr ctxt) { + xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 1]; + int nbchar = 0; + xmlChar cur; + + SHRINK; + cur = CUR; + while (IS_CHAR(cur)) { + if ((cur == '<') && (NXT(1) == '/')) { + /* + * One should break here, the specification is clear: + * Authors should therefore escape "= 'A') && (NXT(2) <= 'Z')) || + ((NXT(2) >= 'a') && (NXT(2) <= 'z'))) + break; /* while */ + } + buf[nbchar++] = cur; + if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { + if (ctxt->sax->cdataBlock!= NULL) { + /* + * Insert as CDATA, which is the same as HTML_PRESERVE_NODE + */ + ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); + } + nbchar = 0; + } + NEXT; + cur = CUR; + } + if (!(IS_CHAR(cur))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Invalid char in CDATA 0x%X\n", cur); + ctxt->wellFormed = 0; + NEXT; + } + + if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (ctxt->sax->cdataBlock!= NULL) { + /* + * Insert as CDATA, which is the same as HTML_PRESERVE_NODE + */ + ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); + } + } +} + + +/** + * htmlParseCharData: + * @ctxt: an HTML parser context + * @cdata: int indicating whether we are within a CDATA section + * + * parse a CharData section. + * if we are within a CDATA section ']]>' marks an end of section. 
+ * + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + */ + +void +htmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) { + xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; + int nbchar = 0; + int cur, l; + + SHRINK; + cur = CUR_CHAR(l); + while (((cur != '<') || (ctxt->token == '<')) && + ((cur != '&') || (ctxt->token == '&')) && + (IS_CHAR(cur))) { + COPY_BUF(l,buf,nbchar,cur); + if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { + /* + * Ok the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, + buf, nbchar); + } else { + htmlCheckParagraph(ctxt); + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + nbchar = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + } + if (nbchar != 0) { + /* + * Ok the segment is to be consumed as chars. + */ + if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { + if (areBlanks(ctxt, buf, nbchar)) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); + } else { + htmlCheckParagraph(ctxt); + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, nbchar); + } + } + } +} + +/** + * htmlParseExternalID: + * @ctxt: an HTML parser context + * @publicID: a xmlChar** receiving PubidLiteral + * @strict: indicate whether we should restrict parsing to only + * production [75], see NOTE below + * + * Parse an External ID or a Public ID + * + * NOTE: Productions [75] and [83] interract badly since [75] can generate + * 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [75] ExternalID ::= 'SYSTEM' S SystemLiteral + * | 'PUBLIC' S PubidLiteral S SystemLiteral + * + * [83] PublicID ::= 'PUBLIC' S PubidLiteral + * + * Returns the function returns SystemLiteral and in the second + * case publicID receives PubidLiteral, is strict is off + * it is possible to return NULL 
and have publicID set. + */ + +xmlChar * +htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { + xmlChar *URI = NULL; + + if ((UPPER == 'S') && (UPP(1) == 'Y') && + (UPP(2) == 'S') && (UPP(3) == 'T') && + (UPP(4) == 'E') && (UPP(5) == 'M')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'SYSTEM'\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + URI = htmlParseSystemLiteral(ctxt); + if (URI == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseExternalID: SYSTEM, no URI\n"); + ctxt->wellFormed = 0; + } + } else if ((UPPER == 'P') && (UPP(1) == 'U') && + (UPP(2) == 'B') && (UPP(3) == 'L') && + (UPP(4) == 'I') && (UPP(5) == 'C')) { + SKIP(6); + if (!IS_BLANK(CUR)) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'PUBLIC'\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + *publicID = htmlParsePubidLiteral(ctxt); + if (*publicID == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "htmlParseExternalID: PUBLIC, no Public Identifier\n"); + ctxt->wellFormed = 0; + } + SKIP_BLANKS; + if ((CUR == '"') || (CUR == '\'')) { + URI = htmlParseSystemLiteral(ctxt); + } + } + return(URI); +} + +/** + * htmlParseComment: + * @ctxt: an HTML parser context + * + * Parse an XML (SGML) comment + * + * [15] Comment ::= '' + */ +void +htmlParseComment(htmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len; + int size = HTML_PARSER_BUFFER_SIZE; + int q, ql; + int r, rl; + int cur, l; + xmlParserInputState state; + + /* + * Check that there is a comment right here. 
+ */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + SHRINK; + SKIP(4); + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + q = CUR_CHAR(ql); + NEXTL(ql); + r = CUR_CHAR(rl); + NEXTL(rl); + cur = CUR_CHAR(l); + len = 0; + while (IS_CHAR(cur) && + ((cur != '>') || + (r != '-') || (q != '-'))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + COPY_BUF(ql,buf,len,q); + q = r; + ql = rl; + r = cur; + rl = l; + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment not terminated \n"); + } + return; + } + if (cur->type == HTML_ENTITY_REF_NODE) { + xmlBufferWriteChar(buf, "&"); + xmlBufferWriteCHAR(buf, cur->name); + xmlBufferWriteChar(buf, ";"); + return; + } + + /* + * Get specific HTmL info for taht node. 
+ */ + info = htmlTagLookup(cur->name); + + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, cur->name); + if (cur->properties != NULL) + htmlAttrListDump(buf, doc, cur->properties); + + if ((info != NULL) && (info->empty)) { + xmlBufferWriteChar(buf, ">"); + if (cur->next != NULL) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE)) + xmlBufferWriteChar(buf, "\n"); + } + return; + } + if ((cur->content == NULL) && (cur->children == NULL)) { + if ((info != NULL) && (info->endTag != 0)) + xmlBufferWriteChar(buf, ">"); + else { + xmlBufferWriteChar(buf, ">name); + xmlBufferWriteChar(buf, ">"); + } + if (cur->next != NULL) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE)) + xmlBufferWriteChar(buf, "\n"); + } + return; + } + xmlBufferWriteChar(buf, ">"); + if (cur->content != NULL) { + xmlChar *buffer; + +#ifndef XML_USE_BUFFER_CONTENT + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); +#else + buffer = xmlEncodeEntitiesReentrant(doc, + xmlBufferContent(cur->content)); +#endif + if (buffer != NULL) { + xmlBufferWriteCHAR(buf, buffer); + xmlFree(buffer); + } + } + if (cur->children != NULL) { + if ((cur->children->type != HTML_TEXT_NODE) && + (cur->children->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last)) + xmlBufferWriteChar(buf, "\n"); + htmlNodeListDump(buf, doc, cur->children); + if ((cur->last->type != HTML_TEXT_NODE) && + (cur->last->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last)) + xmlBufferWriteChar(buf, "\n"); + } + if (!htmlIsAutoClosed(doc, cur)) { + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, ">"); + } +#if 0 + if (!htmlIsAutoClosed(doc, cur)) { + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, ">"); + } +#else + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, ">"); +#endif + if (cur->next != NULL) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE)) + 
xmlBufferWriteChar(buf, "\n"); + } +} + +/** + * htmlNodeDumpFile: + * @out: the FILE pointer + * @doc: the document + * @cur: the current node + * + * Dump an HTML node, recursive behaviour,children are printed too. + */ +void +htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufferPtr buf; + + buf = xmlBufferCreate(); + if (buf == NULL) return; + htmlNodeDump(buf, doc, cur); + xmlBufferDump(out, buf); + xmlBufferFree(buf); +} + +/** + * htmlDocContentDump: + * @buf: the HTML buffer output + * @cur: the document + * + * Dump an HTML document. + */ +static void +htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) { + int type; + + /* + * force to output the stuff as HTML, especially for entities + */ + type = cur->type; + cur->type = XML_HTML_DOCUMENT_NODE; + if (cur->intSubset != NULL) + htmlDtdDump(buf, cur); + else { + /* Default to HTML-4.0 transitionnal @@@@ */ + xmlBufferWriteChar(buf, ""); + + } + if (cur->children != NULL) { + htmlNodeListDump(buf, cur, cur->children); + } + xmlBufferWriteChar(buf, "\n"); + cur->type = (xmlElementType) type; +} + +/** + * htmlDocDumpMemory: + * @cur: the document + * @mem: OUT: the memory pointer + * @size: OUT: the memory lenght + * + * Dump an HTML document in memory and return the xmlChar * and it's size. + * It's up to the caller to free the memory. 
+ */ +void +htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { + xmlBufferPtr buf; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "htmlxmlDocDumpMemory : document == NULL\n"); +#endif + *mem = NULL; + *size = 0; + return; + } + buf = xmlBufferCreate(); + if (buf == NULL) { + *mem = NULL; + *size = 0; + return; + } + htmlDocContentDump(buf, cur); + *mem = buf->content; + *size = buf->use; + memset(buf, -1, sizeof(xmlBuffer)); + xmlFree(buf); +} + + +/************************************************************************ + * * + * Dumping HTML tree content to an I/O output buffer * + * * + ************************************************************************/ + +/** + * htmlDtdDump: + * @buf: the HTML buffer output + * @doc: the document + * @encoding: the encoding string + * + * Dump the HTML document DTD, if any. + */ +static void +htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) { + xmlDtdPtr cur = doc->intSubset; + + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlDtdDump : no internal subset\n"); + return; + } + xmlOutputBufferWriteString(buf, "name); + if (cur->ExternalID != NULL) { + xmlOutputBufferWriteString(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); + if (cur->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + } + } else if (cur->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); + } + xmlOutputBufferWriteString(buf, ">\n"); +} + +/** + * htmlAttrDump: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the attribute pointer + * @encoding: the encoding string + * + * Dump an HTML attribute + */ +static void +htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { + xmlChar *value; + + if (cur == NULL) { + 
xmlGenericError(xmlGenericErrorContext, + "htmlAttrDump : property == NULL\n"); + return; + } + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->children != NULL) { + value = xmlNodeListGetString(doc, cur->children, 0); + if (value) { + xmlOutputBufferWriteString(buf, "="); + xmlBufferWriteQuotedString(buf->buffer, value); + xmlFree(value); + } else { + xmlOutputBufferWriteString(buf, "=\"\""); + } + } +} + +/** + * htmlAttrListDump: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the first attribute pointer + * @encoding: the encoding string + * + * Dump a list of HTML attributes + */ +static void +htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlAttrListDump : property == NULL\n"); + return; + } + while (cur != NULL) { + htmlAttrDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } +} + + +void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding); + +/** + * htmlNodeListDump: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the first node + * @encoding: the encoding string + * + * Dump an HTML node list, recursive behaviour,children are printed too. + */ +static void +htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) { + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNodeListDump : node == NULL\n"); + return; + } + while (cur != NULL) { + htmlNodeDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } +} + +/** + * htmlNodeDumpOutput: + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node + * @encoding: the encoding string + * + * Dump an HTML node, recursive behaviour,children are printed too. 
+ */ +void +htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) { + htmlElemDescPtr info; + + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNodeDump : node == NULL\n"); + return; + } + /* + * Special cases. + */ + if (cur->type == XML_DTD_NODE) + return; + if (cur->type == XML_HTML_DOCUMENT_NODE) { + htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); + return; + } + if (cur->type == HTML_TEXT_NODE) { + if (cur->content != NULL) { + if ((cur->name == xmlStringText) || + (cur->name != xmlStringTextNoenc)) { + xmlChar *buffer; + +#ifndef XML_USE_BUFFER_CONTENT + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); +#else + buffer = xmlEncodeEntitiesReentrant(doc, + xmlBufferContent(cur->content)); +#endif + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + } + return; + } + if (cur->type == HTML_COMMENT_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, ""); + } + return; + } + if (cur->type == HTML_ENTITY_REF_NODE) { + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + return; + } + if (cur->type == HTML_PRESERVE_NODE) { + if (cur->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + xmlOutputBufferWriteString(buf, (const char *)cur->content); +#else + xmlOutputBufferWriteString(buf, (const char *) + xmlBufferContent(cur->content)); +#endif + } + return; + } + + /* + * Get specific HTmL info for taht node. 
+ */ + info = htmlTagLookup(cur->name); + + xmlOutputBufferWriteString(buf, "<"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->properties != NULL) + htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); + + if ((info != NULL) && (info->empty)) { + xmlOutputBufferWriteString(buf, ">"); + if (cur->next != NULL) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE)) + xmlOutputBufferWriteString(buf, "\n"); + } + return; + } + if ((cur->content == NULL) && (cur->children == NULL)) { + if ((info != NULL) && (info->saveEndTag != 0) && + (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) { + xmlOutputBufferWriteString(buf, ">"); + } else { + xmlOutputBufferWriteString(buf, ">name); + xmlOutputBufferWriteString(buf, ">"); + } + if (cur->next != NULL) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE)) + xmlOutputBufferWriteString(buf, "\n"); + } + return; + } + xmlOutputBufferWriteString(buf, ">"); + if (cur->content != NULL) { + /* + * Uses the OutputBuffer property to automatically convert + * invalids to charrefs + */ + +#ifndef XML_USE_BUFFER_CONTENT + xmlOutputBufferWriteString(buf, (const char *) cur->content); +#else + xmlOutputBufferWriteString(buf, + (const char *) xmlBufferContent(cur->content)); +#endif + } + if (cur->children != NULL) { + if ((cur->children->type != HTML_TEXT_NODE) && + (cur->children->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last)) + xmlOutputBufferWriteString(buf, "\n"); + htmlNodeListDumpOutput(buf, doc, cur->children, encoding); + if ((cur->last->type != HTML_TEXT_NODE) && + (cur->last->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last)) + xmlOutputBufferWriteString(buf, "\n"); + } +#if 0 + if (!htmlIsAutoClosed(doc, cur)) { + xmlOutputBufferWriteString(buf, "name); + xmlOutputBufferWriteString(buf, ">"); + } +#else + xmlOutputBufferWriteString(buf, "name); + xmlOutputBufferWriteString(buf, 
">"); +#endif + if (cur->next != NULL) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE)) + xmlOutputBufferWriteString(buf, "\n"); + } +} + +/** + * htmlDocContentDump: + * @buf: the HTML buffer output + * @cur: the document + * @encoding: the encoding string + * + * Dump an HTML document. + */ +void +htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) { + int type; + + /* + * force to output the stuff as HTML, especially for entities + */ + type = cur->type; + cur->type = XML_HTML_DOCUMENT_NODE; + if (cur->intSubset != NULL) + htmlDtdDumpOutput(buf, cur, NULL); + else { + /* Default to HTML-4.0 transitionnal @@@@ */ + xmlOutputBufferWriteString(buf, "\n"); + + } + if (cur->children != NULL) { + htmlNodeListDumpOutput(buf, cur, cur->children, encoding); + } + xmlOutputBufferWriteString(buf, "\n"); + cur->type = (xmlElementType) type; +} + + +/************************************************************************ + * * + * Saving functions front-ends * + * * + ************************************************************************/ + +/** + * htmlDocDump: + * @f: the FILE* + * @cur: the document + * + * Dump an HTML document to an open FILE. + * + * returns: the number of byte written or -1 in case of failure. 
+ */
+int
+htmlDocDump(FILE *f, xmlDocPtr cur) {
+    xmlCharEncodingHandlerPtr handler = NULL;
+    xmlOutputBufferPtr out;
+    const char *metaEnc;
+
+    if (cur == NULL) {
+#ifdef DEBUG_TREE
+        xmlGenericError(xmlGenericErrorContext,
+		"htmlDocDump : document == NULL\n");
+#endif
+        return(-1);
+    }
+
+    /*
+     * A conversion handler is only looked up when the encoding announced
+     * in the document META differs from the in-memory charset.
+     */
+    metaEnc = (const char *) htmlGetMetaEncoding(cur);
+    if ((metaEnc != NULL) &&
+        (xmlParseCharEncoding(metaEnc) != cur->charset)) {
+        /* converting from anything but UTF-8 is not supported yet */
+        if (cur->charset != XML_CHAR_ENCODING_UTF8)
+            return(-1);
+
+        handler = xmlFindCharEncodingHandler(metaEnc);
+        if (handler == NULL)
+            return(-1);
+    }
+
+    /*
+     * No specific handler: try the "HTML" one, then plain "ascii"
+     */
+    if (handler == NULL) {
+        handler = xmlFindCharEncodingHandler("HTML");
+        if (handler == NULL)
+            handler = xmlFindCharEncodingHandler("ascii");
+    }
+
+    out = xmlOutputBufferCreateFile(f, handler);
+    if (out == NULL)
+        return(-1);
+
+    htmlDocContentDumpOutput(out, cur, NULL);
+
+    /* the result of closing the output buffer is the return value */
+    return(xmlOutputBufferClose(out));
+}
+
+/**
+ * htmlSaveFile:
+ * @filename:  the filename (or URL)
+ * @cur:  the document
+ *
+ * Dump an HTML document to a file. If @filename is "-" the stdout file is
+ * used.
+ * returns: the number of byte written or -1 in case of failure.
+ */
+int
+htmlSaveFile(const char *filename, xmlDocPtr cur) {
+    xmlOutputBufferPtr buf;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    const char *encoding;
+    int ret;
+
+    /*
+     * Reject missing arguments up front: the document is dereferenced
+     * below, and htmlDocDump already guards against a NULL document.
+     */
+    if ((cur == NULL) || (filename == NULL))
+        return(-1);
+
+    encoding = (const char *) htmlGetMetaEncoding(cur);
+
+    if (encoding != NULL) {
+        xmlCharEncoding enc;
+
+        enc = xmlParseCharEncoding(encoding);
+        if (enc != cur->charset) {
+            if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+                /*
+                 * Not supported yet
+                 */
+                return(-1);
+            }
+
+            handler = xmlFindCharEncodingHandler(encoding);
+            if (handler == NULL)
+                return(-1);
+        }
+    }
+
+    /*
+     * Fallback to HTML or ASCII when the encoding is unspecified
+     */
+    if (handler == NULL)
+        handler = xmlFindCharEncodingHandler("HTML");
+    if (handler == NULL)
+        handler = xmlFindCharEncodingHandler("ascii");
+
+    /*
+     * create the output buffer writing to the file, using the
+     * compression level recorded in the document.
+     * NOTE(review): this failure returns 0 while the function is
+     * documented to return -1 on failure; kept as is for existing callers.
+     */
+    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
+    if (buf == NULL) return(0);
+
+    htmlDocContentDumpOutput(buf, cur, NULL);
+
+    /* the result of closing the output buffer is the return value */
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
+
+/**
+ * htmlSaveFileEnc:
+ * @filename:  the filename
+ * @cur:  the document
+ *
+ * Dump an HTML document to a file using a given encoding.
+ *
+ * returns: the number of byte written or -1 in case of failure.
+ */ +int +htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { + xmlOutputBufferPtr buf; + xmlCharEncodingHandlerPtr handler = NULL; + int ret; + + if (encoding != NULL) { + xmlCharEncoding enc; + + enc = xmlParseCharEncoding(encoding); + if (enc != cur->charset) { + if (cur->charset != XML_CHAR_ENCODING_UTF8) { + /* + * Not supported yet + */ + return(-1); + } + + handler = xmlFindCharEncodingHandler(encoding); + if (handler == NULL) + return(-1); + htmlSetMetaEncoding(cur, (const xmlChar *) encoding); + } + } + + /* + * Fallback to HTML or ASCII when the encoding is unspecified + */ + if (handler == NULL) + handler = xmlFindCharEncodingHandler("HTML"); + if (handler == NULL) + handler = xmlFindCharEncodingHandler("ascii"); + + /* + * save the content to a temp buffer. + */ + buf = xmlOutputBufferCreateFilename(filename, handler, 0); + if (buf == NULL) return(0); + + htmlDocContentDumpOutput(buf, cur, encoding); + + ret = xmlOutputBufferClose(buf); + return(ret); +} +#endif /* LIBXML_HTML_ENABLED */ diff --git a/HTMLtree.h b/HTMLtree.h new file mode 100644 index 00000000..543d693d --- /dev/null +++ b/HTMLtree.h @@ -0,0 +1,61 @@ +/* + * tree.h : describes the structures found in an tree resulting + * from an XML parsing. + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __HTML_TREE_H__ +#define __HTML_TREE_H__ + +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +#define HTML_TEXT_NODE XML_TEXT_NODE +#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE +#define HTML_COMMENT_NODE XML_COMMENT_NODE +#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE + +htmlDocPtr htmlNewDoc (const xmlChar *URI, + const xmlChar *ExternalID); +htmlDocPtr htmlNewDocNoDtD (const xmlChar *URI, + const xmlChar *ExternalID); +const xmlChar * htmlGetMetaEncoding (htmlDocPtr doc); +int htmlSetMetaEncoding (htmlDocPtr doc, + const xmlChar *encoding); +void htmlDocDumpMemory (xmlDocPtr cur, + xmlChar**mem, + int *size); +int htmlDocDump (FILE *f, + xmlDocPtr cur); +int htmlSaveFile (const char *filename, + xmlDocPtr cur); +void htmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur); +void htmlNodeDumpFile (FILE *out, + xmlDocPtr doc, + xmlNodePtr cur); +int htmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); + +/* This one is imported from xmlIO.h +void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); + */ +#ifdef __cplusplus +} +#endif + +#endif /* __HTML_TREE_H__ */ + diff --git a/Makefile.am b/Makefile.am index 34197ae3..a6829714 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,45 +1,87 @@ ## Process this file with automake to produce Makefile.in -SUBDIRS = libxml doc example +SUBDIRS = . 
include doc example -INCLUDES = $(CORBA_CFLAGS) +INCLUDES = -I@srcdir@/include -I./include @Z_CFLAGS@ @CORBA_CFLAGS@ noinst_PROGRAMS=testSAX testHTML testXPath testURI -DEPS = $(top_builddir)/libxml/libxml2.la -LDADDS = $(top_builddir)/libxml/libxml2.la - bin_PROGRAMS = xmllint -xmllint_SOURCES = xmllint.c +bin_SCRIPTS=xml2-config + +lib_LTLIBRARIES = libxml2.la +libxml2_la_LIBADD = @Z_LIBS@ + +libxml2_la_LDFLAGS = -version-info @LIBXML_VERSION_INFO@ + +libxml2_la_SOURCES = \ + SAX.c \ + entities.c \ + encoding.c \ + error.c \ + parserInternals.c \ + parser.c \ + tree.c \ + hash.c \ + list.c \ + xmlIO.c \ + xmlmemory.c \ + uri.c \ + valid.c \ + xlink.c \ + HTMLparser.c \ + HTMLtree.c \ + debugXML.c \ + xpath.c \ + xpointer.c \ + xinclude.c \ + nanohttp.c \ + nanoftp.c + +DEPS = $(top_builddir)/libxml2.la +LDADDS = $(top_builddir)/libxml2.la @Z_LIBS@ @M_LIBS@ + +man_MANS = xmllint.1 xml2-config.1 libxml.4 + +m4datadir = $(datadir)/aclocal +m4data_DATA = libxml.m4 + +xmllint_SOURCES=xmllint.c xmllint_LDFLAGS = xmllint_DEPENDENCIES = $(DEPS) -xmllint_LDADD = $(RDL_LIBS) $(LDADDS) +xmllint_LDADD= @RDL_LIBS@ $(LDADDS) -testSAX_SOURCES = testSAX.c +testSAX_SOURCES=testSAX.c testSAX_LDFLAGS = testSAX_DEPENDENCIES = $(DEPS) -testSAX_LDADD = $(LDADDS) +testSAX_LDADD= $(LDADDS) -testHTML_SOURCES = testHTML.c +testHTML_SOURCES=testHTML.c testHTML_LDFLAGS = testHTML_DEPENDENCIES = $(DEPS) -testHTML_LDADD = $(LDADDS) +testHTML_LDADD= $(LDADDS) -testXPath_SOURCES = testXPath.c +testXPath_SOURCES=testXPath.c testXPath_LDFLAGS = testXPath_DEPENDENCIES = $(DEPS) -testXPath_LDADD = $(LDADDS) +testXPath_LDADD= $(LDADDS) -testURI_SOURCES = testURI.c +testURI_SOURCES=testURI.c testURI_LDFLAGS = testURI_DEPENDENCIES = $(DEPS) -testURI_LDADD = $(LDADDS) +testURI_LDADD= $(LDADDS) check-local: tests +$(srcdir)/libxml: + -$(RM) $(srcdir)/libxml + ln -s $(srcdir)/. 
$(srcdir)/libxml + install-data: $(srcdir)/libxml +$(libxml2_la_SOURCES): $(srcdir)/libxml + testall : tests SVGtests SAXtests tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests @@ -363,3 +405,34 @@ cleantar: rpm: cleantar distcheck rpm -ta $(distdir).tar.gz + +## We create xml2Conf.sh here and not from configure because we want +## to get the paths expanded correctly. Macros like srcdir are given +## the value NONE in configure if the user doesn't specify them (this +## is an autoconf feature, not a bug). + +confexecdir=$(libdir) +confexec_DATA = xml2Conf.sh + +CLEANFILES=xml2Conf.sh + +confexecdir=$(libdir) +confexec_DATA = xml2Conf.sh +EXTRA_DIST = xml2Conf.sh.in libxml.spec.in libxml.spec libxml.m4 \ + example/Makefile.am example/gjobread.c example/gjobs.xml \ + $(man_MANS) libxml-2.0.pc.in xmlversion.h.in \ + win32/README.MSDev win32/Makefile.mingw \ + win32/libxml2/libxml2.dsp win32/libxml2/libxml2_so.dsp \ + win32/libxml2/libxml2_a.dsp vms/build_libxml.com vms/config.vms + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = libxml-2.0.pc + +#xml2Conf.sh: xml2Conf.sh.in Makefile +### Use sed and then mv to avoid problems if the user interrupts. +# sed -e 's?\@XML_LIBDIR\@?$(XML_LIBDIR)?g' \ +# -e 's?\@XML_INCLUDEDIR\@?$(XML_INCLUDEDIR)?g' \ +# -e 's?\@XML_LIBS\@?$(XML_LIBS)?g' \ +# -e 's?\@VERSION\@?$(VERSION)?g' \ +# < $(srcdir)/xml2Conf.sh.in > xml2Conf.tmp \ +# && mv xml2Conf.tmp xml2Conf.sh diff --git a/SAX.c b/SAX.c new file mode 100644 index 00000000..d7e256b1 --- /dev/null +++ b/SAX.c @@ -0,0 +1,1751 @@ +/* + * SAX.c : Default SAX handler to build a tree. + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard + */ + + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* #define DEBUG_SAX */ +/* #define DEBUG_SAX_TREE */ + +/** + * getPublicId: + * @ctx: the user data (XML parser context) + * + * Return the public ID e.g. "-//SGMLSOURCE//DTD DEMO//EN" + * + * Returns a xmlChar * + */ +const xmlChar * +getPublicId(void *ctx) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ + return(NULL); +} + +/** + * getSystemId: + * @ctx: the user data (XML parser context) + * + * Return the system ID, basically URL or filename e.g. + * http://www.sgmlsource.com/dtds/memo.dtd + * + * Returns a xmlChar * + */ +const xmlChar * +getSystemId(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(BAD_CAST ctxt->input->filename); +} + +/** + * getLineNumber: + * @ctx: the user data (XML parser context) + * + * Return the line number of the current parsing point. + * + * Returns an int + */ +int +getLineNumber(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->input->line); +} + +/** + * getColumnNumber: + * @ctx: the user data (XML parser context) + * + * Return the column number of the current parsing point. + * + * Returns an int + */ +int +getColumnNumber(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->input->col); +} + +/* + * The default SAX Locator. + */ + +xmlSAXLocator xmlDefaultSAXLocator = { + getPublicId, getSystemId, getLineNumber, getColumnNumber +}; + +/** + * isStandalone: + * @ctx: the user data (XML parser context) + * + * Is this document tagged standalone ? 
+ * + * Returns 1 if true + */ +int +isStandalone(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->myDoc->standalone == 1); +} + +/** + * hasInternalSubset: + * @ctx: the user data (XML parser context) + * + * Does this document has an internal subset + * + * Returns 1 if true + */ +int +hasInternalSubset(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->myDoc->intSubset != NULL); +} + +/** + * hasExternalSubset: + * @ctx: the user data (XML parser context) + * + * Does this document has an external subset + * + * Returns 1 if true + */ +int +hasExternalSubset(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + return(ctxt->myDoc->extSubset != NULL); +} + +/** + * internalSubset: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on internal subset declaration. + */ +void +internalSubset(void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlDtdPtr dtd; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.internalSubset(%s, %s, %s)\n", + name, ExternalID, SystemID); +#endif + + if (ctxt->myDoc == NULL) + return; + dtd = xmlGetIntSubset(ctxt->myDoc); + if (dtd != NULL) { + if (ctxt->html) + return; + xmlUnlinkNode((xmlNodePtr) dtd); + xmlFreeDtd(dtd); + ctxt->myDoc->intSubset = NULL; + } + ctxt->myDoc->intSubset = + xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID); +} + +/** + * externalSubset: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on external subset declaration. 
+ */ +void +externalSubset(void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.externalSubset(%s, %s, %s)\n", + name, ExternalID, SystemID); +#endif + if (((ExternalID != NULL) || (SystemID != NULL)) && + (((ctxt->validate) || (ctxt->loadsubset)) && + (ctxt->wellFormed && ctxt->myDoc))) { + /* + * Try to fetch and parse the external subset. + */ + xmlParserInputPtr oldinput; + int oldinputNr; + int oldinputMax; + xmlParserInputPtr *oldinputTab; + int oldwellFormed; + xmlParserInputPtr input = NULL; + xmlCharEncoding enc; + int oldcharset; + + /* + * Ask the Entity resolver to load the damn thing + */ + if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL)) + input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, + SystemID); + if (input == NULL) { + return; + } + + xmlNewDtd(ctxt->myDoc, name, ExternalID, SystemID); + + /* + * make sure we won't destroy the main document context + */ + oldinput = ctxt->input; + oldinputNr = ctxt->inputNr; + oldinputMax = ctxt->inputMax; + oldinputTab = ctxt->inputTab; + oldwellFormed = ctxt->wellFormed; + oldcharset = ctxt->charset; + + ctxt->inputTab = (xmlParserInputPtr *) + xmlMalloc(5 * sizeof(xmlParserInputPtr)); + if (ctxt->inputTab == NULL) { + ctxt->errNo = XML_ERR_NO_MEMORY; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "externalSubset: out of memory\n"); + ctxt->errNo = XML_ERR_NO_MEMORY; + ctxt->input = oldinput; + ctxt->inputNr = oldinputNr; + ctxt->inputMax = oldinputMax; + ctxt->inputTab = oldinputTab; + ctxt->charset = oldcharset; + return; + } + ctxt->inputNr = 0; + ctxt->inputMax = 5; + ctxt->input = NULL; + xmlPushInput(ctxt, input); + + /* + * On the fly encoding conversion if needed + */ + enc = xmlDetectCharEncoding(ctxt->input->cur, 4); + xmlSwitchEncoding(ctxt, enc); + + if (input->filename == 
NULL) + input->filename = (char *) xmlStrdup(SystemID); + input->line = 1; + input->col = 1; + input->base = ctxt->input->cur; + input->cur = ctxt->input->cur; + input->free = NULL; + + /* + * let's parse that entity knowing it's an external subset. + */ + xmlParseExternalSubset(ctxt, ExternalID, SystemID); + + /* + * Free up the external entities + */ + + while (ctxt->inputNr > 1) + xmlPopInput(ctxt); + xmlFreeInputStream(ctxt->input); + xmlFree(ctxt->inputTab); + + /* + * Restore the parsing context of the main entity + */ + ctxt->input = oldinput; + ctxt->inputNr = oldinputNr; + ctxt->inputMax = oldinputMax; + ctxt->inputTab = oldinputTab; + ctxt->charset = oldcharset; + /* ctxt->wellFormed = oldwellFormed; */ + } +} + +/** + * resolveEntity: + * @ctx: the user data (XML parser context) + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * The entity loader, to control the loading of external entities, + * the application can either: + * - override this resolveEntity() callback in the SAX block + * - or better use the xmlSetExternalEntityLoader() function to + * set up it's own entity resolution routine + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 
+ */ +xmlParserInputPtr +resolveEntity(void *ctx, const xmlChar *publicId, const xmlChar *systemId) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserInputPtr ret; + xmlChar *URI; + const char *base = NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.resolveEntity(%s, %s)\n", publicId, systemId); +#endif + + ret = xmlLoadExternalEntity((const char *) URI, + (const char *) publicId, ctxt); + if (URI != NULL) + xmlFree(URI); + return(ret); +} + +/** + * getEntity: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get an entity by name + * + * Returns the xmlEntityPtr if found. + */ +xmlEntityPtr +getEntity(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlEntityPtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.getEntity(%s)\n", name); +#endif + + ret = xmlGetDocEntity(ctxt->myDoc, name); + if ((ret != NULL) && (ctxt->validate) && (ret->children == NULL) && + (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + /* + * for validation purposes we really need to fetch and + * parse the external entity + */ + int parse; + xmlNodePtr children; + + parse = xmlParseCtxtExternalEntity(ctxt, + ret->SystemID, ret->ExternalID, &children); + xmlAddChildList((xmlNodePtr) ret, children); + } + return(ret); +} + +/** + * getParameterEntity: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get a parameter entity by name + * + * Returns the xmlEntityPtr if found. 
+ */ +xmlEntityPtr +getParameterEntity(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlEntityPtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.getParameterEntity(%s)\n", name); +#endif + + ret = xmlGetParameterEntity(ctxt->myDoc, name); + return(ret); +} + + +/** + * entityDecl: + * @ctx: the user data (XML parser context) + * @name: the entity name + * @type: the entity type + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @content: the entity value (without processing). + * + * An entity definition has been parsed + */ +void +entityDecl(void *ctx, const xmlChar *name, int type, + const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) +{ + xmlEntityPtr ent; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.entityDecl(%s, %d, %s, %s, %s)\n", + name, type, publicId, systemId, content); +#endif + if (ctxt->inSubset == 1) { + ent = xmlAddDocEntity(ctxt->myDoc, name, type, publicId, + systemId, content); + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the internal subset\n", name); + if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { + xmlChar *URI; + const char *base = NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + ent->URI = URI; + } + } else if (ctxt->inSubset == 2) { + ent = xmlAddDtdEntity(ctxt->myDoc, name, type, publicId, + systemId, content); + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the external subset\n", name); + if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) { + xmlChar *URI; + const char *base 
= NULL; + + if (ctxt->input != NULL) + base = ctxt->input->filename; + if (base == NULL) + base = ctxt->directory; + + URI = xmlBuildURI(systemId, (const xmlChar *) base); + ent->URI = URI; + } + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.entityDecl(%s) called while not in subset\n", name); + } +} + +/** + * attributeDecl: + * @ctx: the user data (XML parser context) + * @elem: the name of the element + * @fullname: the attribute name + * @type: the attribute type + * @def: the type of default value + * @defaultValue: the attribute default value + * @tree: the tree of enumerated value set + * + * An attribute definition has been parsed + */ +void +attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname, + int type, int def, const xmlChar *defaultValue, + xmlEnumerationPtr tree) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlAttributePtr attr; + xmlChar *name = NULL, *prefix = NULL; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", + elem, fullname, type, def, defaultValue); +#endif + name = xmlSplitQName(ctxt, fullname, &prefix); + if (ctxt->inSubset == 1) + attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem, + name, prefix, (xmlAttributeType) type, + (xmlAttributeDefault) def, defaultValue, tree); + else if (ctxt->inSubset == 2) + attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem, + name, prefix, (xmlAttributeType) type, + (xmlAttributeDefault) def, defaultValue, tree); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.attributeDecl(%s) called while not in subset\n", name); + return; + } + if (attr == 0) ctxt->valid = 0; + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, ctxt->myDoc, + attr); + if (prefix != NULL) + xmlFree(prefix); + if (name != 
NULL) + xmlFree(name); +} + +/** + * elementDecl: + * @ctx: the user data (XML parser context) + * @name: the element name + * @type: the element type + * @content: the element value tree + * + * An element definition has been parsed + */ +void +elementDecl(void *ctx, const xmlChar *name, int type, + xmlElementContentPtr content) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlElementPtr elem = NULL; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.elementDecl(%s, %d, ...)\n", + fullname, type); +#endif + + if (ctxt->inSubset == 1) + elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, + name, (xmlElementTypeVal) type, content); + else if (ctxt->inSubset == 2) + elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, + name, (xmlElementTypeVal) type, content); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.elementDecl(%s) called while not in subset\n", name); + return; + } + if (elem == NULL) ctxt->valid = 0; + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem); +} + +/** + * notationDecl: + * @ctx: the user data (XML parser context) + * @name: The name of the notation + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * What to do when a notation declaration has been parsed. 
+ */ +void +notationDecl(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNotationPtr nota = NULL; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId); +#endif + + if (ctxt->inSubset == 1) + nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name, + publicId, systemId); + else if (ctxt->inSubset == 2) + nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name, + publicId, systemId); + else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt, + "SAX.notationDecl(%s) called while not in subset\n", name); + return; + } + if (nota == NULL) ctxt->valid = 0; + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc, + nota); +} + +/** + * unparsedEntityDecl: + * @ctx: the user data (XML parser context) + * @name: The name of the entity + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @notationName: the name of the notation + * + * What to do when an unparsed entity declaration is parsed + */ +void +unparsedEntityDecl(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId, + const xmlChar *notationName) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", + name, publicId, systemId, notationName); +#endif + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateNotationUse(&ctxt->vctxt, ctxt->myDoc, + notationName); + xmlAddDocEntity(ctxt->myDoc, name, + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY, + publicId, systemId, notationName); +} + +/** + * setDocumentLocator: + * @ctx: the user data (XML parser context) + * @loc: A SAX Locator + * + * 
Receive the document locator at startup, actually xmlDefaultSAXLocator + * Everything is available on the context, so this is useless in our case. + */ +void +setDocumentLocator(void *ctx, xmlSAXLocatorPtr loc) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.setDocumentLocator()\n"); +#endif +} + +/** + * startDocument: + * @ctx: the user data (XML parser context) + * + * called when the document start being processed. + */ +void +startDocument(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlDocPtr doc; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.startDocument()\n"); +#endif + if (ctxt->html) { + if (ctxt->myDoc == NULL) +#ifdef LIBXML_HTML_ENABLED + ctxt->myDoc = htmlNewDocNoDtD(NULL, NULL); +#else + xmlGenericError(xmlGenericErrorContext, + "libxml2 built without HTML support\n"); +#endif + } else { + doc = ctxt->myDoc = xmlNewDoc(ctxt->version); + if (doc != NULL) { + if (ctxt->encoding != NULL) + doc->encoding = xmlStrdup(ctxt->encoding); + else + doc->encoding = NULL; + doc->standalone = ctxt->standalone; + } + } + if ((ctxt->myDoc != NULL) && (ctxt->myDoc->URL == NULL) && + (ctxt->input != NULL) && (ctxt->input->filename != NULL)) { + ctxt->myDoc->URL = xmlStrdup((xmlChar *) ctxt->input->filename); + } +} + +/** + * endDocument: + * @ctx: the user data (XML parser context) + * + * called when the document end has been detected. 
+ */ +void +endDocument(void *ctx) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.endDocument()\n"); +#endif + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc); + + /* + * Grab the encoding if it was added on-the-fly + */ + if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) && + (ctxt->myDoc->encoding == NULL)) { + ctxt->myDoc->encoding = ctxt->encoding; + ctxt->encoding = NULL; + } + if ((ctxt->inputTab[0]->encoding != NULL) && (ctxt->myDoc != NULL) && + (ctxt->myDoc->encoding == NULL)) { + ctxt->myDoc->encoding = xmlStrdup(ctxt->inputTab[0]->encoding); + } + if ((ctxt->charset != XML_CHAR_ENCODING_NONE) && (ctxt->myDoc != NULL) && + (ctxt->myDoc->charset == XML_CHAR_ENCODING_NONE)) { + ctxt->myDoc->charset = ctxt->charset; + } +} + +/** + * attribute: + * @ctx: the user data (XML parser context) + * @fullname: The attribute name, including namespace prefix + * @value: The attribute value + * + * Handle an attribute that has been read by the parser. + * The default handling is to convert the attribute into an + * DOM subtree and past it in a new xmlAttr element added to + * the element. 
+ */ +void +attribute(void *ctx, const xmlChar *fullname, const xmlChar *value) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlAttrPtr ret; + xmlChar *name; + xmlChar *ns; + xmlChar *nval; + xmlNsPtr namespace; + +/**************** +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.attribute(%s, %s)\n", fullname, value); +#endif + ****************/ + /* + * Split the full name into a namespace prefix and the tag name + */ + name = xmlSplitQName(ctxt, fullname, &ns); + + /* + * Do the last stage of the attribute normalization + * Needed for HTML too: + * http://www.w3.org/TR/html4/types.html#h-6.2 + */ + nval = xmlValidNormalizeAttributeValue(ctxt->myDoc, ctxt->node, + fullname, value); + if (nval != NULL) + value = nval; + + /* + * Check whether it's a namespace definition + */ + if ((!ctxt->html) && (ns == NULL) && + (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') && + (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) { + if (value[0] != 0) { + xmlURIPtr uri; + + uri = xmlParseURI((const char *)value); + if (uri == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "nmlns: %s not a valid URI\n", value); + } else { + if (uri->scheme == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "nmlns: URI %s is not absolute\n", value); + } + xmlFreeURI(uri); + } + } + + /* a default namespace definition */ + xmlNewNs(ctxt->node, value, NULL); + if (name != NULL) + xmlFree(name); + if (nval != NULL) + xmlFree(nval); + return; + } + if ((!ctxt->html) && + (ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') && + (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) { + /* + * Validate also for namespace decls, they are attributes from + * an XML-1.0 perspective + TODO ... 
doesn't map well with current API + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc, + ctxt->node, ret, value); + */ + /* a standard namespace definition */ + xmlNewNs(ctxt->node, value, name); + xmlFree(ns); + if (name != NULL) + xmlFree(name); + if (nval != NULL) + xmlFree(nval); + return; + } + + if (ns != NULL) + namespace = xmlSearchNs(ctxt->myDoc, ctxt->node, ns); + else { + namespace = NULL; + } + + /* !!!!!! */ + ret = xmlNewNsProp(ctxt->node, namespace, name, NULL); + + if (ret != NULL) { + if ((ctxt->replaceEntities == 0) && (!ctxt->html)) { + xmlNodePtr tmp; + + ret->children = xmlStringGetNodeList(ctxt->myDoc, value); + tmp = ret->children; + while (tmp != NULL) { + tmp->parent = (xmlNodePtr) ret; + if (tmp->next == NULL) + ret->last = tmp; + tmp = tmp->next; + } + } else if (value != NULL) { + ret->children = xmlNewDocText(ctxt->myDoc, value); + ret->last = ret->children; + if (ret->children != NULL) + ret->children->parent = (xmlNodePtr) ret; + } + } + + if ((!ctxt->html) && ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) { + + /* + * If we don't substitute entities, the validation should be + * done on a value with replaced entities anyway. + */ + if (!ctxt->replaceEntities) { + xmlChar *val; + + ctxt->depth++; + val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF, + 0,0,0); + ctxt->depth--; + if (val == NULL) + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, + ctxt->myDoc, ctxt->node, ret, value); + else { + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, + ctxt->myDoc, ctxt->node, ret, val); + xmlFree(val); + } + } else { + ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc, + ctxt->node, ret, value); + } + } else { + /* + * when validating, the ID registration is done at the attribute + * validation level. Otherwise we have to do specific handling here. 
+ */ + if (xmlIsID(ctxt->myDoc, ctxt->node, ret)) + xmlAddID(&ctxt->vctxt, ctxt->myDoc, value, ret); + else if (xmlIsRef(ctxt->myDoc, ctxt->node, ret)) + xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret); + } + + if (nval != NULL) + xmlFree(nval); + if (name != NULL) + xmlFree(name); + if (ns != NULL) + xmlFree(ns); +} + +/** + * startElement: + * @ctx: the user data (XML parser context) + * @fullname: The element name, including namespace prefix + * @atts: An array of name/value attributes pairs, NULL terminated + * + * called when an opening tag has been processed. + */ +void +startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + xmlNodePtr parent = ctxt->node; + xmlNsPtr ns; + xmlChar *name; + xmlChar *prefix; + const xmlChar *att; + const xmlChar *value; + int i; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.startElement(%s)\n", fullname); +#endif + + /* + * First check on validity: + */ + if (ctxt->validate && (ctxt->myDoc->extSubset == NULL) && + ((ctxt->myDoc->intSubset == NULL) || + ((ctxt->myDoc->intSubset->notations == NULL) && + (ctxt->myDoc->intSubset->elements == NULL) && + (ctxt->myDoc->intSubset->attributes == NULL) && + (ctxt->myDoc->intSubset->entities == NULL)))) { + if (ctxt->vctxt.error != NULL) { + ctxt->vctxt.error(ctxt->vctxt.userData, + "Validation failed: no DTD found !\n"); + } + ctxt->validate = 0; + } + + + /* + * Split the full name into a namespace prefix and the tag name + */ + name = xmlSplitQName(ctxt, fullname, &prefix); + + + /* + * Note : the namespace resolution is deferred until the end of the + * attributes parsing, since local namespace can be defined as + * an attribute at this level. 
+ */ + ret = xmlNewDocNode(ctxt->myDoc, NULL, name, NULL); + if (ret == NULL) return; + if (ctxt->myDoc->children == NULL) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, "Setting %s as root\n", name); +#endif + xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret); + } else if (parent == NULL) { + parent = ctxt->myDoc->children; + } + ctxt->nodemem = -1; + + /* + * We are parsing a new node. + */ +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, "pushing(%s)\n", name); +#endif + nodePush(ctxt, ret); + + /* + * Link the child element + */ + if (parent != NULL) { + if (parent->type == XML_ELEMENT_NODE) { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding child %s to %s\n", name, parent->name); +#endif + xmlAddChild(parent, ret); + } else { +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "adding sibling %s to ", name); + xmlDebugDumpOneNode(stderr, parent, 0); +#endif + xmlAddSibling(parent, ret); + } + } + + /* + * process all the attributes whose name start with "xml" + */ + if (atts != NULL) { + i = 0; + att = atts[i++]; + value = atts[i++]; + if (!ctxt->html) { + while ((att != NULL) && (value != NULL)) { + if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l')) + attribute(ctxt, att, value); + + att = atts[i++]; + value = atts[i++]; + } + } + } + + /* + * Search the namespace, note that since the attributes have been + * processed, the local namespaces are available. 
+ */ + ns = xmlSearchNs(ctxt->myDoc, ret, prefix); + if ((ns == NULL) && (parent != NULL)) + ns = xmlSearchNs(ctxt->myDoc, parent, prefix); + if ((prefix != NULL) && (ns == NULL)) { + ns = xmlNewNs(ret, NULL, prefix); + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Namespace prefix %s is not defined\n", prefix); + } + xmlSetNs(ret, ns); + + /* + * process all the other attributes + */ + if (atts != NULL) { + i = 0; + att = atts[i++]; + value = atts[i++]; + if (ctxt->html) { + while (att != NULL) { + attribute(ctxt, att, value); + att = atts[i++]; + value = atts[i++]; + } + } else { + while ((att != NULL) && (value != NULL)) { + if ((att[0] != 'x') || (att[1] != 'm') || (att[2] != 'l')) + attribute(ctxt, att, value); + + /* + * Next ones + */ + att = atts[i++]; + value = atts[i++]; + } + } + } + + /* + * If it's the Document root, finish the Dtd validation and + * check the document root element for validity + */ + if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) { + ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc); + ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc); + ctxt->vctxt.finishDtd = 1; + } + + if (prefix != NULL) + xmlFree(prefix); + if (name != NULL) + xmlFree(name); + +} + +/** + * endElement: + * @ctx: the user data (XML parser context) + * @name: The element name + * + * called when the end of an element has been detected. 
+ */ +void +endElement(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlParserNodeInfo node_info; + xmlNodePtr cur = ctxt->node; + +#ifdef DEBUG_SAX + if (name == NULL) + xmlGenericError(xmlGenericErrorContext, "SAX.endElement(NULL)\n"); + else + xmlGenericError(xmlGenericErrorContext, "SAX.endElement(%s)\n", name); +#endif + + /* Capture end position and add node */ + if (cur != NULL && ctxt->record_info) { + node_info.end_pos = ctxt->input->cur - ctxt->input->base; + node_info.end_line = ctxt->input->line; + node_info.node = cur; + xmlParserAddNodeInfo(ctxt, &node_info); + } + ctxt->nodemem = -1; + + if (ctxt->validate && ctxt->wellFormed && + ctxt->myDoc && ctxt->myDoc->intSubset) + ctxt->valid &= xmlValidateOneElement(&ctxt->vctxt, ctxt->myDoc, + cur); + + + /* + * end of parsing of this node. + */ +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, "popping(%s)\n", cur->name); +#endif + nodePop(ctxt); +} + +/** + * reference: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * called when an entity reference is detected. + */ +void +reference(void *ctx, const xmlChar *name) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlNodePtr ret; + +#ifdef DEBUG_SAX + xmlGenericError(xmlGenericErrorContext, + "SAX.reference(%s)\n", name); +#endif + if (name[0] == '#') + ret = xmlNewCharRef(ctxt->myDoc, name); + else + ret = xmlNewReference(ctxt->myDoc, name); +#ifdef DEBUG_SAX_TREE + xmlGenericError(xmlGenericErrorContext, + "add reference %s to %s \n", name, ctxt->node->name); +#endif + xmlAddChild(ctxt->node, ret); +} + +/** + * characters: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some chars from the parser. + * Question: how much at a time ??? 
+ */
+void
+characters(void *ctx, const xmlChar *ch, int len)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNodePtr lastChild;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext,
+            "SAX.characters(%.30s, %d)\n", ch, len);
+#endif
+    /*
+     * Handle the data if any. If there is no child
+     * add it as content, otherwise if the last child is text,
+     * concatenate it, else create a new node of type text.
+     */
+
+    /* Without a current node there is nowhere to attach the text. */
+    if (ctxt->node == NULL) {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "add chars: ctxt->node == NULL !\n");
+#endif
+        return;
+    }
+    lastChild = xmlGetLastChild(ctxt->node);
+#ifdef DEBUG_SAX_TREE
+    xmlGenericError(xmlGenericErrorContext,
+            "add chars to %s \n", ctxt->node->name);
+#endif
+
+    /*
+     * Here we needed an accelerator mechanism in case of very large
+     * elements. Use an attribute in the structure !!!
+     */
+    if (lastChild == NULL) {
+        /* first node, first time */
+        xmlNodeAddContentLen(ctxt->node, ch, len);
+#ifndef XML_USE_BUFFER_CONTENT
+        /* Start tracking the text length and the assumed buffer size. */
+        if (ctxt->node->children != NULL) {
+            ctxt->nodelen = len;
+            ctxt->nodemem = len + 1;
+        }
+#endif
+    } else {
+        /* nodemem == 0 disables the in-place append below. */
+        if ((xmlNodeIsText(lastChild)) && (ctxt->nodemem != 0)) {
+#ifndef XML_USE_BUFFER_CONTENT
+            /*
+             * The whole point of maintaining nodelen and nodemem,
+             * xmlTextConcat is too costly, i.e. compute length,
+             * reallocate a new buffer, move data, append ch. Here
+             * We try to minimize realloc() uses and avoid copying
+             * and recomputing length over and over.
+             */
+            if (ctxt->nodelen + len >= ctxt->nodemem) {
+                xmlChar *newbuf;
+                int size;
+
+                /* Grow to twice the needed size to amortize reallocs. */
+                size = ctxt->nodemem + len;
+                size *= 2;
+                newbuf = (xmlChar *) xmlRealloc(lastChild->content,size);
+                if (newbuf == NULL) {
+                    /* Old buffer is left in place; the new chars are dropped. */
+                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                        ctxt->sax->error(ctxt->userData,
+                             "SAX.characters(): out of memory\n");
+                    return;
+                }
+                ctxt->nodemem = size;
+                lastChild->content = newbuf;
+            }
+            /* Append at the tracked end and keep the buffer 0-terminated. */
+            memcpy(&lastChild->content[ctxt->nodelen], ch, len);
+            ctxt->nodelen += len;
+            lastChild->content[ctxt->nodelen] = 0;
+#else
+            xmlTextConcat(lastChild, ch, len);
+#endif
+        } else {
+            /* Mixed content, first time */
+            lastChild = xmlNewTextLen(ch, len);
+            xmlAddChild(ctxt->node, lastChild);
+#ifndef XML_USE_BUFFER_CONTENT
+            /* Restart the length/size tracking for the new text node. */
+            if (ctxt->node->children != NULL) {
+                ctxt->nodelen = len;
+                ctxt->nodemem = len + 1;
+            }
+#endif
+        }
+    }
+}
+
+/**
+ * ignorableWhitespace:
+ * @ctx: the user data (XML parser context)
+ * @ch:  a xmlChar string
+ * @len: the number of xmlChar
+ *
+ * receiving some ignorable whitespaces from the parser.
+ * Question: how much at a time ???
+ *
+ * The whitespace is not added to the tree: apart from the optional
+ * DEBUG_SAX trace this callback does nothing.
+ */
+void
+ignorableWhitespace(void *ctx, const xmlChar *ch, int len)
+{
+    /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext,
+            "SAX.ignorableWhitespace(%.30s, %d)\n", ch, len);
+#endif
+}
+
+/**
+ * processingInstruction:
+ * @ctx: the user data (XML parser context)
+ * @target:  the target name
+ * @data: the PI data's
+ *
+ * A processing instruction has been parsed.
+ */
+void
+processingInstruction(void *ctx, const xmlChar *target,
+                      const xmlChar *data)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNodePtr ret;
+    xmlNodePtr parent = ctxt->node;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext,
+            "SAX.processingInstruction(%s, %s)\n", target, data);
+#endif
+
+    ret = xmlNewPI(target, data);
+    if (ret == NULL) return;
+
+    /* Inside a DTD subset the PI belongs to that subset, not to the tree. */
+    if (ctxt->inSubset == 1) {
+        xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
+        return;
+    } else if (ctxt->inSubset == 2) {
+        xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
+        return;
+    }
+
+    /* No content yet, or no open element: attach to the document itself. */
+    if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "Setting PI %s as root\n", target);
+#endif
+        xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
+        return;
+    }
+
+    /* An element takes the PI as a child; any other node gets a sibling. */
+    if (parent->type == XML_ELEMENT_NODE) {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "adding PI %s child to %s\n", target, parent->name);
+#endif
+        xmlAddChild(parent, ret);
+    } else {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "adding PI %s sibling to ", target);
+        xmlDebugDumpOneNode(stderr, parent, 0);
+#endif
+        xmlAddSibling(parent, ret);
+    }
+}
+
+/**
+ * globalNamespace:
+ * @ctx: the user data (XML parser context)
+ * @href:  the namespace associated URN
+ * @prefix: the namespace prefix
+ *
+ * An old global namespace has been parsed. It is registered on the
+ * document being built through xmlNewGlobalNs().
+ */
+void
+globalNamespace(void *ctx, const xmlChar *href, const xmlChar *prefix)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext,
+            "SAX.globalNamespace(%s, %s)\n", href, prefix);
+#endif
+    xmlNewGlobalNs(ctxt->myDoc, href, prefix);
+}
+
+/**
+ * setNamespace:
+ * @ctx: the user data (XML parser context)
+ * @name:  the namespace prefix
+ *
+ * Set the current element namespace.
+ */
+
+void
+setNamespace(void *ctx, const xmlChar *name)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNsPtr ns;
+    xmlNodePtr parent;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext, "SAX.setNamespace(%s)\n", name);
+#endif
+    ns = xmlSearchNs(ctxt->myDoc, ctxt->node, name);
+    if (ns == NULL) { /* ctxt->node may not have a parent yet ! */
+        /* Retry from the enclosing element kept on the node stack. */
+        if (ctxt->nodeNr >= 2) {
+            parent = ctxt->nodeTab[ctxt->nodeNr - 2];
+            if (parent != NULL)
+                ns = xmlSearchNs(ctxt->myDoc, parent, name);
+        }
+    }
+    xmlSetNs(ctxt->node, ns);
+}
+
+/**
+ * getNamespace:
+ * @ctx: the user data (XML parser context)
+ *
+ * Get the current element namespace.
+ *
+ * Returns the xmlNsPtr or NULL if none (including when there is no
+ * current element)
+ */
+
+xmlNsPtr
+getNamespace(void *ctx)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNsPtr ret;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext, "SAX.getNamespace()\n");
+#endif
+    /* ctxt->node is NULL when no element is open: report "none". */
+    if (ctxt->node == NULL)
+        return(NULL);
+    ret = ctxt->node->ns;
+    return(ret);
+}
+
+/**
+ * checkNamespace:
+ * @ctx: the user data (XML parser context)
+ * @namespace: the namespace to check against
+ *
+ * Check that the current element namespace is the same as the
+ * one read upon parsing. On a mismatch the SAX error callback is
+ * invoked (when set) and the document is flagged as not well formed.
+ *
+ * Returns 1 if true 0 otherwise (also 0 when there is no current
+ * element or when @namespace is NULL)
+ */
+
+int
+checkNamespace(void *ctx, xmlChar *namespace)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNodePtr cur = ctxt->node;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext,
+            "SAX.checkNamespace(%s)\n", namespace);
+#endif
+
+    /* No open element: nothing to compare the end tag prefix against. */
+    if (cur == NULL)
+        return(0);
+
+    /*
+     * Check that the Name in the ETag is the same as in the STag.
+     */
+    if (namespace == NULL) {
+        /* End tag has no prefix, so the start tag must not have one. */
+        if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt,
+                 "End tags for %s don't hold the namespace %s\n",
+                                 cur->name, cur->ns->prefix);
+            ctxt->wellFormed = 0;
+        }
+    } else {
+        if ((cur->ns == NULL) || (cur->ns->prefix == NULL)) {
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt,
+                 "End tags %s holds a prefix %s not used by the open tag\n",
+                                 cur->name, namespace);
+            ctxt->wellFormed = 0;
+        } else if (!xmlStrEqual(namespace, cur->ns->prefix)) {
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt,
+    "Start and End tags for %s don't use the same namespaces: %s and %s\n",
+                                 cur->name, cur->ns->prefix, namespace);
+            ctxt->wellFormed = 0;
+        } else
+            return(1);
+    }
+    return(0);
+}
+
+/**
+ * namespaceDecl:
+ * @ctx: the user data (XML parser context)
+ * @href:  the namespace associated URN
+ * @prefix: the namespace prefix
+ *
+ * A namespace has been parsed. It is declared on the current node
+ * through xmlNewNs().
+ */
+void
+namespaceDecl(void *ctx, const xmlChar *href, const xmlChar *prefix)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+#ifdef DEBUG_SAX
+    if (prefix == NULL)
+        xmlGenericError(xmlGenericErrorContext,
+                "SAX.namespaceDecl(%s, NULL)\n", href);
+    else
+        xmlGenericError(xmlGenericErrorContext,
+                "SAX.namespaceDecl(%s, %s)\n", href, prefix);
+#endif
+    xmlNewNs(ctxt->node, href, prefix);
+}
+
+/**
+ * comment:
+ * @ctx: the user data (XML parser context)
+ * @value:  the comment content
+ *
+ * A comment has been parsed.
+ */
+void
+comment(void *ctx, const xmlChar *value)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNodePtr ret;
+    xmlNodePtr parent = ctxt->node;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext, "SAX.comment(%s)\n", value);
+#endif
+    ret = xmlNewDocComment(ctxt->myDoc, value);
+    if (ret == NULL) return;
+
+    /* Inside a DTD subset the comment belongs to that subset. */
+    if (ctxt->inSubset == 1) {
+        xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
+        return;
+    } else if (ctxt->inSubset == 2) {
+        xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
+        return;
+    }
+    /* No content yet, or no open element: attach to the document itself. */
+    if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "Setting comment as root\n");
+#endif
+        xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
+        return;
+    }
+    /* An element takes the comment as a child; other nodes get a sibling. */
+    if (parent->type == XML_ELEMENT_NODE) {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "adding comment child to %s\n", parent->name);
+#endif
+        xmlAddChild(parent, ret);
+    } else {
+#ifdef DEBUG_SAX_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "adding comment sibling to ");
+        xmlDebugDumpOneNode(stderr, parent, 0);
+#endif
+        xmlAddSibling(parent, ret);
+    }
+}
+
+/**
+ * cdataBlock:
+ * @ctx: the user data (XML parser context)
+ * @value:  The pcdata content
+ * @len:  the block length
+ *
+ * called when a pcdata block has been parsed
+ *
+ * The data is concatenated to the last child of the current node when
+ * that child is already a CDATA section; otherwise a new CDATA block
+ * is created and added as a child.
+ */
+void
+cdataBlock(void *ctx, const xmlChar *value, int len)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlNodePtr ret, lastChild;
+
+#ifdef DEBUG_SAX
+    xmlGenericError(xmlGenericErrorContext,
+            "SAX.pcdata(%.10s, %d)\n", value, len);
+#endif
+    lastChild = xmlGetLastChild(ctxt->node);
+#ifdef DEBUG_SAX_TREE
+    xmlGenericError(xmlGenericErrorContext,
+            "add chars to %s \n", ctxt->node->name);
+#endif
+    if ((lastChild != NULL) &&
+        (lastChild->type == XML_CDATA_SECTION_NODE)) {
+        xmlTextConcat(lastChild, value, len);
+    } else {
+        ret = xmlNewCDataBlock(ctxt->myDoc, value, len);
+        xmlAddChild(ctxt->node, ret);
+    }
+}
+
+/*
+ * Default handler for XML, builds the DOM tree
+ *
+ * Positional initializer: the entries must follow the field order of
+ * xmlSAXHandler, which is declared outside this file.
+ */
+xmlSAXHandler xmlDefaultSAXHandler = {
+    internalSubset,
+    isStandalone,
+    hasInternalSubset,
+    hasExternalSubset,
+    resolveEntity,
+    getEntity,
+    entityDecl,
+    notationDecl,
+    attributeDecl,
+    elementDecl,
+    unparsedEntityDecl,
+    setDocumentLocator,
+    startDocument,
+    endDocument,
+    startElement,
+    endElement,
+    reference,
+    characters,
+    ignorableWhitespace,
+    processingInstruction,
+    comment,
+    xmlParserWarning,
+    xmlParserError,
+    xmlParserError,
+    getParameterEntity,
+    cdataBlock,
+    externalSubset,
+};
+
+/**
+ * xmlDefaultSAXHandlerInit:
+ *
+ * Initialize the default SAX handler
+ *
+ * Every callback is assigned by field name. The warning callback is
+ * set to NULL when xmlGetWarningsDefaultValue is 0.
+ */
+void
+xmlDefaultSAXHandlerInit(void)
+{
+    xmlDefaultSAXHandler.internalSubset = internalSubset;
+    xmlDefaultSAXHandler.externalSubset = externalSubset;
+    xmlDefaultSAXHandler.isStandalone = isStandalone;
+    xmlDefaultSAXHandler.hasInternalSubset = hasInternalSubset;
+    xmlDefaultSAXHandler.hasExternalSubset = hasExternalSubset;
+    xmlDefaultSAXHandler.resolveEntity = resolveEntity;
+    xmlDefaultSAXHandler.getEntity = getEntity;
+    xmlDefaultSAXHandler.getParameterEntity = getParameterEntity;
+    xmlDefaultSAXHandler.entityDecl = entityDecl;
+    xmlDefaultSAXHandler.attributeDecl = attributeDecl;
+    xmlDefaultSAXHandler.elementDecl = elementDecl;
+    xmlDefaultSAXHandler.notationDecl = notationDecl;
+    xmlDefaultSAXHandler.unparsedEntityDecl = unparsedEntityDecl;
+    xmlDefaultSAXHandler.setDocumentLocator = setDocumentLocator;
+    xmlDefaultSAXHandler.startDocument = startDocument;
+    xmlDefaultSAXHandler.endDocument = endDocument;
+    xmlDefaultSAXHandler.startElement = startElement;
+    xmlDefaultSAXHandler.endElement = endElement;
+    xmlDefaultSAXHandler.reference = reference;
+    xmlDefaultSAXHandler.characters = characters;
+    xmlDefaultSAXHandler.cdataBlock = cdataBlock;
+    xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
+    xmlDefaultSAXHandler.processingInstruction = processingInstruction;
+    xmlDefaultSAXHandler.comment = comment;
+    if (xmlGetWarningsDefaultValue == 0)
+        xmlDefaultSAXHandler.warning = NULL;
+    else
+        xmlDefaultSAXHandler.warning = xmlParserWarning;
+    xmlDefaultSAXHandler.error = xmlParserError;
+    xmlDefaultSAXHandler.fatalError = xmlParserError;
+}
+
+/*
+ * Default handler for HTML, builds the DOM tree
+ *
+ * NOTE(review): this table lists getParameterEntity in the same slot as
+ * the XML table above, while htmlDefaultSAXHandlerInit() sets the
+ * getParameterEntity field to NULL - confirm which one is intended.
+ */
+xmlSAXHandler htmlDefaultSAXHandler = {
+    internalSubset,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    getEntity,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    setDocumentLocator,
+    startDocument,
+    endDocument,
+    startElement,
+    endElement,
+    NULL,
+    characters,
+    ignorableWhitespace,
+    NULL,
+    comment,
+    xmlParserWarning,
+    xmlParserError,
+    xmlParserError,
+    getParameterEntity,
+    cdataBlock,
+    NULL,
+};
+
+/**
+ * htmlDefaultSAXHandlerInit:
+ *
+ * Initialize the default SAX handler
+ *
+ * Callbacks that this handler does not provide are set to NULL.
+ */
+void
+htmlDefaultSAXHandlerInit(void)
+{
+    htmlDefaultSAXHandler.internalSubset = internalSubset;
+    htmlDefaultSAXHandler.externalSubset = NULL;
+    htmlDefaultSAXHandler.isStandalone = NULL;
+    htmlDefaultSAXHandler.hasInternalSubset = NULL;
+    htmlDefaultSAXHandler.hasExternalSubset = NULL;
+    htmlDefaultSAXHandler.resolveEntity = NULL;
+    htmlDefaultSAXHandler.getEntity = getEntity;
+    htmlDefaultSAXHandler.getParameterEntity = NULL;
+    htmlDefaultSAXHandler.entityDecl = NULL;
+    htmlDefaultSAXHandler.attributeDecl = NULL;
+    htmlDefaultSAXHandler.elementDecl = NULL;
+    htmlDefaultSAXHandler.notationDecl = NULL;
+    htmlDefaultSAXHandler.unparsedEntityDecl = NULL;
+    htmlDefaultSAXHandler.setDocumentLocator = setDocumentLocator;
+    htmlDefaultSAXHandler.startDocument = startDocument;
+    htmlDefaultSAXHandler.endDocument = endDocument;
+    htmlDefaultSAXHandler.startElement = startElement;
+    htmlDefaultSAXHandler.endElement = endElement;
+    htmlDefaultSAXHandler.reference = NULL;
+    htmlDefaultSAXHandler.characters = characters;
+    htmlDefaultSAXHandler.cdataBlock = cdataBlock;
+    htmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
+    htmlDefaultSAXHandler.processingInstruction = NULL;
+    htmlDefaultSAXHandler.comment = comment;
+    htmlDefaultSAXHandler.warning = xmlParserWarning;
+    htmlDefaultSAXHandler.error = xmlParserError;
+    htmlDefaultSAXHandler.fatalError = xmlParserError;
+}
+
+/*
+ * Default handler for SGML, builds the DOM tree
+ *
+ * NOTE(review): as in the HTML table, getParameterEntity is listed here
+ * while sgmlDefaultSAXHandlerInit() sets that field to NULL - confirm.
+ */
+xmlSAXHandler sgmlDefaultSAXHandler = {
+    internalSubset,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    getEntity,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    setDocumentLocator,
+    startDocument,
+    endDocument,
+    startElement,
+    endElement,
+    NULL,
+    characters,
+    ignorableWhitespace,
+    NULL,
+    comment,
+    xmlParserWarning,
+    xmlParserError,
+    xmlParserError,
+    getParameterEntity,
+    NULL,
+    NULL,
+};
+
+/**
+ * sgmlDefaultSAXHandlerInit:
+ *
+ * Initialize the default SAX handler
+ *
+ * Callbacks that this handler does not provide are set to NULL.
+ */
+void
+sgmlDefaultSAXHandlerInit(void)
+{
+    sgmlDefaultSAXHandler.internalSubset = internalSubset;
+    sgmlDefaultSAXHandler.externalSubset = NULL;
+    sgmlDefaultSAXHandler.isStandalone = NULL;
+    sgmlDefaultSAXHandler.hasInternalSubset = NULL;
+    sgmlDefaultSAXHandler.hasExternalSubset = NULL;
+    sgmlDefaultSAXHandler.resolveEntity = NULL;
+    sgmlDefaultSAXHandler.getEntity = getEntity;
+    sgmlDefaultSAXHandler.getParameterEntity = NULL;
+    sgmlDefaultSAXHandler.entityDecl = NULL;
+    sgmlDefaultSAXHandler.attributeDecl = NULL;
+    sgmlDefaultSAXHandler.elementDecl = NULL;
+    sgmlDefaultSAXHandler.notationDecl = NULL;
+    sgmlDefaultSAXHandler.unparsedEntityDecl = NULL;
+    sgmlDefaultSAXHandler.setDocumentLocator = setDocumentLocator;
+    sgmlDefaultSAXHandler.startDocument = startDocument;
+    sgmlDefaultSAXHandler.endDocument = endDocument;
+    sgmlDefaultSAXHandler.startElement = startElement;
+    sgmlDefaultSAXHandler.endElement = endElement;
+    sgmlDefaultSAXHandler.reference = NULL;
+    sgmlDefaultSAXHandler.characters = characters;
+    sgmlDefaultSAXHandler.cdataBlock = NULL;
+    sgmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
+    sgmlDefaultSAXHandler.processingInstruction = NULL;
+    sgmlDefaultSAXHandler.comment = comment;
+    sgmlDefaultSAXHandler.warning = xmlParserWarning;
+    sgmlDefaultSAXHandler.error = xmlParserError;
+    sgmlDefaultSAXHandler.fatalError = xmlParserError;
+}
diff --git a/SAX.h b/SAX.h
new file mode 100644
index 00000000..4fabbdf1
--- /dev/null
+++ b/SAX.h
@@ -0,0 +1,120 @@
+/*
+ * SAX.h : Default SAX handler interfaces.
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard
+ */
+
+
+#ifndef __XML_SAX_H__
+#define __XML_SAX_H__
+
+#include
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+const xmlChar * getPublicId (void *ctx);
+const xmlChar * getSystemId (void *ctx);
+void setDocumentLocator (void *ctx,
+                         xmlSAXLocatorPtr loc);
+
+int getLineNumber (void *ctx);
+int getColumnNumber (void *ctx);
+
+int isStandalone (void *ctx);
+int hasInternalSubset (void *ctx);
+int hasExternalSubset (void *ctx);
+
+void internalSubset (void *ctx,
+                     const xmlChar *name,
+                     const xmlChar *ExternalID,
+                     const xmlChar *SystemID);
+void externalSubset (void *ctx,
+                     const xmlChar *name,
+                     const xmlChar *ExternalID,
+                     const xmlChar *SystemID);
+xmlEntityPtr getEntity (void *ctx,
+                        const xmlChar *name);
+xmlEntityPtr getParameterEntity (void *ctx,
+                                 const xmlChar *name);
+xmlParserInputPtr resolveEntity (void *ctx,
+                                 const xmlChar *publicId,
+                                 const xmlChar *systemId);
+
+void entityDecl (void *ctx,
+                 const xmlChar *name,
+                 int type,
+                 const xmlChar *publicId,
+                 const xmlChar *systemId,
+                 xmlChar *content);
+void attributeDecl (void *ctx,
+                    const xmlChar *elem,
+                    const xmlChar *name,
+                    int type,
+                    int def,
+                    const xmlChar *defaultValue,
+                    xmlEnumerationPtr tree);
+void elementDecl (void *ctx,
+                  const xmlChar *name,
+                  int type,
+                  xmlElementContentPtr content);
+void notationDecl (void *ctx,
+                   const xmlChar *name,
+                   const xmlChar *publicId,
+                   const xmlChar *systemId);
+void unparsedEntityDecl (void *ctx,
+                         const xmlChar *name,
+                         const xmlChar *publicId,
+                         const xmlChar *systemId,
+                         const xmlChar *notationName);
+
+void startDocument (void *ctx);
+void endDocument (void *ctx);
+void attribute (void *ctx,
+                const xmlChar *fullname,
+                const xmlChar *value);
+void startElement (void *ctx,
+                   const xmlChar *fullname,
+                   const xmlChar **atts);
+void endElement (void *ctx,
+                 const xmlChar *name);
+void reference (void *ctx,
+                const xmlChar *name);
+void characters (void *ctx,
+                 const xmlChar *ch,
+                 int len);
+void ignorableWhitespace (void *ctx,
+                          const xmlChar *ch,
+                          int len);
+void processingInstruction (void *ctx,
+                            const xmlChar *target,
+                            const xmlChar *data);
+void globalNamespace (void *ctx,
+                      const xmlChar *href,
+                      const xmlChar *prefix);
+void setNamespace (void *ctx,
+                   const xmlChar *name);
+xmlNsPtr getNamespace (void *ctx);
+int checkNamespace (void *ctx,
+                    xmlChar *nameSpace);
+void namespaceDecl (void *ctx,
+                    const xmlChar *href,
+                    const xmlChar *prefix);
+void comment (void *ctx,
+              const xmlChar *value);
+void cdataBlock (void *ctx,
+                 const xmlChar *value,
+                 int len);
+
+void xmlDefaultSAXHandlerInit (void);
+void htmlDefaultSAXHandlerInit (void);
+void sgmlDefaultSAXHandlerInit (void);
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_SAX_H__ */
diff --git a/autogen.sh b/autogen.sh
index 28088eb9..e5a30eba 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -36,7 +36,7 @@ if test "$DIE" -eq 1; then
 	exit 1
 fi
 
-test -f libxml/entities.h || {
+test -f entities.h || {
 	echo "You must run this script in the top-level libxml directory"
 	exit 1
 }
diff --git a/configure.in b/configure.in
index 13dd7df7..a505a80b 100644
--- a/configure.in
+++ b/configure.in
@@ -1,6 +1,6 @@
 dnl Process this file with autoconf to produce a configure script.
 AC_PREREQ(2.2)
-AC_INIT(libxml/entities.c)
+AC_INIT(entities.c)
 AM_CONFIG_HEADER(config.h)
 
 AC_CANONICAL_HOST
@@ -42,10 +42,32 @@ AM_PROG_LIBTOOL
 AM_MAINTAINER_MODE
 
 dnl Checks for zlib library.
-AC_CHECK_HEADERS(zlib.h, - AC_CHECK_LIB(z, gzread, [AC_DEFINE(HAVE_LIBZ) LIBS="$LIBS -lz"]) -) +_cppflags="${CPPFLAGS}" +_ldflags="${LDFLAGS}" +AC_ARG_WITH(zlib, +[ --with-zlib[=DIR] use libz in DIR],[ + if test "$withval" != "no" -a "$withval" != "yes"; then + Z_DIR=$withval + CPPFLAGS="${CPPFLAGS} -I$withval/include" + LDFLAGS="${LDFLAGS} -L$withval/lib" + fi +]) + +AC_CHECK_HEADERS(zlib.h, + AC_CHECK_LIB(z, gzread,[ + AC_DEFINE(HAVE_LIBZ) + if test "x${Z_DIR}" != "x"; then + Z_CFLAGS="-I${Z_DIR}/include" + Z_LIBS="-L${Z_DIR}/lib -lz" + else + Z_LIBS="-lz" + fi])) +AC_SUBST(Z_CFLAGS) +AC_SUBST(Z_LIBS) + +CPPFLAGS=${_cppflags} +LDFLAGS=${_ldflags} dnl Checks for header files. AC_HEADER_DIRENT @@ -343,15 +365,37 @@ AC_SUBST(HAVE_ISINF) AC_SUBST(M_LIBS) AC_SUBST(RDL_LIBS) -AC_OUTPUT([ - Makefile - libxml/Makefile - libxml/xmlversion.h - libxml/xml2-config - libxml/libxml-2.0.pc - libxml/xml2Conf.sh - doc/Makefile - example/Makefile - libxml.spec -]) +dnl +dnl create the libxml and include links needed to get dependencies right +dnl +if test ! -d $srcdir/include/libxml +then + if test ! -d $srcdir/include + then + rm -f $srcdir/include + mkdir $srcdir/include + fi + rm -f $srcdir/libxml + (cd $srcdir/include ; ln -s .. libxml) +fi +if test ! -r $srcdir/libxml +then + (cd $srcdir ; ln -s include/libxml libxml) +fi +if test ! -r include/libxml +then + if test ! -d include + then + rm -f include + mkdir include + fi + (cd include ; ln -s ../libxml libxml) +fi +if test ! -r libxml +then + rm -rf libxml + ln -s $srcdir/include/libxml libxml +fi + +AC_OUTPUT(libxml.spec Makefile include/Makefile doc/Makefile example/Makefile libxml/xmlversion.h xml2-config libxml-2.0.pc xml2Conf.sh) diff --git a/debugXML.c b/debugXML.c new file mode 100644 index 00000000..65d703d6 --- /dev/null +++ b/debugXML.c @@ -0,0 +1,1888 @@ +/* + * debugXML.c : This is a set of routines used for debugging the tree + * produced by the XML parser. + * + * See Copyright for the status of this software. 
+ *
+ * Daniel Veillard
+ */
+
+#ifdef WIN32
+#include "win32config.h"
+#else
+#include "config.h"
+#endif
+
+#include
+#ifdef LIBXML_DEBUG_ENABLED
+
+#include
+#include
+#ifdef HAVE_STDLIB_H
+#include
+#endif
+#ifdef HAVE_STRING_H
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define IS_BLANK(c)							\
+  (((c) == '\n') || ((c) == '\r') || ((c) == '\t') || ((c) == ' '))
+
+/*
+ * Print at most the first 40 xmlChar of str on output: blanks become a
+ * single space, values >= 0x80 are printed as #HEX, and "..." is
+ * appended when the string is longer than 40. NULL prints "(NULL)".
+ */
+void xmlDebugDumpString(FILE *output, const xmlChar *str) {
+    int i;
+    if (str == NULL) {
+        fprintf(output, "(NULL)");
+        return;
+    }
+    for (i = 0;i < 40;i++) {
+        if (str[i] == 0) return;
+        else if (IS_BLANK(str[i])) fputc(' ', output);
+        else if (str[i] >= 0x80)
+            fprintf(output, "#%X", str[i]);
+        else fputc(str[i], output);
+    }
+    fprintf(output, "...");
+}
+
+/*
+ * Dump a DTD node header (name, PUBLIC and SYSTEM identifiers) indented
+ * by depth, then report inconsistencies in its parent/doc/sibling links.
+ */
+void xmlDebugDumpDtd(FILE *output, xmlDtdPtr dtd, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+
+    if (dtd->type != XML_DTD_NODE) {
+        fprintf(output, "PBM: not a DTD\n");
+        return;
+    }
+    if (dtd->name != NULL)
+        fprintf(output, "DTD(%s)", dtd->name);
+    else
+        fprintf(output, "DTD");
+    if (dtd->ExternalID != NULL)
+        fprintf(output, ", PUBLIC %s", dtd->ExternalID);
+    if (dtd->SystemID != NULL)
+        fprintf(output, ", SYSTEM %s", dtd->SystemID);
+    fprintf(output, "\n");
+    /*
+     * Do a bit of checking
+     */
+    if (dtd->parent == NULL)
+        fprintf(output, "PBM: Dtd has no parent\n");
+    if (dtd->doc == NULL)
+        fprintf(output, "PBM: Dtd has no doc\n");
+    if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc))
+        fprintf(output, "PBM: Dtd doc differs from parent's one\n");
+    if (dtd->prev == NULL) {
+        if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd))
+            fprintf(output, "PBM: Dtd has no prev and not first of list\n");
+    } else {
+        if (dtd->prev->next != (xmlNodePtr) dtd)
+            fprintf(output, "PBM: Dtd prev->next : back link wrong\n");
+    }
+    if (dtd->next == NULL) {
+        if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd))
+            fprintf(output, "PBM: Dtd has no next and not last of list\n");
+    } else {
+        if (dtd->next->prev != (xmlNodePtr) dtd)
+            fprintf(output, "PBM: Dtd next->prev : forward link wrong\n");
+    }
+}
+
+/*
+ * Dump an attribute declaration (name, owning element, type, up to five
+ * enumeration values, default kind and default value) indented by depth,
+ * then report inconsistencies in its parent/doc/sibling links.
+ */
+void xmlDebugDumpAttrDecl(FILE *output, xmlAttributePtr attr, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+
+    if (attr->type != XML_ATTRIBUTE_DECL) {
+        fprintf(output, "PBM: not a Attr\n");
+        return;
+    }
+    if (attr->name != NULL)
+        fprintf(output, "ATTRDECL(%s)", attr->name);
+    else
+        fprintf(output, "PBM ATTRDECL noname!!!");
+    if (attr->elem != NULL)
+        fprintf(output, " for %s", attr->elem);
+    else
+        fprintf(output, " PBM noelem!!!");
+    switch (attr->atype) {
+        case XML_ATTRIBUTE_CDATA:
+            fprintf(output, " CDATA");
+            break;
+        case XML_ATTRIBUTE_ID:
+            fprintf(output, " ID");
+            break;
+        case XML_ATTRIBUTE_IDREF:
+            fprintf(output, " IDREF");
+            break;
+        case XML_ATTRIBUTE_IDREFS:
+            fprintf(output, " IDREFS");
+            break;
+        case XML_ATTRIBUTE_ENTITY:
+            fprintf(output, " ENTITY");
+            break;
+        case XML_ATTRIBUTE_ENTITIES:
+            fprintf(output, " ENTITIES");
+            break;
+        case XML_ATTRIBUTE_NMTOKEN:
+            fprintf(output, " NMTOKEN");
+            break;
+        case XML_ATTRIBUTE_NMTOKENS:
+            fprintf(output, " NMTOKENS");
+            break;
+        case XML_ATTRIBUTE_ENUMERATION:
+            fprintf(output, " ENUMERATION");
+            break;
+        case XML_ATTRIBUTE_NOTATION:
+            fprintf(output, " NOTATION ");
+            break;
+    }
+    if (attr->tree != NULL) {
+        int n;
+        xmlEnumerationPtr cur = attr->tree;
+
+        /* Show at most five values; "..." marks a truncated list. */
+        for (n = 0;n < 5; n++) {
+            if (n != 0)
+                fprintf(output, "|%s", cur->name);
+            else
+                fprintf(output, " (%s", cur->name);
+            cur = cur->next;
+            if (cur == NULL) break;
+        }
+        if (cur == NULL)
+            fprintf(output, ")");
+        else
+            fprintf(output, "...)");
+    }
+    switch (attr->def) {
+        case XML_ATTRIBUTE_NONE:
+            break;
+        case XML_ATTRIBUTE_REQUIRED:
+            fprintf(output, " REQUIRED");
+            break;
+        case XML_ATTRIBUTE_IMPLIED:
+            fprintf(output, " IMPLIED");
+            break;
+        case XML_ATTRIBUTE_FIXED:
+            fprintf(output, " FIXED");
+            break;
+    }
+    if (attr->defaultValue != NULL) {
+        fprintf(output, "\"");
+        xmlDebugDumpString(output, attr->defaultValue);
+        fprintf(output, "\"");
+    }
+    /* End the line on the requested stream, not on stdout. */
+    fprintf(output, "\n");
+
+    /*
+     * Do a bit of checking
+     */
+    if (attr->parent == NULL)
+        fprintf(output, "PBM: Attr has no parent\n");
+    if (attr->doc == NULL)
+        fprintf(output, "PBM: Attr has no doc\n");
+    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
+        fprintf(output, "PBM: Attr doc differs from parent's one\n");
+    if (attr->prev == NULL) {
+        if ((attr->parent != NULL) && (attr->parent->children != (xmlNodePtr)attr))
+            fprintf(output, "PBM: Attr has no prev and not first of list\n");
+    } else {
+        if (attr->prev->next != (xmlNodePtr) attr)
+            fprintf(output, "PBM: Attr prev->next : back link wrong\n");
+    }
+    if (attr->next == NULL) {
+        if ((attr->parent != NULL) && (attr->parent->last != (xmlNodePtr) attr))
+            fprintf(output, "PBM: Attr has no next and not last of list\n");
+    } else {
+        if (attr->next->prev != (xmlNodePtr) attr)
+            fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
+    }
+}
+
+/*
+ * Dump an element declaration (name, content type and content model)
+ * indented by depth, then report inconsistencies in its
+ * parent/doc/sibling links.
+ */
+void xmlDebugDumpElemDecl(FILE *output, xmlElementPtr elem, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+
+    if (elem->type != XML_ELEMENT_DECL) {
+        fprintf(output, "PBM: not a Elem\n");
+        return;
+    }
+    if (elem->name != NULL) {
+        fprintf(output, "ELEMDECL(");
+        xmlDebugDumpString(output, elem->name);
+        fprintf(output, ")");
+    } else
+        fprintf(output, "PBM ELEMDECL noname!!!");
+    switch (elem->etype) {
+        case XML_ELEMENT_TYPE_EMPTY:
+            fprintf(output, ", EMPTY");
+            break;
+        case XML_ELEMENT_TYPE_ANY:
+            fprintf(output, ", ANY");
+            break;
+        case XML_ELEMENT_TYPE_MIXED:
+            fprintf(output, ", MIXED ");
+            break;
+        case XML_ELEMENT_TYPE_ELEMENT:
+            /* Was printed as ", MIXED ", indistinguishable from the case above. */
+            fprintf(output, ", ELEMENT ");
+            break;
+    }
+    if (elem->content != NULL) {
+        char buf[5001];
+
+        buf[0] = 0;
+        /* NOTE(review): xmlSprintfElementContent() is given no buffer
+         * size here - confirm it cannot write past buf[5000]. */
+        xmlSprintfElementContent(buf, elem->content, 1);
+        buf[5000] = 0;
+        fprintf(output, "%s", buf);
+    }
+    /* End the line on the requested stream, not on stdout. */
+    fprintf(output, "\n");
+
+    /*
+     * Do a bit of checking
+     */
+    if (elem->parent == NULL)
+        fprintf(output, "PBM: Elem has no parent\n");
+    if (elem->doc == NULL)
+        fprintf(output, "PBM: Elem has no doc\n");
+    if ((elem->parent != NULL) && (elem->doc != elem->parent->doc))
+        fprintf(output, "PBM: Elem doc differs from parent's one\n");
+    if (elem->prev == NULL) {
+        if ((elem->parent != NULL) && (elem->parent->children != (xmlNodePtr)elem))
+            fprintf(output, "PBM: Elem has no prev and not first of list\n");
+    } else {
+        if (elem->prev->next != (xmlNodePtr) elem)
+            fprintf(output, "PBM: Elem prev->next : back link wrong\n");
+    }
+    if (elem->next == NULL) {
+        if ((elem->parent != NULL) && (elem->parent->last != (xmlNodePtr) elem))
+            fprintf(output, "PBM: Elem has no next and not last of list\n");
+    } else {
+        if (elem->next->prev != (xmlNodePtr) elem)
+            fprintf(output, "PBM: Elem next->prev : forward link wrong\n");
+    }
+}
+
+/*
+ * Dump an entity declaration (name, kind, identifiers, URI and content)
+ * indented by depth, then report inconsistencies in its
+ * parent/doc/sibling links.
+ */
+void xmlDebugDumpEntityDecl(FILE *output, xmlEntityPtr ent, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+
+    if (ent->type != XML_ENTITY_DECL) {
+        fprintf(output, "PBM: not a Entity decl\n");
+        return;
+    }
+    if (ent->name != NULL) {
+        fprintf(output, "ENTITYDECL(");
+        xmlDebugDumpString(output, ent->name);
+        fprintf(output, ")");
+    } else
+        fprintf(output, "PBM ENTITYDECL noname!!!");
+    switch (ent->etype) {
+        case XML_INTERNAL_GENERAL_ENTITY:
+            fprintf(output, ", internal\n");
+            break;
+        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+            fprintf(output, ", external parsed\n");
+            break;
+        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            fprintf(output, ", unparsed\n");
+            break;
+        case XML_INTERNAL_PARAMETER_ENTITY:
+            fprintf(output, ", parameter\n");
+            break;
+        case XML_EXTERNAL_PARAMETER_ENTITY:
+            fprintf(output, ", external parameter\n");
+            break;
+        case XML_INTERNAL_PREDEFINED_ENTITY:
+            fprintf(output, ", predefined\n");
+            break;
+    }
+    if (ent->ExternalID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, " ExternalID=%s\n", ent->ExternalID);
+    }
+    if (ent->SystemID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, " SystemID=%s\n", ent->SystemID);
+    }
+    if (ent->URI != NULL) {
+        fprintf(output, "%s", shift);
+        fprintf(output, " URI=%s\n", ent->URI);
+    }
+    if (ent->content) {
+        fprintf(output, "%s", shift);
+        fprintf(output, " content=");
+        xmlDebugDumpString(output, ent->content);
+        fprintf(output, "\n");
+    }
+
+    /*
+     * Do a bit of checking
+     */
+    if (ent->parent == NULL)
+        fprintf(output, "PBM: Ent has no parent\n");
+    if (ent->doc == NULL)
+        fprintf(output, "PBM: Ent has no doc\n");
+    if ((ent->parent != NULL) && (ent->doc != ent->parent->doc))
+        fprintf(output, "PBM: Ent doc differs from parent's one\n");
+    if (ent->prev == NULL) {
+        if ((ent->parent != NULL) && (ent->parent->children != (xmlNodePtr)ent))
+            fprintf(output, "PBM: Ent has no prev and not first of list\n");
+    } else {
+        if (ent->prev->next != (xmlNodePtr) ent)
+            fprintf(output, "PBM: Ent prev->next : back link wrong\n");
+    }
+    if (ent->next == NULL) {
+        if ((ent->parent != NULL) && (ent->parent->last != (xmlNodePtr) ent))
+            fprintf(output, "PBM: Ent has no next and not last of list\n");
+    } else {
+        if (ent->next->prev != (xmlNodePtr) ent)
+            fprintf(output, "PBM: Ent next->prev : forward link wrong\n");
+    }
+}
+
+/*
+ * Dump one namespace declaration (prefix and href) indented by depth.
+ */
+void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+    if (ns->type != XML_NAMESPACE_DECL) {
+        fprintf(output, "invalid namespace node %d\n", ns->type);
+        return;
+    }
+    if (ns->href == NULL) {
+        if (ns->prefix != NULL)
+            fprintf(output, "incomplete namespace %s href=NULL\n", ns->prefix);
+        else
+            fprintf(output, "incomplete default namespace href=NULL\n");
+    } else {
+        if (ns->prefix != NULL)
+            fprintf(output, "namespace %s href=", ns->prefix);
+        else
+            fprintf(output, "default namespace href=");
+
+        xmlDebugDumpString(output, ns->href);
+        fprintf(output, "\n");
+    }
+}
+
+/*
+ * Dump every namespace declaration of the list starting at ns.
+ */
+void xmlDebugDumpNamespaceList(FILE *output, xmlNsPtr ns, int depth) {
+    while (ns != NULL) {
+        xmlDebugDumpNamespace(output, ns, depth);
+        ns = ns->next;
+    }
+}
+
+/*
+ * Dump an entity (kind, name, identifiers, URI and content) indented
+ * by depth.
+ */
+void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+    switch (ent->etype) {
+        case XML_INTERNAL_GENERAL_ENTITY:
+            fprintf(output, "INTERNAL_GENERAL_ENTITY ");
+            break;
+        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+            fprintf(output, "EXTERNAL_GENERAL_PARSED_ENTITY ");
+            break;
+        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            fprintf(output, "EXTERNAL_GENERAL_UNPARSED_ENTITY ");
+            break;
+        case XML_INTERNAL_PARAMETER_ENTITY:
+            fprintf(output, "INTERNAL_PARAMETER_ENTITY ");
+            break;
+        case XML_EXTERNAL_PARAMETER_ENTITY:
+            fprintf(output, "EXTERNAL_PARAMETER_ENTITY ");
+            break;
+        default:
+            fprintf(output, "ENTITY_%d ! ", ent->etype);
+    }
+    fprintf(output, "%s\n", ent->name);
+    if (ent->ExternalID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "ExternalID=%s\n", ent->ExternalID);
+    }
+    if (ent->SystemID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "SystemID=%s\n", ent->SystemID);
+    }
+    if (ent->URI) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "URI=%s\n", ent->URI);
+    }
+    if (ent->content) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "content=");
+        xmlDebugDumpString(output, ent->content);
+        fprintf(output, "\n");
+    }
+}
+
+/*
+ * Dump an attribute (name, then its children one level deeper) indented
+ * by depth, then report inconsistencies in its parent/doc/sibling links.
+ */
+void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth) {
+    int i;
+    char shift[100];
+
+    /* Two spaces per level, capped at 25 levels so shift[] cannot overflow. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+
+    fprintf(output, "ATTRIBUTE ");
+    xmlDebugDumpString(output, attr->name);
+    fprintf(output, "\n");
+    if (attr->children != NULL)
+        xmlDebugDumpNodeList(output, attr->children, depth + 1);
+
+    /*
+     * Do a bit of checking
+     */
+    if (attr->parent == NULL)
+        fprintf(output, "PBM: Attr has no parent\n");
+    if (attr->doc == NULL)
+        fprintf(output, "PBM: Attr has no doc\n");
+    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
+        fprintf(output, "PBM: Attr doc differs from parent's one\n");
+    if (attr->prev == NULL) {
+        if ((attr->parent != NULL) && (attr->parent->properties != attr))
+            fprintf(output, "PBM: Attr has no prev and not first of list\n");
+    } else {
+        if (attr->prev->next != attr)
+            fprintf(output, "PBM: Attr prev->next : back link wrong\n");
+    }
+    if (attr->next != NULL) {
+        if (attr->next->prev != attr)
+            fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
+    }
+}
+
+/*
+ * Dump every attribute of the list starting at attr.
+ */
+void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth) {
+    while (attr != NULL) {
+        xmlDebugDumpAttr(output, attr, depth);
+        attr = attr->next;
+    }
+}
+
+void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
+    int i;
+    char shift[100];
+
+    for (i = 0;((i < depth) && (i < 25));i++)
shift[2 * i] = shift[2 * i + 1] = ' '; + shift[2 * i] = shift[2 * i + 1] = 0; + + switch (node->type) { + case XML_ELEMENT_NODE: + fprintf(output, shift); + fprintf(output, "ELEMENT "); + if ((node->ns != NULL) && (node->ns->prefix != NULL)) { + xmlDebugDumpString(output, node->ns->prefix); + fprintf(output, ":"); + } + xmlDebugDumpString(output, node->name); + fprintf(output, "\n"); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, shift); + fprintf(output, "Error, ATTRIBUTE found here\n"); + break; + case XML_TEXT_NODE: + fprintf(output, shift); + fprintf(output, "TEXT\n"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, shift); + fprintf(output, "CDATA_SECTION\n"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, shift); + fprintf(output, "ENTITY_REF(%s)\n", node->name); + break; + case XML_ENTITY_NODE: + fprintf(output, shift); + fprintf(output, "ENTITY\n"); + break; + case XML_PI_NODE: + fprintf(output, shift); + fprintf(output, "PI %s\n", node->name); + break; + case XML_COMMENT_NODE: + fprintf(output, shift); + fprintf(output, "COMMENT\n"); + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + fprintf(output, shift); + fprintf(output, "Error, DOCUMENT found here\n"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, shift); + fprintf(output, "DOCUMENT_TYPE\n"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, shift); + fprintf(output, "DOCUMENT_FRAG\n"); + break; + case XML_NOTATION_NODE: + fprintf(output, shift); + fprintf(output, "NOTATION\n"); + break; + case XML_DTD_NODE: + xmlDebugDumpDtd(output, (xmlDtdPtr) node, depth); + return; + case XML_ELEMENT_DECL: + xmlDebugDumpElemDecl(output, (xmlElementPtr) node, depth); + return; + case XML_ATTRIBUTE_DECL: + xmlDebugDumpAttrDecl(output, (xmlAttributePtr) node, depth); + return; + case XML_ENTITY_DECL: + xmlDebugDumpEntityDecl(output, (xmlEntityPtr) node, depth); + return; + case XML_NAMESPACE_DECL: + xmlDebugDumpNamespace(output, (xmlNsPtr) node, depth); + 
return; + case XML_XINCLUDE_START: + fprintf(output, shift); + fprintf(output, "INCLUDE START\n"); + return; + case XML_XINCLUDE_END: + fprintf(output, shift); + fprintf(output, "INCLUDE END\n"); + return; + default: + fprintf(output, shift); + fprintf(output, "NODE_%d !!!\n", node->type); + return; + } + if (node->doc == NULL) { + fprintf(output, shift); + fprintf(output, "doc == NULL !!!\n"); + } + if (node->nsDef != NULL) + xmlDebugDumpNamespaceList(output, node->nsDef, depth + 1); + if (node->properties != NULL) + xmlDebugDumpAttrList(output, node->properties, depth + 1); + if (node->type != XML_ENTITY_REF_NODE) { + if (node->content != NULL) { + shift[2 * i] = shift[2 * i + 1] = ' ' ; + shift[2 * i + 2] = shift[2 * i + 3] = 0 ; + fprintf(output, shift); + fprintf(output, "content="); +#ifndef XML_USE_BUFFER_CONTENT + xmlDebugDumpString(output, node->content); +#else + xmlDebugDumpString(output, xmlBufferContent(node->content)); +#endif + fprintf(output, "\n"); + } + } else { + xmlEntityPtr ent; + ent = xmlGetDocEntity(node->doc, node->name); + if (ent != NULL) + xmlDebugDumpEntity(output, ent, depth + 1); + } + /* + * Do a bit of checking + */ + if (node->parent == NULL) + fprintf(output, "PBM: Node has no parent\n"); + if (node->doc == NULL) + fprintf(output, "PBM: Node has no doc\n"); + if ((node->parent != NULL) && (node->doc != node->parent->doc)) + fprintf(output, "PBM: Node doc differs from parent's one\n"); + if (node->prev == NULL) { + if ((node->parent != NULL) && (node->parent->children != node)) + fprintf(output, "PBM: Node has no prev and not first of list\n"); + } else { + if (node->prev->next != node) + fprintf(output, "PBM: Node prev->next : back link wrong\n"); + } + if (node->next == NULL) { + if ((node->parent != NULL) && (node->parent->last != node)) + fprintf(output, "PBM: Node has no next and not last of list\n"); + } else { + if (node->next->prev != node) + fprintf(output, "PBM: Node next->prev : forward link wrong\n"); + } +} + +void 
xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth) { + xmlDebugDumpOneNode(output, node, depth); + if ((node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) + xmlDebugDumpNodeList(output, node->children, depth + 1); +} + +void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) { + while (node != NULL) { + xmlDebugDumpNode(output, node, depth); + node = node->next; + } +} + + +void xmlDebugDumpDocumentHead(FILE *output, xmlDocPtr doc) { + if (output == NULL) output = stdout; + if (doc == NULL) { + fprintf(output, "DOCUMENT == NULL !\n"); + return; + } + + switch (doc->type) { + case XML_ELEMENT_NODE: + fprintf(output, "Error, ELEMENT found here "); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "Error, ATTRIBUTE found here\n"); + break; + case XML_TEXT_NODE: + fprintf(output, "Error, TEXT\n"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, "Error, CDATA_SECTION\n"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "Error, ENTITY_REF\n"); + break; + case XML_ENTITY_NODE: + fprintf(output, "Error, ENTITY\n"); + break; + case XML_PI_NODE: + fprintf(output, "Error, PI\n"); + break; + case XML_COMMENT_NODE: + fprintf(output, "Error, COMMENT\n"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "DOCUMENT\n"); + break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "HTML DOCUMENT\n"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "Error, DOCUMENT_TYPE\n"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "Error, DOCUMENT_FRAG\n"); + break; + case XML_NOTATION_NODE: + fprintf(output, "Error, NOTATION\n"); + break; + default: + fprintf(output, "NODE_%d\n", doc->type); + } + if (doc->name != NULL) { + fprintf(output, "name="); + xmlDebugDumpString(output, BAD_CAST doc->name); + fprintf(output, "\n"); + } + if (doc->version != NULL) { + fprintf(output, "version="); + xmlDebugDumpString(output, doc->version); + fprintf(output, "\n"); + } + if (doc->encoding != NULL) { + fprintf(output, 
"encoding=");
+        xmlDebugDumpString(output, doc->encoding);
+        fprintf(output, "\n");
+    }
+    if (doc->URL != NULL) {
+        fprintf(output, "URL=");
+        xmlDebugDumpString(output, doc->URL);
+        fprintf(output, "\n");
+    }
+    if (doc->standalone)
+        fprintf(output, "standalone=true\n");
+    if (doc->oldNs != NULL)
+        xmlDebugDumpNamespaceList(output, doc->oldNs, 0);
+}
+
+/**
+ * xmlDebugDumpDocument:
+ * @output:  the FILE * for the output, stdout is used when NULL
+ * @doc:  the document
+ *
+ * Dumps the document header and then, for XML and HTML documents,
+ * the list of children nodes at depth 1.
+ */
+void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
+    if (output == NULL) output = stdout;
+    if (doc == NULL) {
+        fprintf(output, "DOCUMENT == NULL !\n");
+        return;
+    }
+    xmlDebugDumpDocumentHead(output, doc);
+    if (((doc->type == XML_DOCUMENT_NODE) ||
+         (doc->type == XML_HTML_DOCUMENT_NODE)) &&
+        (doc->children != NULL))
+        xmlDebugDumpNodeList(output, doc->children, 1);
+}
+
+/**
+ * xmlDebugDumpDTD:
+ * @output:  the FILE * for the output, stdout is used when NULL
+ * @dtd:  the DTD
+ *
+ * Dumps the DTD name and identifiers, checks the consistency of its
+ * parent/sibling links and then dumps its children, if any.
+ * Nothing is printed when @dtd is NULL.
+ */
+void xmlDebugDumpDTD(FILE *output, xmlDtdPtr dtd) {
+    /* same fallback as the other document level dump entry points */
+    if (output == NULL) output = stdout;
+    if (dtd == NULL)
+        return;
+    if (dtd->type != XML_DTD_NODE) {
+        fprintf(output, "PBM: not a DTD\n");
+        return;
+    }
+    if (dtd->name != NULL)
+        fprintf(output, "DTD(%s)", dtd->name);
+    else
+        fprintf(output, "DTD");
+    if (dtd->ExternalID != NULL)
+        fprintf(output, ", PUBLIC %s", dtd->ExternalID);
+    if (dtd->SystemID != NULL)
+        fprintf(output, ", SYSTEM %s", dtd->SystemID);
+    fprintf(output, "\n");
+    /*
+     * Do a bit of checking
+     */
+    if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc))
+        fprintf(output, "PBM: Dtd doc differs from parent's one\n");
+    if (dtd->prev == NULL) {
+        if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd))
+            fprintf(output, "PBM: Dtd has no prev and not first of list\n");
+    } else {
+        if (dtd->prev->next != (xmlNodePtr) dtd)
+            fprintf(output, "PBM: Dtd prev->next : back link wrong\n");
+    }
+    if (dtd->next == NULL) {
+        if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd))
+            fprintf(output, "PBM: Dtd has no next and not last of list\n");
+    } else {
+        if (dtd->next->prev != (xmlNodePtr) dtd)
+            fprintf(output, "PBM: Dtd next->prev : forward link wrong\n");
+    }
+    if (dtd->children == NULL)
+        fprintf(output, " DTD is empty\n");
+ else + xmlDebugDumpNodeList(output, dtd->children, 1); +} + +void xmlDebugDumpEntityCallback(xmlEntityPtr cur, FILE *output, + const xmlChar *name) { + fprintf(output, "%s : ", cur->name); + switch (cur->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + fprintf(output, "INTERNAL GENERAL, "); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + fprintf(output, "EXTERNAL PARSED, "); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + fprintf(output, "EXTERNAL UNPARSED, "); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + fprintf(output, "INTERNAL PARAMETER, "); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + fprintf(output, "EXTERNAL PARAMETER, "); + break; + default: + fprintf(output, "UNKNOWN TYPE %d", + cur->etype); + } + if (cur->ExternalID != NULL) + fprintf(output, "ID \"%s\"", cur->ExternalID); + if (cur->SystemID != NULL) + fprintf(output, "SYSTEM \"%s\"", cur->SystemID); + if (cur->orig != NULL) + fprintf(output, "\n orig \"%s\"", cur->orig); + if (cur->content != NULL) + fprintf(output, "\n content \"%s\"", cur->content); + fprintf(output, "\n"); +} + +void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) { + if (output == NULL) output = stdout; + if (doc == NULL) { + fprintf(output, "DOCUMENT == NULL !\n"); + return; + } + + switch (doc->type) { + case XML_ELEMENT_NODE: + fprintf(output, "Error, ELEMENT found here "); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "Error, ATTRIBUTE found here\n"); + break; + case XML_TEXT_NODE: + fprintf(output, "Error, TEXT\n"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, "Error, CDATA_SECTION\n"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "Error, ENTITY_REF\n"); + break; + case XML_ENTITY_NODE: + fprintf(output, "Error, ENTITY\n"); + break; + case XML_PI_NODE: + fprintf(output, "Error, PI\n"); + break; + case XML_COMMENT_NODE: + fprintf(output, "Error, COMMENT\n"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "DOCUMENT\n"); + break; + case XML_HTML_DOCUMENT_NODE: + 
fprintf(output, "HTML DOCUMENT\n"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "Error, DOCUMENT_TYPE\n"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "Error, DOCUMENT_FRAG\n"); + break; + case XML_NOTATION_NODE: + fprintf(output, "Error, NOTATION\n"); + break; + default: + fprintf(output, "NODE_%d\n", doc->type); + } + if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { + xmlEntitiesTablePtr table = (xmlEntitiesTablePtr) + doc->intSubset->entities; + fprintf(output, "Entities in internal subset\n"); + xmlHashScan(table, (xmlHashScanner)xmlDebugDumpEntityCallback, output); + } else + fprintf(output, "No entities in internal subset\n"); + if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { + xmlEntitiesTablePtr table = (xmlEntitiesTablePtr) + doc->extSubset->entities; + fprintf(output, "Entities in external subset\n"); + xmlHashScan(table, (xmlHashScanner)xmlDebugDumpEntityCallback, output); + } else + fprintf(output, "No entities in external subset\n"); +} + +static int xmlLsCountNode(xmlNodePtr node) { + int ret = 0; + xmlNodePtr list = NULL; + + switch (node->type) { + case XML_ELEMENT_NODE: + list = node->children; + break; + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif + list = ((xmlDocPtr) node)->children; + break; + case XML_ATTRIBUTE_NODE: + list = ((xmlAttrPtr) node)->children; + break; + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + if (node->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + ret = xmlStrlen(node->content); +#else + ret = xmlBufferLength(node->content); +#endif + } + break; + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NAMESPACE_DECL: + case 
XML_XINCLUDE_START: + case XML_XINCLUDE_END: + ret = 1; + break; + } + for (;list != NULL;ret++) + list = list->next; + return(ret); +} + +void xmlLsOneNode(FILE *output, xmlNodePtr node) { + switch (node->type) { + case XML_ELEMENT_NODE: + fprintf(output, "-"); + break; + case XML_ATTRIBUTE_NODE: + fprintf(output, "a"); + break; + case XML_TEXT_NODE: + fprintf(output, "t"); + break; + case XML_CDATA_SECTION_NODE: + fprintf(output, "c"); + break; + case XML_ENTITY_REF_NODE: + fprintf(output, "e"); + break; + case XML_ENTITY_NODE: + fprintf(output, "E"); + break; + case XML_PI_NODE: + fprintf(output, "p"); + break; + case XML_COMMENT_NODE: + fprintf(output, "c"); + break; + case XML_DOCUMENT_NODE: + fprintf(output, "d"); + break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "h"); + break; + case XML_DOCUMENT_TYPE_NODE: + fprintf(output, "T"); + break; + case XML_DOCUMENT_FRAG_NODE: + fprintf(output, "F"); + break; + case XML_NOTATION_NODE: + fprintf(output, "N"); + break; + default: + fprintf(output, "?"); + } + if (node->properties != NULL) + fprintf(output, "a"); + else + fprintf(output, "-"); + if (node->nsDef != NULL) + fprintf(output, "n"); + else + fprintf(output, "-"); + + fprintf(output, " %8d ", xmlLsCountNode(node)); + + switch (node->type) { + case XML_ELEMENT_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_ATTRIBUTE_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_TEXT_NODE: + if (node->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + xmlDebugDumpString(output, node->content); +#else + xmlDebugDumpString(output, xmlBufferContent(node->content)); +#endif + } + break; + case XML_CDATA_SECTION_NODE: + break; + case XML_ENTITY_REF_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_ENTITY_NODE: + if (node->name != NULL) + fprintf(output, "%s", node->name); + break; + case XML_PI_NODE: + if (node->name != NULL) + fprintf(output, "%s", 
node->name); + break; + case XML_COMMENT_NODE: + break; + case XML_DOCUMENT_NODE: + break; + case XML_HTML_DOCUMENT_NODE: + break; + case XML_DOCUMENT_TYPE_NODE: + break; + case XML_DOCUMENT_FRAG_NODE: + break; + case XML_NOTATION_NODE: + break; + default: + if (node->name != NULL) + fprintf(output, "%s", node->name); + } + fprintf(output, "\n"); +} + +/**************************************************************** + * * + * The XML shell related functions * + * * + ****************************************************************/ + +/* + * TODO: Improvement/cleanups for the XML shell + * - allow to shell out an editor on a subpart + * - cleanup function registrations (with help) and calling + * - provide registration routines + */ + +/** + * xmlShellList: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "ls" + * Does an Unix like listing of the given node (like a directory) + * + * Returns 0 + */ +int +xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node, + xmlNodePtr node2) { + xmlNodePtr cur; + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + cur = ((xmlDocPtr) node)->children; + } else if (node->children != NULL) { + cur = node->children; + } else { + xmlLsOneNode(stdout, node); + return(0); + } + while (cur != NULL) { + xmlLsOneNode(stdout, cur); + cur = cur->next; + } + return(0); +} + +/** + * xmlShellDir: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "dir" + * dumps informations about the node (namespace, attributes, content). 
+ * + * Returns 0 + */ +int +xmlShellDir(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node, + xmlNodePtr node2) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + xmlDebugDumpDocumentHead(stdout, (xmlDocPtr) node); + } else if (node->type == XML_ATTRIBUTE_NODE) { + xmlDebugDumpAttr(stdout, (xmlAttrPtr) node, 0); + } else { + xmlDebugDumpOneNode(stdout, node, 0); + } + return(0); +} + +/** + * xmlShellCat: + * @ctxt: the shell context + * @arg: unused + * @node: a node + * @node2: unused + * + * Implements the XML shell function "cat" + * dumps the serialization node content (XML or HTML). + * + * Returns 0 + */ +int +xmlShellCat(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node, + xmlNodePtr node2) { + if (ctxt->doc->type == XML_HTML_DOCUMENT_NODE) { +#ifdef LIBXML_HTML_ENABLED + if (node->type == XML_HTML_DOCUMENT_NODE) + htmlDocDump(stdout, (htmlDocPtr) node); + else + htmlNodeDumpFile(stdout, ctxt->doc, node); +#else + if (node->type == XML_DOCUMENT_NODE) + xmlDocDump(stdout, (xmlDocPtr) node); + else + xmlElemDump(stdout, ctxt->doc, node); +#endif /* LIBXML_HTML_ENABLED */ + } else { + if (node->type == XML_DOCUMENT_NODE) + xmlDocDump(stdout, (xmlDocPtr) node); + else + xmlElemDump(stdout, ctxt->doc, node); + } + printf("\n"); + return(0); +} + +/** + * xmlShellLoad: + * @ctxt: the shell context + * @filename: the file name + * @node: unused + * @node2: unused + * + * Implements the XML shell function "load" + * loads a new document specified by the filename + * + * Returns 0 or -1 if loading failed + */ +int +xmlShellLoad(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node, + xmlNodePtr node2) { + xmlDocPtr doc; + int html = 0; + + if (ctxt->doc != NULL) + html = (ctxt->doc->type == XML_HTML_DOCUMENT_NODE); + + if (html) { +#ifdef LIBXML_HTML_ENABLED + doc = htmlParseFile(filename, NULL); +#else + printf("HTML support not compiled in\n"); + doc = NULL; +#endif /* LIBXML_HTML_ENABLED */ + } else { + doc = 
xmlParseFile(filename);
+    }
+    if (doc != NULL) {
+        if (ctxt->loaded == 1) {
+            xmlFreeDoc(ctxt->doc);
+        }
+        ctxt->loaded = 1;
+#ifdef LIBXML_XPATH_ENABLED
+        xmlXPathFreeContext(ctxt->pctxt);
+#endif /* LIBXML_XPATH_ENABLED */
+        xmlFree(ctxt->filename);
+        ctxt->doc = doc;
+        ctxt->node = (xmlNodePtr) doc;
+#ifdef LIBXML_XPATH_ENABLED
+        ctxt->pctxt = xmlXPathNewContext(doc);
+#endif /* LIBXML_XPATH_ENABLED */
+        ctxt->filename = (char *) xmlStrdup((xmlChar *) filename);
+    } else
+        return(-1);
+    return(0);
+}
+
+/**
+ * xmlShellWrite:
+ * @ctxt:  the shell context
+ * @filename:  the file name
+ * @node:  a node in the tree
+ * @node2:  unused
+ *
+ * Implements the XML shell function "write"
+ * Write the current node to the filename, it saves the serialization
+ * of the subtree under the @node specified. Document nodes are saved
+ * through the document save routines, any other node is dumped to a
+ * newly opened file.
+ *
+ * Returns 0, or -1 in case of error: @node is NULL, @filename is NULL
+ * or empty, the target is not writable, or the save itself failed.
+ */
+int
+xmlShellWrite(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node,
+              xmlNodePtr node2) {
+    if (node == NULL)
+        return(-1);
+    if ((filename == NULL) || (filename[0] == 0)) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Write command requires a filename argument\n");
+        return(-1);
+    }
+#ifdef W_OK
+    if (access((char *) filename, W_OK)) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Cannot write to %s\n", filename);
+        return(-1);
+    }
+#endif
+    switch(node->type) {
+        case XML_DOCUMENT_NODE:
+            /*
+             * a negative return means failure (same test xmlShellSave
+             * uses); comparing against -1 with "<" could never trigger
+             */
+            if (xmlSaveFile((char *) filename, ctxt->doc) < 0) {
+                xmlGenericError(xmlGenericErrorContext,
+                        "Failed to write to %s\n", filename);
+                return(-1);
+            }
+            break;
+        case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_HTML_ENABLED
+            if (htmlSaveFile((char *) filename, ctxt->doc) < 0) {
+                xmlGenericError(xmlGenericErrorContext,
+                        "Failed to write to %s\n", filename);
+                return(-1);
+            }
+#else
+            /* no HTML serializer compiled in, fall back to the XML one */
+            if (xmlSaveFile((char *) filename, ctxt->doc) < 0) {
+                xmlGenericError(xmlGenericErrorContext,
+                        "Failed to write to %s\n", filename);
+                return(-1);
+            }
+#endif /* LIBXML_HTML_ENABLED */
+            break;
+        default: {
+            FILE *f;
+
+            f = fopen((char *) filename,
"w"); + if (f == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to write to %s\n", filename); + return(-1); + } + xmlElemDump(f, ctxt->doc, node); + fclose(f); + } + } + return(0); +} + +/** + * xmlShellSave: + * @ctxt: the shell context + * @filename: the file name (optionnal) + * @node: unused + * @node2: unused + * + * Implements the XML shell function "save" + * Write the current document to the filename, or it's original name + * + * Returns 0 or -1 in case of error + */ +int +xmlShellSave(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node, + xmlNodePtr node2) { + if (ctxt->doc == NULL) + return(-1); + if ((filename == NULL) || (filename[0] == 0)) + filename = ctxt->filename; +#ifdef W_OK + if (access((char *) filename, W_OK)) { + xmlGenericError(xmlGenericErrorContext, + "Cannot save to %s\n", filename); + return(-1); + } +#endif + switch(ctxt->doc->type) { + case XML_DOCUMENT_NODE: + if (xmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to save to %s\n", filename); + } + break; + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_HTML_ENABLED + if (htmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to save to %s\n", filename); + } +#else + if (xmlSaveFile((char *) filename, ctxt->doc) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to save to %s\n", filename); + } +#endif /* LIBXML_HTML_ENABLED */ + break; + default: + xmlGenericError(xmlGenericErrorContext, + "To save to subparts of a document use the 'write' command\n"); + return(-1); + + } + return(0); +} + +/** + * xmlShellValidate: + * @ctxt: the shell context + * @dtd: the DTD URI (optionnal) + * @node: unused + * @node2: unused + * + * Implements the XML shell function "validate" + * Validate the document, if a DTD path is provided, then the validation + * is done against the given DTD. 
+ * + * Returns 0 or -1 in case of error + */ +int +xmlShellValidate(xmlShellCtxtPtr ctxt, char *dtd, xmlNodePtr node, + xmlNodePtr node2) { + xmlValidCtxt vctxt; + int res = -1; + + vctxt.userData = stderr; + vctxt.error = (xmlValidityErrorFunc) fprintf; + vctxt.warning = (xmlValidityWarningFunc) fprintf; + + if ((dtd == NULL) || (dtd[0] == 0)) { + res = xmlValidateDocument(&vctxt, ctxt->doc); + } else { + xmlDtdPtr subset; + + subset = xmlParseDTD(NULL, (xmlChar *) dtd); + if (subset != NULL) { + res = xmlValidateDtd(&vctxt, ctxt->doc, subset); + + xmlFreeDtd(subset); + } + } + return(res); +} + +/** + * xmlShellDu: + * @ctxt: the shell context + * @arg: unused + * @tree: a node defining a subtree + * @node2: unused + * + * Implements the XML shell function "du" + * show the structure of the subtree under node @tree + * If @tree is null, the command works on the current node. + * + * Returns 0 or -1 in case of error + */ +int +xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree, + xmlNodePtr node2) { + xmlNodePtr node; + int indent = 0,i; + + if (tree == NULL) return(-1); + node = tree; + while (node != NULL) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + printf("/\n"); + } else if (node->type == XML_ELEMENT_NODE) { + for (i = 0;i < indent;i++) + printf(" "); + printf("%s\n", node->name); + } else { + } + + /* + * Browse the full subtree, deep first + */ + + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + node = ((xmlDocPtr) node)->children; + } else if ((node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) { + /* deep first */ + node = node->children; + indent++; + } else if ((node != tree) && (node->next != NULL)) { + /* then siblings */ + node = node->next; + } else if (node != tree) { + /* go up to parents->next if needed */ + while (node != tree) { + if (node->parent != NULL) { + node = node->parent; + indent--; + } + if ((node != tree) && (node->next != 
NULL)) { + node = node->next; + break; + } + if (node->parent == NULL) { + node = NULL; + break; + } + if (node == tree) { + node = NULL; + break; + } + } + /* exit condition */ + if (node == tree) + node = NULL; + } else + node = NULL; + } + return(0); +} + +/** + * xmlShellPwd: + * @ctxt: the shell context + * @buffer: the output buffer + * @tree: a node + * @node2: unused + * + * Implements the XML shell function "pwd" + * Show the full path from the root to the node, if needed building + * thumblers when similar elements exists at a given ancestor level. + * The output is compatible with XPath commands. + * + * Returns 0 or -1 in case of error + */ +int +xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node, + xmlNodePtr node2) { + xmlNodePtr cur, tmp, next; + char buf[500]; + char sep; + const char *name; + int occur = 0; + + buffer[0] = 0; + if (node == NULL) return(-1); + cur = node; + do { + name = ""; + sep= '?'; + occur = 0; + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + sep = '/'; + next = NULL; + } else if (cur->type == XML_ELEMENT_NODE) { + sep = '/'; + name = (const char *)cur->name; + next = cur->parent; + + /* + * Thumbler index computation + */ + tmp = cur->prev; + while (tmp != NULL) { + if (xmlStrEqual(cur->name, tmp->name)) + occur++; + tmp = tmp->prev; + } + if (occur == 0) { + tmp = cur->next; + while (tmp != NULL) { + if (xmlStrEqual(cur->name, tmp->name)) + occur++; + tmp = tmp->next; + } + if (occur != 0) occur = 1; + } else + occur++; + } else if (cur->type == XML_ATTRIBUTE_NODE) { + sep = '@'; + name = (const char *) (((xmlAttrPtr) cur)->name); + next = ((xmlAttrPtr) cur)->parent; + } else { + next = cur->parent; + } + if (occur == 0) +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "%c%s%s", sep, name, buffer); +#else + sprintf(buf, "%c%s%s", sep, name, buffer); +#endif + else +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "%c%s[%d]%s", + sep, name, occur, buffer); +#else + 
sprintf(buf, "%c%s[%d]%s", sep, name, occur, buffer); +#endif + buf[sizeof(buf) - 1] = 0; + /* + * This test prevents buffer overflow, because this routine + * is only called by xmlShell, in which the second argument is + * 500 chars long. + * It is a dirty hack before a cleaner solution is found. + * Documentation should mention that the second argument must + * be at least 500 chars long, and could be stripped if too long. + */ + if (strlen(buffer) + strlen(buf) > 499) + break; + strcpy(buffer, buf); + cur = next; + } while (cur != NULL); + return(0); +} + +/** + * xmlShell + * @doc: the initial document + * @filename: the output buffer + * @input: the line reading function + * @output: the output FILE* + * + * Implements the XML shell + * This allow to load, validate, view, modify and save a document + * using a environment similar to a UNIX commandline. + */ +void +xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input, + FILE *output) { + char prompt[500] = "/ > "; + char *cmdline = NULL, *cur; + int nbargs; + char command[100]; + char arg[400]; + int i; + xmlShellCtxtPtr ctxt; + xmlXPathObjectPtr list; + + if (doc == NULL) + return; + if (filename == NULL) + return; + if (input == NULL) + return; + if (output == NULL) + return; + ctxt = (xmlShellCtxtPtr) xmlMalloc(sizeof(xmlShellCtxt)); + if (ctxt == NULL) + return; + ctxt->loaded = 0; + ctxt->doc = doc; + ctxt->input = input; + ctxt->output = output; + ctxt->filename = (char *) xmlStrdup((xmlChar *) filename); + ctxt->node = (xmlNodePtr) ctxt->doc; + +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt = xmlXPathNewContext(ctxt->doc); + if (ctxt->pctxt == NULL) { + xmlFree(ctxt); + return; + } +#endif /* LIBXML_XPATH_ENABLED */ + while (1) { + if (ctxt->node == (xmlNodePtr) ctxt->doc) + sprintf(prompt, "%s > ", "/"); + else if (ctxt->node->name) +#ifdef HAVE_SNPRINTF + snprintf(prompt, sizeof(prompt), "%s > ", ctxt->node->name); +#else + sprintf(prompt, "%s > ", ctxt->node->name); +#endif + else + 
sprintf(prompt, "? > ");
+        prompt[sizeof(prompt) - 1] = 0;
+
+        /*
+         * Get a new command line
+         */
+        cmdline = ctxt->input(prompt);
+        if (cmdline == NULL) break;
+
+        /*
+         * Parse the command itself
+         */
+        nbargs = 0; /* counted per command line, was used uninitialized */
+        cur = cmdline;
+        while ((*cur == ' ') || (*cur == '\t')) cur++;
+        i = 0;
+        while ((*cur != ' ') && (*cur != '\t') &&
+               (*cur != '\n') && (*cur != '\r') && (*cur != 0)) {
+            /*
+             * keep room for the terminating 0: an overlong command
+             * word is truncated instead of overflowing command[]
+             */
+            if (i < (int) sizeof(command) - 1)
+                command[i++] = *cur;
+            cur++;
+        }
+        command[i] = 0;
+        if (i == 0) {
+            /*
+             * empty line: release it before asking for the next one,
+             * the free at the end of the loop body is skipped here
+             */
+            free(cmdline); /* not xmlFree here ! */
+            cmdline = NULL;
+            continue;
+        }
+        nbargs++;
+
+        /*
+         * Parse the argument
+         */
+        while ((*cur == ' ') || (*cur == '\t')) cur++;
+        i = 0;
+        while ((*cur != '\n') && (*cur != '\r') && (*cur != 0)) {
+            /* same bound as above, for arg[] */
+            if (i < (int) sizeof(arg) - 1)
+                arg[i++] = *cur;
+            cur++;
+        }
+        arg[i] = 0;
+        if (i != 0)
+            nbargs++;
+
+        /*
+         * start interpreting the command
+         */
+        if (!strcmp(command, "exit"))
+            break;
+        if (!strcmp(command, "quit"))
+            break;
+        if (!strcmp(command, "bye"))
+            break;
+        if (!strcmp(command, "validate")) {
+            xmlShellValidate(ctxt, arg, NULL, NULL);
+        } else if (!strcmp(command, "load")) {
+            xmlShellLoad(ctxt, arg, NULL, NULL);
+        } else if (!strcmp(command, "save")) {
+            xmlShellSave(ctxt, arg, NULL, NULL);
+        } else if (!strcmp(command, "write")) {
+            /*
+             * xmlShellWrite() refuses a NULL node, so hand it the
+             * current node as its documentation describes
+             */
+            xmlShellWrite(ctxt, arg, ctxt->node, NULL);
+        } else if (!strcmp(command, "free")) {
+            if (arg[0] == 0) {
+                xmlMemShow(stdout, 0);
+            } else {
+                int len = 0;
+                sscanf(arg, "%d", &len);
+                xmlMemShow(stdout, len);
+            }
+        } else if (!strcmp(command, "pwd")) {
+            char dir[500];
+            if (!xmlShellPwd(ctxt, dir, ctxt->node, NULL))
+                printf("%s\n", dir);
+        } else if (!strcmp(command, "du")) {
+            xmlShellDu(ctxt, NULL, ctxt->node, NULL);
+        } else if ((!strcmp(command, "ls")) ||
+                   (!strcmp(command, "dir"))) {
+            int dir = (!strcmp(command, "dir"));
+            if (arg[0] == 0) {
+                if (dir)
+                    xmlShellDir(ctxt, NULL, ctxt->node, NULL);
+                else
+                    xmlShellList(ctxt, NULL, ctxt->node, NULL);
+            } else {
+                ctxt->pctxt->node = ctxt->node;
+#ifdef LIBXML_XPATH_ENABLED
+                ctxt->pctxt->node = ctxt->node;
+                list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt);
+#else
+
list = NULL; +#endif /* LIBXML_XPATH_ENABLED */ + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + break; + case XPATH_NODESET: { + int i; + + for (i = 0;i < list->nodesetval->nodeNr;i++) { + if (dir) + xmlShellDir(ctxt, NULL, + list->nodesetval->nodeTab[i], NULL); + else + xmlShellList(ctxt, NULL, + list->nodesetval->nodeTab[i], NULL); + } + break; + } + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", arg); + break; + } + xmlXPathFreeNodeSetList(list); + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + } + ctxt->pctxt->node = NULL; + } + } else if (!strcmp(command, "cd")) { + if (arg[0] == 0) { + ctxt->node = (xmlNodePtr) ctxt->doc; + } else { +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt->node = ctxt->node; + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); +#else + list = NULL; +#endif /* LIBXML_XPATH_ENABLED */ + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + break; + case XPATH_NODESET: + if (list->nodesetval->nodeNr == 1) { + ctxt->node = list->nodesetval->nodeTab[0]; + } else + 
xmlGenericError(xmlGenericErrorContext, + "%s is a %d Node Set\n", + arg, list->nodesetval->nodeNr); + break; + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", arg); + break; + } + xmlXPathFreeNodeSetList(list); + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + } + ctxt->pctxt->node = NULL; + } + } else if (!strcmp(command, "cat")) { + if (arg[0] == 0) { + xmlShellCat(ctxt, NULL, ctxt->node, NULL); + } else { + ctxt->pctxt->node = ctxt->node; +#ifdef LIBXML_XPATH_ENABLED + ctxt->pctxt->node = ctxt->node; + list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt); +#else + list = NULL; +#endif /* LIBXML_XPATH_ENABLED */ + if (list != NULL) { + switch (list->type) { + case XPATH_UNDEFINED: + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + break; + case XPATH_NODESET: { + int i; + + for (i = 0;i < list->nodesetval->nodeNr;i++) { + if (i > 0) printf(" -------\n"); + xmlShellCat(ctxt, NULL, + list->nodesetval->nodeTab[i], NULL); + } + break; + } + case XPATH_BOOLEAN: + xmlGenericError(xmlGenericErrorContext, + "%s is a Boolean\n", arg); + break; + case XPATH_NUMBER: + xmlGenericError(xmlGenericErrorContext, + "%s is a number\n", arg); + break; + case XPATH_STRING: + 
xmlGenericError(xmlGenericErrorContext, + "%s is a string\n", arg); + break; + case XPATH_POINT: + xmlGenericError(xmlGenericErrorContext, + "%s is a point\n", arg); + break; + case XPATH_RANGE: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_LOCATIONSET: + xmlGenericError(xmlGenericErrorContext, + "%s is a range\n", arg); + break; + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "%s is user-defined\n", arg); + break; + case XPATH_XSLT_TREE: + xmlGenericError(xmlGenericErrorContext, + "%s is an XSLT value tree\n", arg); + break; + } + xmlXPathFreeNodeSetList(list); + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: no such node\n", arg); + } + ctxt->pctxt->node = NULL; + } + } else { + xmlGenericError(xmlGenericErrorContext, + "Unknown command %s\n", command); + } + free(cmdline); /* not xmlFree here ! */ + } +#ifdef LIBXML_XPATH_ENABLED + xmlXPathFreeContext(ctxt->pctxt); +#endif /* LIBXML_XPATH_ENABLED */ + if (ctxt->loaded) { + xmlFreeDoc(ctxt->doc); + } + xmlFree(ctxt); + if (cmdline != NULL) + free(cmdline); /* not xmlFree here ! */ +} + +#endif /* LIBXML_DEBUG_ENABLED */ diff --git a/debugXML.h b/debugXML.h new file mode 100644 index 00000000..4a55fa8d --- /dev/null +++ b/debugXML.h @@ -0,0 +1,113 @@ +/* + * debugXML.h : Interfaces to a set of routines used for debugging the tree + * produced by the XML parser. 
+ * + * Daniel Veillard + */ + +#ifndef __DEBUG_XML__ +#define __DEBUG_XML__ +#include +#include + +#ifdef LIBXML_DEBUG_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The standard Dump routines + */ +void xmlDebugDumpString (FILE *output, + const xmlChar *str); +void xmlDebugDumpAttr (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpAttrList (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpOneNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNodeList (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpDocumentHead(FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDocument (FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDTD (FILE *output, + xmlDtdPtr doc); +void xmlDebugDumpEntities (FILE *output, + xmlDocPtr doc); +void xmlLsOneNode (FILE *output, + xmlNodePtr node); + +/**************************************************************** + * * + * The XML shell related structures and functions * + * * + ****************************************************************/ + +/** + * xmlShellReadlineFunc: + * @prompt: a string prompt + * + * This is a generic signature for the XML shell input function + * + * Returns a string which will be freed by the Shell + */ +typedef char * (* xmlShellReadlineFunc)(char *prompt); + +/* + * The shell context itself + * TODO: add the defined function tables. 
+ */ +typedef struct _xmlShellCtxt xmlShellCtxt; +typedef xmlShellCtxt *xmlShellCtxtPtr; +struct _xmlShellCtxt { + char *filename; + xmlDocPtr doc; + xmlNodePtr node; + xmlXPathContextPtr pctxt; + int loaded; + FILE *output; + xmlShellReadlineFunc input; +}; + +/** + * xmlShellCmd: + * @ctxt: a shell context + * @arg: a string argument + * @node: a first node + * @node2: a second node + * + * This is a generic signature for the XML shell functions + * + * Returns an int, negative returns indicating errors + */ +typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); + +/* + * The Shell interface. + */ +void xmlShell (xmlDocPtr doc, + char *filename, + xmlShellReadlineFunc input, + FILE *output); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_DEBUG_ENABLED */ +#endif /* __DEBUG_XML__ */ diff --git a/encoding.c b/encoding.c new file mode 100644 index 00000000..fab241e6 --- /dev/null +++ b/encoding.c @@ -0,0 +1,2078 @@ +/* + * encoding.c : implements the encoding conversion functions needed for XML + * + * Related specs: + * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies + * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. + * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include + +#ifdef HAVE_CTYPE_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#include +#ifdef LIBXML_ICONV_ENABLED +#ifdef HAVE_ERRNO_H +#include +#endif +#endif +#include +#include +#ifdef LIBXML_HTML_ENABLED +#include +#endif +#include + +xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; +xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; + +typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; +typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; +struct _xmlCharEncodingAlias { + const char *name; + const char *alias; +}; + +static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; +static int xmlCharEncodingAliasesNb = 0; +static int xmlCharEncodingAliasesMax = 0; + +#ifdef LIBXML_ICONV_ENABLED +#if 0 +#define DEBUG_ENCODING /* Define this to get encoding traces */ +#endif +#endif + +static int xmlLittleEndian = 1; + +/* + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * I hope we won't use values > 0xFFFF anytime soon ! 
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the number of bytes available at @utf, updated on
 *        return
 *
 * Read one UTF-8 encoded character from @utf.
 *
 * Only the continuation-bit pattern of the trailing bytes is checked:
 * overlong encodings are accepted, and a lead byte in the 0x80-0xBF range
 * is decoded as the start of a 2-byte sequence.
 *
 * Returns the character value and stores the number of bytes used in
 * *@len, or returns -1 in case of error; *@len is then set to 0 unless
 * @len itself is NULL.
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    /* a NULL @len is one of the ways to get here: do not write through it */
    if (len != NULL)
        *len = 0;
    return(-1);
}
/**
 * xmlCheckUTF8: Check utf-8 string for legality.
 * @utf: Pointer to putative utf-8 encoded string.
 *
 * Checks @utf for being valid utf-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer utf-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: 1 if @utf is valid, 0 if it is not or if @utf is NULL.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix;
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * The terminating 0 can never be mistaken for a continuation byte,
     * so looking ahead at ix + 1 .. ix + 3 stops at the end of the string.
     */
    for (ix = 0; (c = utf[ix]) != 0;) {
        if (c & 0x80) {
            if ((utf[ix + 1] & 0xc0) != 0x80)
                return(0);
            if ((c & 0xe0) == 0xe0) {
                if ((utf[ix + 2] & 0xc0) != 0x80)
                    return(0);
                if ((c & 0xf0) == 0xf0) {
                    if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
                        return(0);
                    /* 4-byte code */
                    ix += 4;
                } else {
                    /* 3-byte code */
                    ix += 3;
                }
            } else {
                /* 2-byte code */
                ix += 2;
            }
        } else {
            /* 1-byte code */
            ix++;
        }
    }
    return(1);
}
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars from @in to @out; every ASCII char is its
 * own UTF-8 encoding.  Copying goes on only while more than 5 free bytes
 * remain in @out.
 *
 * Returns 0 if success, or -1 as soon as a byte above 0x7F is met.
 * In both cases *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int written = 0;
    int consumed = 0;
    int status = 0;

    while ((consumed < available) && (written + 5 < capacity)) {
        unsigned int ch = in[consumed];

        if (ch >= 0x80) {
            /* not an ASCII char: report what was converted so far */
            status = -1;
            break;
        }
        out[written++] = (unsigned char) ch;
        consumed++;
    }

    *outlen = written;
    *inlen = consumed;
    return(status);
}
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.  A NULL @in is an initialization call and does
 * nothing.  An incomplete sequence at the end of @in is left unconsumed.
 *
 * Returns 0 if success, or -2 if the transcoding fails (malformed UTF-8
 * or a character outside of the ASCII range).
 * In both cases *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            goto failed;
        }

        /* incomplete sequence: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte which is not a continuation byte makes the input
             * malformed; it must not be folded into the character.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UTF-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto failed;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(0);

failed:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and convert it to an UTF-8
 * block of chars out: bytes below 0x80 are copied, the others become a
 * 2-byte sequence.  Conversion goes on only while more than 5 free bytes
 * remain in @out.
 *
 * Returns 0.
 * *@inlen is set to the number of octets consumed and *@outlen to the
 * number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int written = 0;
    int consumed = 0;

    while ((consumed < available) && (written + 5 < capacity)) {
        unsigned int ch = in[consumed++];

        if (ch < 0x80) {
            /* 1-byte code */
            out[written++] = (unsigned char) ch;
        } else {
            /* 2-byte code: 110000xx 10xxxxxx */
            out[written++] = (unsigned char) (((ch >> 6) & 0x1F) | 0xC0);
            out[written++] = (unsigned char) ((ch & 0x3F) | 0x80);
        }
    }

    *outlen = written;
    *inlen = consumed;
    return(0);
}
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.  A NULL @in is an initialization call and does
 * nothing.  An incomplete sequence at the end of @in is left unconsumed.
 *
 * Returns 0 if success, or -2 if the transcoding fails (malformed UTF-8
 * or a character above 0xFF).
 * In both cases *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cur;
    const unsigned char *limit;
    const unsigned char *done;
    unsigned char *dst;
    const unsigned char *dstlimit;
    int status = 0;

    if (in == NULL) {
        /* initialization: nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    cur = in;
    done = in;
    limit = in + (*inlen);
    dst = out;
    dstlimit = out + (*outlen);

    while (cur < limit) {
        unsigned int lead = *cur++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            value = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }

        /* incomplete sequence: wait for more input */
        if (limit - cur < extra)
            break;

        while (extra > 0) {
            unsigned int next = *cur++;

            if ((next & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (next & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstlimit)
            break;
        *dst++ = (unsigned char) value;
        done = cur;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(status);
}
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is read one byte at a time, so the result
 * depends neither on the endianness nor on the alignment rules of the host.
 *
 * An odd *@inlenb is first rounded down.  Conversion goes on only while
 * more than 5 free bytes remain in @out, and stops without error when a
 * high surrogate is the last complete code unit of @inb.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 * is not followed by a low surrogate).
 * In both cases *@inlenb is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                /* the low surrogate has not been received yet */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /*
         * c is a single code point below 0x110000: it needs at most
         * 4 bytes, which the loop condition guarantees to be free.
         */
        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}

/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  The output is written one byte at a time, so the
 * result depends neither on the endianness nor on the alignment rules of
 * the host.
 *
 * A NULL @in is an initialization call: the Byte Order Mark FF FE is
 * written to @outb if it can hold 2 bytes.
 * Conversion stops without error on an incomplete sequence at the end of
 * @in, when @outb is full, or on a code point above 0x10FFFF.
 *
 * Returns 2 if the Byte Order Mark was written, 0 if success, or -2
 * if the transcoding failed (malformed UTF-8).
 * In all cases *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        }
        else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* incomplete sequence: wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte is malformed input */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            /* surrogate pair, both halves or nothing */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high & 0xFF);
            out[1] = (unsigned char) (high >> 8);
            out[2] = (unsigned char) (low & 0xFF);
            out[3] = (unsigned char) (low >> 8);
            out += 4;
        } else
            break;
        processed = in;
    }
    *outlen = out - outb;
    /* measured from the start of the input, not from the moving pointer */
    *inlen = processed - instart;
    return(0);

failed:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is read one byte at a time, so the result
 * depends neither on the endianness nor on the alignment rules of the host.
 *
 * An odd *@inlenb is first rounded down.  A character is only written
 * when @out can hold its whole UTF-8 sequence, so the output never ends
 * in the middle of a character.
 *
 * Returns 0 if success, or -2 if the transcoding fails (a high surrogate
 * is the last code unit of @inb or is not followed by a low surrogate).
 * In both cases *@inlenb is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;
    int needed;

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;
    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* c is a single code point: size its UTF-8 sequence first */
        if (c < 0x80)
            needed = 1;
        else if (c < 0x800)
            needed = 2;
        else if (c < 0x10000)
            needed = 3;
        else
            needed = 4;
        if (outend - out < needed)
            break;

        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
}
+ * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + */ +int +UTF8ToUTF16BE(unsigned char* outb, int *outlen, + const unsigned char* in, int *inlen) +{ + unsigned short* out = (unsigned short*) outb; + const unsigned char* processed = in; + unsigned short* outstart= out; + unsigned short* outend; + const unsigned char* inend= in+*inlen; + unsigned int c, d; + int trailing; + unsigned char *tmp; + unsigned short tmp1, tmp2; + + if (in == NULL) { + /* + * initialization, add the Byte Order Mark + */ + if (*outlen >= 2) { + outb[0] = 0xFE; + outb[1] = 0xFF; + *outlen = 2; + *inlen = 0; +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Added FEFF Byte Order Mark\n"); +#endif + return(2); + } + *outlen = 0; + *inlen = 0; + return(0); + } + outend = out + (*outlen / 2); + while (in < inend) { + d= *in++; + if (d < 0x80) { c= d; trailing= 0; } + else if (d < 0xC0) { + /* trailing byte in leading position */ + *outlen = out - outstart; + *inlen = processed - in; + return(-2); + } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } + else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } + else if (d < 0xF8) { c= d & 0x07; trailing= 3; } + else { + /* no chance for this in UTF-16 */ + *outlen = out - outstart; + *inlen = processed - in; + return(-2); + } + + if (inend - in < trailing) { + break; + } + + for ( ; trailing; trailing--) { + if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; + c <<= 6; + c |= d & 0x3F; + } + + /* assertion: c is a single UTF-4 value */ + if (c < 0x10000) { + if (out >= outend) break; + if (xmlLittleEndian) { + tmp = (unsigned char *) out; + *tmp = c >> 8; + *(tmp + 1) = c; + out++; + } else { + *out++ = c; + } + } + else if (c < 0x110000) { + if (out+1 >= outend) break; + c -= 0x10000; + if (xmlLittleEndian) { + tmp1 = 0xD800 | (c >> 10); + tmp = (unsigned char *) out; + *tmp = tmp1 >> 8; + *(tmp + 1) = (unsigned char) tmp1; + out++; + + tmp2 = 0xDC00 | (c & 0x03FF); + tmp = 
(unsigned char *) out; + *tmp = tmp2 >> 8; + *(tmp + 1) = (unsigned char) tmp2; + out++; + } else { + *out++ = 0xD800 | (c >> 10); + *out++ = 0xDC00 | (c & 0x03FF); + } + } + else + break; + processed = in; + } + *outlen = (out - outstart) * 2; + *inlen = processed - in; + return(0); +} + +/** + * xmlDetectCharEncoding: + * @in: a pointer to the first bytes of the XML entity, must be at least + * 4 bytes long. + * @len: pointer to the length of the buffer + * + * Guess the encoding of the entity using the first bytes of the entity content + * accordingly of the non-normative appendix F of the XML-1.0 recommendation. + * + * Returns one of the XML_CHAR_ENCODING_... values. + */ +xmlCharEncoding +xmlDetectCharEncoding(const unsigned char* in, int len) +{ + if (len >= 4) { + if ((in[0] == 0x00) && (in[1] == 0x00) && + (in[2] == 0x00) && (in[3] == 0x3C)) + return(XML_CHAR_ENCODING_UCS4BE); + if ((in[0] == 0x3C) && (in[1] == 0x00) && + (in[2] == 0x00) && (in[3] == 0x00)) + return(XML_CHAR_ENCODING_UCS4LE); + if ((in[0] == 0x00) && (in[1] == 0x00) && + (in[2] == 0x3C) && (in[3] == 0x00)) + return(XML_CHAR_ENCODING_UCS4_2143); + if ((in[0] == 0x00) && (in[1] == 0x3C) && + (in[2] == 0x00) && (in[3] == 0x00)) + return(XML_CHAR_ENCODING_UCS4_3412); + if ((in[0] == 0x4C) && (in[1] == 0x6F) && + (in[2] == 0xA7) && (in[3] == 0x94)) + return(XML_CHAR_ENCODING_EBCDIC); + if ((in[0] == 0x3C) && (in[1] == 0x3F) && + (in[2] == 0x78) && (in[3] == 0x6D)) + return(XML_CHAR_ENCODING_UTF8); + } + if (len >= 2) { + if ((in[0] == 0xFE) && (in[1] == 0xFF)) + return(XML_CHAR_ENCODING_UTF16BE); + if ((in[0] == 0xFF) && (in[1] == 0xFE)) + return(XML_CHAR_ENCODING_UTF16LE); + } + return(XML_CHAR_ENCODING_NONE); +} + +/** + * xmlCleanupEncodingAliases: + * + * Unregisters all aliases + */ +void +xmlCleanupEncodingAliases(void) { + int i; + + if (xmlCharEncodingAliases == NULL) + return; + + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (xmlCharEncodingAliases[i].name != NULL) + 
xmlFree((char *) xmlCharEncodingAliases[i].name); + if (xmlCharEncodingAliases[i].alias != NULL) + xmlFree((char *) xmlCharEncodingAliases[i].alias); + } + xmlCharEncodingAliasesNb = 0; + xmlCharEncodingAliasesMax = 0; + xmlFree(xmlCharEncodingAliases); +} + +/** + * xmlGetEncodingAlias: + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Lookup an encoding name for the given alias. + * + * Returns NULL if not found the original name otherwise + */ +const char * +xmlGetEncodingAlias(const char *alias) { + int i; + char upper[100]; + + if (alias == NULL) + return(NULL); + + if (xmlCharEncodingAliases == NULL) + return(NULL); + + for (i = 0;i < 99;i++) { + upper[i] = toupper(alias[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { + return(xmlCharEncodingAliases[i].name); + } + } + return(NULL); +} + +/** + * xmlAddEncodingAlias: + * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Registers and alias @alias for an encoding named @name. Existing alias + * will be overwritten. 
+ * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlAddEncodingAlias(const char *name, const char *alias) { + int i; + char upper[100]; + + if ((name == NULL) || (alias == NULL)) + return(-1); + + for (i = 0;i < 99;i++) { + upper[i] = toupper(alias[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + if (xmlCharEncodingAliases == NULL) { + xmlCharEncodingAliasesNb = 0; + xmlCharEncodingAliasesMax = 20; + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); + if (xmlCharEncodingAliases == NULL) + return(-1); + } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { + xmlCharEncodingAliasesMax *= 2; + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlRealloc(xmlCharEncodingAliases, + xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); + } + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { + /* + * Replace the definition. 
+ */ + xmlFree((char *) xmlCharEncodingAliases[i].name); + xmlCharEncodingAliases[i].name = xmlMemStrdup(name); + return(0); + } + } + /* + * Add the definition + */ + xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); + xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); + xmlCharEncodingAliasesNb++; + return(0); +} + +/** + * xmlDelEncodingAlias: + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Unregisters an encoding alias @alias + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlDelEncodingAlias(const char *alias) { + int i; + + if (alias == NULL) + return(-1); + + if (xmlCharEncodingAliases == NULL) + return(-1); + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { + xmlFree((char *) xmlCharEncodingAliases[i].name); + xmlFree((char *) xmlCharEncodingAliases[i].alias); + xmlCharEncodingAliasesNb--; + memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], + sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); + return(0); + } + } + return(-1); +} + +/** + * xmlParseCharEncoding: + * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) + * + * Conpare the string to the known encoding schemes already known. Note + * that the comparison is case insensitive accordingly to the section + * [XML] 4.3.3 Character Encoding in Entities. + * + * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE + * if not recognized. 
+ */ +xmlCharEncoding +xmlParseCharEncoding(const char* name) +{ + const char *alias; + char upper[500]; + int i; + + if (name == NULL) + return(XML_CHAR_ENCODING_NONE); + + /* + * Do the alias resolution + */ + alias = xmlGetEncodingAlias(name); + if (alias != NULL) + name = alias; + + for (i = 0;i < 499;i++) { + upper[i] = toupper(name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); + if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); + if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); + + /* + * NOTE: if we were able to parse this, the endianness of UTF16 is + * already found and in use + */ + if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); + if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); + + if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); + if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); + if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); + + /* + * NOTE: if we were able to parse this, the endianness of UCS4 is + * already found and in use + */ + if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); + if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); + if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); + + + if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); + if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); + if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); + + if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); + if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); + if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); + + if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3); + if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); + if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); + if (!strcmp(upper, 
"ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); + if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); + if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); + if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); + + if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); + if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); + if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); + +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); +#endif + return(XML_CHAR_ENCODING_ERROR); +} + +/** + * xmlGetCharEncodingName: + * @enc: the encoding + * + * The "canonical" name for XML encoding. + * C.f. http://www.w3.org/TR/REC-xml#charencoding + * Section 4.3.3 Character Encoding in Entities + * + * Returns the canonical name for the given encoding + */ + +const char* +xmlGetCharEncodingName(xmlCharEncoding enc) { + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + return(NULL); + case XML_CHAR_ENCODING_NONE: + return(NULL); + case XML_CHAR_ENCODING_UTF8: + return("UTF-8"); + case XML_CHAR_ENCODING_UTF16LE: + return("UTF-16"); + case XML_CHAR_ENCODING_UTF16BE: + return("UTF-16"); + case XML_CHAR_ENCODING_EBCDIC: + return("EBCDIC"); + case XML_CHAR_ENCODING_UCS4LE: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS4BE: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS4_2143: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS4_3412: + return("ISO-10646-UCS-4"); + case XML_CHAR_ENCODING_UCS2: + return("ISO-10646-UCS-2"); + case XML_CHAR_ENCODING_8859_1: + return("ISO-8859-1"); + case XML_CHAR_ENCODING_8859_2: + return("ISO-8859-2"); + case XML_CHAR_ENCODING_8859_3: + return("ISO-8859-3"); + case XML_CHAR_ENCODING_8859_4: + return("ISO-8859-4"); + case XML_CHAR_ENCODING_8859_5: + return("ISO-8859-5"); + case XML_CHAR_ENCODING_8859_6: + return("ISO-8859-6"); + case XML_CHAR_ENCODING_8859_7: + return("ISO-8859-7"); + case 
XML_CHAR_ENCODING_8859_8: + return("ISO-8859-8"); + case XML_CHAR_ENCODING_8859_9: + return("ISO-8859-9"); + case XML_CHAR_ENCODING_2022_JP: + return("ISO-2022-JP"); + case XML_CHAR_ENCODING_SHIFT_JIS: + return("Shift-JIS"); + case XML_CHAR_ENCODING_EUC_JP: + return("EUC-JP"); + case XML_CHAR_ENCODING_ASCII: + return(NULL); + } + return(NULL); +} + +/**************************************************************** + * * + * Char encoding handlers * + * * + ****************************************************************/ + +/* the size should be growable, but it's not a big deal ... */ +#define MAX_ENCODING_HANDLERS 50 +static xmlCharEncodingHandlerPtr *handlers = NULL; +static int nbCharEncodingHandler = 0; + +/* + * The default is UTF-8 for XML, that's also the default used for the + * parser internals, so the default encoding handler is NULL + */ + +static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; + +/** + * xmlNewCharEncodingHandler: + * @name: the encoding name, in UTF-8 format (ASCII actually) + * @input: the xmlCharEncodingInputFunc to read that encoding + * @output: the xmlCharEncodingOutputFunc to write that encoding + * + * Create and registers an xmlCharEncodingHandler. + * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). + */ +xmlCharEncodingHandlerPtr +xmlNewCharEncodingHandler(const char *name, + xmlCharEncodingInputFunc input, + xmlCharEncodingOutputFunc output) { + xmlCharEncodingHandlerPtr handler; + const char *alias; + char upper[500]; + int i; + char *up = 0; + + /* + * Do the alias resolution + */ + alias = xmlGetEncodingAlias(name); + if (alias != NULL) + name = alias; + + /* + * Keep only the uppercase version of the encoding. 
+ */ + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharEncodingHandler : no name !\n"); + return(NULL); + } + for (i = 0;i < 499;i++) { + upper[i] = toupper(name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + up = xmlMemStrdup(upper); + if (up == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharEncodingHandler : out of memory !\n"); + return(NULL); + } + + /* + * allocate and fill-up an handler block. + */ + handler = (xmlCharEncodingHandlerPtr) + xmlMalloc(sizeof(xmlCharEncodingHandler)); + if (handler == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewCharEncodingHandler : out of memory !\n"); + return(NULL); + } + handler->input = input; + handler->output = output; + handler->name = up; + +#ifdef LIBXML_ICONV_ENABLED + handler->iconv_in = NULL; + handler->iconv_out = NULL; +#endif /* LIBXML_ICONV_ENABLED */ + + /* + * registers and returns the handler. + */ + xmlRegisterCharEncodingHandler(handler); +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Registered encoding handler for %s\n", name); +#endif + return(handler); +} + +/** + * xmlInitCharEncodingHandlers: + * + * Initialize the char encoding support, it registers the default + * encoding supported. + * NOTE: while public, this function usually doesn't need to be called + * in normal processing. 
+ */ +void +xmlInitCharEncodingHandlers(void) { + unsigned short int tst = 0x1234; + unsigned char *ptr = (unsigned char *) &tst; + + if (handlers != NULL) return; + + handlers = (xmlCharEncodingHandlerPtr *) + xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); + + if (*ptr == 0x12) xmlLittleEndian = 0; + else if (*ptr == 0x34) xmlLittleEndian = 1; + else xmlGenericError(xmlGenericErrorContext, + "Odd problem at endianness detection\n"); + + if (handlers == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlInitCharEncodingHandlers : out of memory !\n"); + return; + } + xmlNewCharEncodingHandler("UTF-8", NULL, NULL); + xmlUTF16LEHandler = + xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); + xmlUTF16BEHandler = + xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); + xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); + xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); +#ifdef LIBXML_HTML_ENABLED + xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); +#endif +} + +/** + * xmlCleanupCharEncodingHandlers: + * + * Cleanup the memory allocated for the char encoding support, it + * unregisters all the encoding handlers and the aliases. + */ +void +xmlCleanupCharEncodingHandlers(void) { + xmlCleanupEncodingAliases(); + + if (handlers == NULL) return; + + for (;nbCharEncodingHandler > 0;) { + nbCharEncodingHandler--; + if (handlers[nbCharEncodingHandler] != NULL) { + if (handlers[nbCharEncodingHandler]->name != NULL) + xmlFree(handlers[nbCharEncodingHandler]->name); + xmlFree(handlers[nbCharEncodingHandler]); + } + } + xmlFree(handlers); + handlers = NULL; + nbCharEncodingHandler = 0; + xmlDefaultCharEncodingHandler = NULL; +} + +/** + * xmlRegisterCharEncodingHandler: + * @handler: the xmlCharEncodingHandlerPtr handler block + * + * Register the char encoding handler, surprizing, isn't it ? 
+ */ +void +xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { + if (handlers == NULL) xmlInitCharEncodingHandlers(); + if (handler == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlRegisterCharEncodingHandler: NULL handler !\n"); + return; + } + + if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { + xmlGenericError(xmlGenericErrorContext, + "xmlRegisterCharEncodingHandler: Too many handler registered\n"); + xmlGenericError(xmlGenericErrorContext, + "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__); + return; + } + handlers[nbCharEncodingHandler++] = handler; +} + +/** + * xmlGetCharEncodingHandler: + * @enc: an xmlCharEncoding value. + * + * Search in the registrered set the handler able to read/write that encoding. + * + * Returns the handler or NULL if not found + */ +xmlCharEncodingHandlerPtr +xmlGetCharEncodingHandler(xmlCharEncoding enc) { + xmlCharEncodingHandlerPtr handler; + + if (handlers == NULL) xmlInitCharEncodingHandlers(); + switch (enc) { + case XML_CHAR_ENCODING_ERROR: + return(NULL); + case XML_CHAR_ENCODING_NONE: + return(NULL); + case XML_CHAR_ENCODING_UTF8: + return(NULL); + case XML_CHAR_ENCODING_UTF16LE: + return(xmlUTF16LEHandler); + case XML_CHAR_ENCODING_UTF16BE: + return(xmlUTF16BEHandler); + case XML_CHAR_ENCODING_EBCDIC: + handler = xmlFindCharEncodingHandler("EBCDIC"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("ebcdic"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_UCS4BE: + handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("UCS-4"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("UCS4"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_UCS4LE: + handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("UCS-4"); + if 
(handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("UCS4"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_UCS4_2143: + break; + case XML_CHAR_ENCODING_UCS4_3412: + break; + case XML_CHAR_ENCODING_UCS2: + handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("UCS-2"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("UCS2"); + if (handler != NULL) return(handler); + break; + + /* + * We used to keep ISO Latin encodings native in the + * generated data. This led to so many problems that + * this has been removed. One can still change this + * back by registering no-ops encoders for those + */ + case XML_CHAR_ENCODING_8859_1: + handler = xmlFindCharEncodingHandler("ISO-8859-1"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_2: + handler = xmlFindCharEncodingHandler("ISO-8859-2"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_3: + handler = xmlFindCharEncodingHandler("ISO-8859-3"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_4: + handler = xmlFindCharEncodingHandler("ISO-8859-4"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_5: + handler = xmlFindCharEncodingHandler("ISO-8859-5"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_6: + handler = xmlFindCharEncodingHandler("ISO-8859-6"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_7: + handler = xmlFindCharEncodingHandler("ISO-8859-7"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_8: + handler = xmlFindCharEncodingHandler("ISO-8859-8"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_8859_9: + handler = xmlFindCharEncodingHandler("ISO-8859-9"); + if (handler != NULL) return(handler); + break; + + + case 
XML_CHAR_ENCODING_2022_JP: + handler = xmlFindCharEncodingHandler("ISO-2022-JP"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_SHIFT_JIS: + handler = xmlFindCharEncodingHandler("SHIFT-JIS"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("SHIFT_JIS"); + if (handler != NULL) return(handler); + handler = xmlFindCharEncodingHandler("Shift_JIS"); + if (handler != NULL) return(handler); + break; + case XML_CHAR_ENCODING_EUC_JP: + handler = xmlFindCharEncodingHandler("EUC-JP"); + if (handler != NULL) return(handler); + break; + default: + break; + } + +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "No handler found for encoding %d\n", enc); +#endif + return(NULL); +} + +/** + * xmlGetCharEncodingHandler: + * @enc: a string describing the char encoding. + * + * Search in the registrered set the handler able to read/write that encoding. + * + * Returns the handler or NULL if not found + */ +xmlCharEncodingHandlerPtr +xmlFindCharEncodingHandler(const char *name) { + const char *nalias; + const char *norig; + xmlCharEncoding alias; +#ifdef LIBXML_ICONV_ENABLED + xmlCharEncodingHandlerPtr enc; + iconv_t icv_in, icv_out; +#endif /* LIBXML_ICONV_ENABLED */ + char upper[100]; + int i; + + if (handlers == NULL) xmlInitCharEncodingHandlers(); + if (name == NULL) return(xmlDefaultCharEncodingHandler); + if (name[0] == 0) return(xmlDefaultCharEncodingHandler); + + /* + * Do the alias resolution + */ + norig = name; + nalias = xmlGetEncodingAlias(name); + if (nalias != NULL) + name = nalias; + + /* + * Check first for directly registered encoding names + */ + for (i = 0;i < 99;i++) { + upper[i] = toupper(name[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + for (i = 0;i < nbCharEncodingHandler; i++) + if (!strcmp(upper, handlers[i]->name)) { +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Found registered handler for encoding %s\n", name); +#endif + return(handlers[i]); + } + 
+#ifdef LIBXML_ICONV_ENABLED + /* check whether iconv can handle this */ + icv_in = iconv_open("UTF-8", name); + icv_out = iconv_open(name, "UTF-8"); + if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { + enc = (xmlCharEncodingHandlerPtr) + xmlMalloc(sizeof(xmlCharEncodingHandler)); + if (enc == NULL) { + iconv_close(icv_in); + iconv_close(icv_out); + return(NULL); + } + enc->name = xmlMemStrdup(name); + enc->input = NULL; + enc->output = NULL; + enc->iconv_in = icv_in; + enc->iconv_out = icv_out; +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "Found iconv handler for encoding %s\n", name); +#endif + return enc; + } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { + xmlGenericError(xmlGenericErrorContext, + "iconv : problems with filters for '%s'\n", name); + } +#endif /* LIBXML_ICONV_ENABLED */ + +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "No handler found for encoding %s\n", name); +#endif + + /* + * Fallback using the canonical names + */ + alias = xmlParseCharEncoding(norig); + if (alias != XML_CHAR_ENCODING_ERROR) { + const char* canon; + canon = xmlGetCharEncodingName(alias); + if ((canon != NULL) && (strcmp(name, canon))) { + return(xmlFindCharEncodingHandler(canon)); + } + } + + return(NULL); +} + +#ifdef LIBXML_ICONV_ENABLED +/** + * xmlIconvWrapper: + * @cd: iconv converter data structure + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of ISO Latin 1 chars + * @inlen: the length of @in + * + * Returns 0 if success, or + * -1 by lack of space, or + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + * -3 if there the last byte can't form a single output char. + * + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictiable. 
+ * The value of @outlen after return is the number of ocetes consumed. + */ +static int +xmlIconvWrapper(iconv_t cd, + unsigned char *out, int *outlen, + const unsigned char *in, int *inlen) { + + size_t icv_inlen = *inlen, icv_outlen = *outlen; + const char *icv_in = (const char *) in; + char *icv_out = (char *) out; + int ret; + + ret = iconv(cd, + &icv_in, &icv_inlen, + &icv_out, &icv_outlen); + if (in != NULL) { + *inlen -= icv_inlen; + *outlen -= icv_outlen; + } else { + *inlen = 0; + *outlen = 0; + } + if (icv_inlen != 0 || ret == (size_t) -1) { +#ifdef EILSEQ + if (errno == EILSEQ) { + return -2; + } else +#endif +#ifdef E2BIG + if (errno == E2BIG) { + return -1; + } else +#endif +#ifdef EINVAL + if (errno == EINVAL) { + return -3; + } else +#endif + { + return -3; + } + } + return 0; +} +#endif /* LIBXML_ICONV_ENABLED */ + +/** + * xmlCharEncFirstLine: + * @handler: char enconding transformation data structure + * @out: an xmlBuffer for the output. + * @in: an xmlBuffer for the input + * + * Front-end for the encoding handler input function, but handle only + * the very first line, i.e. limit itself to 45 chars. + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, + xmlBufferPtr in) { + int ret = -2; + int written; + int toconv; + + if (handler == NULL) return(-1); + if (out == NULL) return(-1); + if (in == NULL) return(-1); + + written = out->size - out->use; + toconv = in->use; + if (toconv * 2 >= written) { + xmlBufferGrow(out, toconv); + written = out->size - out->use - 1; + } + + /* + * echo '' | wc -c => 38 + * 45 chars should be sufficient to reach the end of the encoding + * decalration without going too far inside the document content. 
+ */ + written = 45; + + if (handler->input != NULL) { + ret = handler->input(&out->content[out->use], &written, + in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + } +#ifdef LIBXML_ICONV_ENABLED + else if (handler->iconv_in != NULL) { + ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], + &written, in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + if (ret == -1) ret = -3; + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef DEBUG_ENCODING + switch (ret) { + case 0: + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input\n", + toconv, written); + break; + case -1: + xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", + toconv, written, in->use); + break; + case -2: + xmlGenericError(xmlGenericErrorContext, + "input conversion failed due to input error\n"); + break; + case -3: + xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", + toconv, written, in->use); + break; + default: + xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); + } +#endif + /* + * Ignore when input buffer is not on a boundary + */ + if (ret == -3) ret = 0; + if (ret == -1) ret = 0; + return(ret); +} + +/** + * xmlCharEncInFunc: + * @handler: char enconding transformation data structure + * @out: an xmlBuffer for the output. 
+ * @in: an xmlBuffer for the input + * + * Generic front-end for the encoding handler input function + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncInFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, + xmlBufferPtr in) { + int ret = -2; + int written; + int toconv; + + if (handler == NULL) return(-1); + if (out == NULL) return(-1); + if (in == NULL) return(-1); + + toconv = in->use; + if (toconv == 0) + return(0); + written = out->size - out->use; + if (toconv * 2 >= written) { + xmlBufferGrow(out, out->size + toconv * 2); + written = out->size - out->use - 1; + } + if (handler->input != NULL) { + ret = handler->input(&out->content[out->use], &written, + in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + } +#ifdef LIBXML_ICONV_ENABLED + else if (handler->iconv_in != NULL) { + ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], + &written, in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + if (ret == -1) ret = -3; + } +#endif /* LIBXML_ICONV_ENABLED */ + switch (ret) { +#ifdef DEBUG_ENCODING + case 0: + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of input\n", + toconv, written); + break; + case -1: + xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", + toconv, written, in->use); + break; + case -3: + xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", + toconv, written, in->use); + break; +#endif + case -2: + xmlGenericError(xmlGenericErrorContext, + "input conversion failed due to input error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + in->content[0], in->content[1], + 
in->content[2], in->content[3]); + } + /* + * Ignore when input buffer is not on a boundary + */ + if (ret == -3) ret = 0; + return(ret); +} + +/** + * xmlCharEncOutFunc: + * @handler: char enconding transformation data structure + * @out: an xmlBuffer for the output. + * @in: an xmlBuffer for the input + * + * Generic front-end for the encoding handler output function + * a first call with @in == NULL has to be made firs to initiate the + * output in case of non-stateless encoding needing to initiate their + * state or the output (like the BOM in UTF16). + * In case of UTF8 sequence conversion errors for the given encoder, + * the content will be automatically remapped to a CharRef sequence. + * + * Returns the number of byte written if success, or + * -1 general error + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + */ +int +xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, + xmlBufferPtr in) { + int ret = -2; + int written; + int writtentot = 0; + int toconv; + int output = 0; + + if (handler == NULL) return(-1); + if (out == NULL) return(-1); + +retry: + + written = out->size - out->use; + + /* + * First specific handling of in = NULL, i.e. the initialization call + */ + if (in == NULL) { + toconv = 0; + if (handler->output != NULL) { + ret = handler->output(&out->content[out->use], &written, + NULL, &toconv); + out->use += written; + out->content[out->use] = 0; + } +#ifdef LIBXML_ICONV_ENABLED + else if (handler->iconv_out != NULL) { + ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], + &written, NULL, &toconv); + out->use += written; + out->content[out->use] = 0; + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "initialized encoder\n"); +#endif + return(0); + } + + /* + * Convertion itself. 
+ */ + toconv = in->use; + if (toconv == 0) + return(0); + if (toconv * 2 >= written) { + xmlBufferGrow(out, toconv * 2); + written = out->size - out->use - 1; + } + if (handler->output != NULL) { + ret = handler->output(&out->content[out->use], &written, + in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + writtentot += written; + out->content[out->use] = 0; + } +#ifdef LIBXML_ICONV_ENABLED + else if (handler->iconv_out != NULL) { + ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], + &written, in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + writtentot += written; + out->content[out->use] = 0; + if (ret == -1) { + if (written > 0) { + /* + * Can be a limitation of iconv + */ + goto retry; + } + ret = -3; + } + } +#endif /* LIBXML_ICONV_ENABLED */ + else { + xmlGenericError(xmlGenericErrorContext, + "xmlCharEncOutFunc: no output function !\n"); + return(-1); + } + + if (ret >= 0) output += ret; + + /* + * Attempt to handle error cases + */ + switch (ret) { +#ifdef DEBUG_ENCODING + case 0: + xmlGenericError(xmlGenericErrorContext, + "converted %d bytes to %d bytes of output\n", + toconv, written); + break; + case -1: + xmlGenericError(xmlGenericErrorContext, + "output conversion failed by lack of space\n"); + break; +#endif + case -3: + xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", + toconv, written, in->use); + break; + case -2: { + int len = in->use; + const xmlChar *utf = (const xmlChar *) in->content; + int cur; + + cur = xmlGetUTF8Char(utf, &len); + if (cur > 0) { + xmlChar charref[20]; + +#ifdef DEBUG_ENCODING + xmlGenericError(xmlGenericErrorContext, + "handling output conversion error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + in->content[0], in->content[1], + in->content[2], in->content[3]); +#endif + /* + * Removes the UTF8 sequence, and replace it by a charref + * and continue the 
transcoding phase, hoping the error + * did not mangle the encoder state. + */ + sprintf((char *) charref, "&#x%X;", cur); + xmlBufferShrink(in, len); + xmlBufferAddHead(in, charref, -1); + + goto retry; + } else { + xmlGenericError(xmlGenericErrorContext, + "output conversion failed due to conv error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + in->content[0], in->content[1], + in->content[2], in->content[3]); + in->content[0] = ' '; + } + break; + } + } + return(ret); +} + +/** + * xmlCharEncCloseFunc: + * @handler: char enconding transformation data structure + * + * Generic front-end for hencoding handler close function + * + * Returns 0 if success, or -1 in case of error + */ +int +xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { + int ret = 0; + if (handler == NULL) return(-1); + if (handler->name == NULL) return(-1); +#ifdef LIBXML_ICONV_ENABLED + /* + * Iconv handlers can be oused only once, free the whole block. + * and the associated icon resources. + */ + if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { + if (handler->name != NULL) + xmlFree(handler->name); + handler->name = NULL; + if (handler->iconv_out != NULL) { + if (iconv_close(handler->iconv_out)) + ret = -1; + handler->iconv_out = NULL; + } + if (handler->iconv_in != NULL) { + if (iconv_close(handler->iconv_in)) + ret = -1; + handler->iconv_in = NULL; + } + xmlFree(handler); + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef DEBUG_ENCODING + if (ret) + xmlGenericError(xmlGenericErrorContext, + "failed to close the encoding handler\n"); + else + xmlGenericError(xmlGenericErrorContext, + "closed the encoding handler\n"); + +#endif + return(ret); +} + diff --git a/encoding.h b/encoding.h new file mode 100644 index 00000000..62e81e3d --- /dev/null +++ b/encoding.h @@ -0,0 +1,187 @@ +/* + * encoding.h : interface for the encoding conversion functions needed for + * XML + * + * Related specs: + * rfc2044 (UTF-8 and UTF-16) F. 
Yergeau Alis Technologies + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. + * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_CHAR_ENCODING_H__ +#define __XML_CHAR_ENCODING_H__ + +#include +#ifdef LIBXML_ICONV_ENABLED +#include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Predefined values for some standard encodings + * Libxml don't do beforehand translation on UTF8, ISOLatinX + * It also support UTF16 (LE and BE) by default. + * + * Anything else would have to be translated to UTF8 before being + * given to the parser itself. The BOM for UTF16 and the encoding + * declaration are looked at and a converter is looked for at that + * point. If not found the parser stops here as asked by the XML REC + * Converter can be registered by the user using xmlRegisterCharEncodingHandler + * but the currentl form doesn't allow stateful transcoding (a serious + * problem agreed !). If iconv has been found it will be used + * automatically and allow stateful transcoding, the simplest is then + * to be sure to enable icon and to provide iconv libs for the encoding + * support needed. 
+ */ +typedef enum { + XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ + XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ + XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ + XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ + XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ + XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ + XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ + XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ + XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ + XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ + XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ + XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ + XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ + XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ + XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ + XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ + XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ + XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ + XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ + XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ + XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ + XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ +} xmlCharEncoding; + +/** + * xmlCharEncodingInputFunc: + * @out: a pointer ot an array of bytes to store the UTF-8 result + * @outlen: the lenght of @out + * @in: a pointer ot an array of chars in the original encoding + * @inlen: the lenght of @in + * + * Take a block of chars in the original encoding and try to convert + * it to an UTF-8 block of chars out. + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictiable. + * The value of @outlen after return is the number of ocetes consumed. 
+ */ +typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen); + + +/** + * xmlCharEncodingOutputFunc: + * @out: a pointer ot an array of bytes to store the result + * @outlen: the lenght of @out + * @in: a pointer ot an array of UTF-8 chars + * @inlen: the lenght of @in + * + * Take a block of UTF-8 chars in and try to convert it to an other + * encoding. + * Note: a first call designed to produce heading info is called with + * in = NULL. If stateful this should also initialize the encoder state + * + * Returns the number of byte written, or -1 by lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * as the return value is positive, else unpredictiable. + * The value of @outlen after return is the number of ocetes consumed. + */ +typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen); + + +/* + * Block defining the handlers for non UTF-8 encodings. 
+ * If iconv is supported, there is two extra fields + */ + +typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; +typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; +struct _xmlCharEncodingHandler { + char *name; + xmlCharEncodingInputFunc input; + xmlCharEncodingOutputFunc output; +#ifdef LIBXML_ICONV_ENABLED + iconv_t iconv_in; + iconv_t iconv_out; +#endif /* LIBXML_ICONV_ENABLED */ +}; + +/* + * Interfaces for encoding handlers + */ +void xmlInitCharEncodingHandlers (void); +void xmlCleanupCharEncodingHandlers (void); +void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); +xmlCharEncodingHandlerPtr + xmlGetCharEncodingHandler (xmlCharEncoding enc); +xmlCharEncodingHandlerPtr + xmlFindCharEncodingHandler (const char *name); + + +/* + * Interfaces for encoding names and aliases + */ +int xmlAddEncodingAlias (const char *name, + const char *alias); +int xmlDelEncodingAlias (const char *alias); +const char * + xmlGetEncodingAlias (const char *alias); +void xmlCleanupEncodingAliases (void); +xmlCharEncoding + xmlParseCharEncoding (const char* name); +const char* + xmlGetCharEncodingName (xmlCharEncoding enc); + +/* + * Interfaces directly used by the parsers. 
+ */ +xmlCharEncoding + xmlDetectCharEncoding (const unsigned char* in, + int len); + +int xmlCheckUTF8 (const unsigned char *utf); + +int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); + +int xmlCharEncInFunc (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); +int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); +int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_CHAR_ENCODING_H__ */ diff --git a/entities.c b/entities.c new file mode 100644 index 00000000..0fc4e8eb --- /dev/null +++ b/entities.c @@ -0,0 +1,1034 @@ +/* + * entities.c : implementation for the XML entities handking + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include +#ifdef HAVE_STDLIB_H +#include +#endif +#include +#include +#include +#include +#include + +#define DEBUG_ENT_REF /* debugging of cross entities dependancies */ +#define ENTITY_HASH_SIZE 256 /* modify xmlEntityComputeHash accordingly */ + +/* + * xmlEntityComputeHash: + * + * Computes the hash value for this given entity + */ +int +xmlEntityComputeHash(const xmlChar *name) { + register const unsigned char *cur = (const unsigned char *) name; + register unsigned char val = 0; + + if (name == NULL) + return(val); + while (*cur) val += *cur++; + return(val); +} + +/* + * The XML predefined entities. + */ + +struct xmlPredefinedEntityValue { + const char *name; + const char *value; +}; +struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = { + { "lt", "<" }, + { "gt", ">" }, + { "apos", "'" }, + { "quot", "\"" }, + { "amp", "&" } +}; + +/* + * TODO: !!!!!!! This is GROSS, allocation of a 256 entry hash for + * a fixed number of 4 elements ! 
+ */
+xmlHashTablePtr xmlPredefinedEntities = NULL;
+
+/*
+ * xmlFreeEntity : clean-up an entity record.
+ *
+ * Frees the children node list, every string owned by the record and
+ * finally the record itself. A NULL @entity is ignored.
+ */
+void xmlFreeEntity(xmlEntityPtr entity) {
+    if (entity == NULL) return;
+
+    if (entity->children)
+        xmlFreeNodeList(entity->children);
+    if (entity->name != NULL)
+        xmlFree((char *) entity->name);
+    if (entity->ExternalID != NULL)
+        xmlFree((char *) entity->ExternalID);
+    if (entity->SystemID != NULL)
+        xmlFree((char *) entity->SystemID);
+    if (entity->URI != NULL)
+        xmlFree((char *) entity->URI);
+    if (entity->content != NULL)
+        xmlFree((char *) entity->content);
+    if (entity->orig != NULL)
+        xmlFree((char *) entity->orig);
+    /* overwrite the record before releasing it; presumably so that a
+       stale pointer is easier to spot */
+    memset(entity, -1, sizeof(xmlEntity));
+    xmlFree(entity);
+}
+
+/*
+ * xmlAddEntity : register a new entity for an entities table.
+ *
+ * The table is selected from @type: general entities go to
+ * dtd->entities, parameter entities to dtd->pentities and predefined
+ * entities to the global xmlPredefinedEntities table; a missing table
+ * is created on demand. @dtd may only be NULL for predefined entities.
+ *
+ * Returns the new entity, or NULL if @name is NULL, @type is unknown,
+ * @dtd is NULL where one is needed, memory is exhausted or the name is
+ * already present in the selected table.
+ */
+static xmlEntityPtr
+xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
+             const xmlChar *ExternalID, const xmlChar *SystemID,
+             const xmlChar *content) {
+    xmlEntitiesTablePtr table = NULL;
+    xmlEntityPtr ret;
+
+    if (name == NULL)
+        return(NULL);
+    switch (type) {
+        case XML_INTERNAL_GENERAL_ENTITY:
+        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            if (dtd == NULL)
+                return(NULL);
+            if (dtd->entities == NULL)
+                dtd->entities = xmlHashCreate(0);
+            table = dtd->entities;
+            break;
+        case XML_INTERNAL_PARAMETER_ENTITY:
+        case XML_EXTERNAL_PARAMETER_ENTITY:
+            if (dtd == NULL)
+                return(NULL);
+            if (dtd->pentities == NULL)
+                dtd->pentities = xmlHashCreate(0);
+            table = dtd->pentities;
+            break;
+        case XML_INTERNAL_PREDEFINED_ENTITY:
+            if (xmlPredefinedEntities == NULL)
+                xmlPredefinedEntities = xmlHashCreate(8);
+            table = xmlPredefinedEntities;
+            break;
+        default:
+            /* unknown type: table stays NULL and the call fails below */
+            break;
+    }
+    if (table == NULL)
+        return(NULL);
+    ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
+    if (ret == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddEntity: out of memory\n");
+        return(NULL);
+    }
+    memset(ret, 0, sizeof(xmlEntity));
+    ret->type = XML_ENTITY_DECL;
+
+    /*
+     * fill the structure.
+     */
+    ret->name = xmlStrdup(name);
+    ret->etype = (xmlEntityType) type;
+    if (ExternalID != NULL)
+        ret->ExternalID = xmlStrdup(ExternalID);
+    if (SystemID != NULL)
+        ret->SystemID = xmlStrdup(SystemID);
+    if (content != NULL) {
+        ret->length = xmlStrlen(content);
+        ret->content = xmlStrndup(content, ret->length);
+    } else {
+        ret->length = 0;
+        ret->content = NULL;
+    }
+    ret->URI = NULL; /* to be computed by the layer knowing
+                        the defining entity */
+    ret->orig = NULL;
+
+    if (xmlHashAddEntry(table, name, ret)) {
+        /*
+         * entity was already defined at another level.
+         */
+        xmlFreeEntity(ret);
+        return(NULL);
+    }
+    return(ret);
+}
+
+/**
+ * xmlInitializePredefinedEntities:
+ *
+ * Set up the predefined entities. Does nothing when the table already
+ * exists.
+ */
+void xmlInitializePredefinedEntities(void) {
+    unsigned int i;
+    xmlChar name[50];
+    xmlChar value[50];
+    const char *in;
+    xmlChar *out;
+
+    if (xmlPredefinedEntities != NULL) return;
+
+    xmlPredefinedEntities = xmlCreateEntitiesTable();
+    for (i = 0;i < sizeof(xmlPredefinedEntityValues) /
+                    sizeof(xmlPredefinedEntityValues[0]);i++) {
+        /* widen the char strings of the table into xmlChar strings,
+           terminator included */
+        in = xmlPredefinedEntityValues[i].name;
+        out = &name[0];
+        for (;(*out++ = (xmlChar) *in);)in++;
+        in = xmlPredefinedEntityValues[i].value;
+        out = &value[0];
+        for (;(*out++ = (xmlChar) *in);)in++;
+
+        xmlAddEntity(NULL, (const xmlChar *) &name[0],
+                     XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
+                     &value[0]);
+    }
+}
+
+/**
+ * xmlCleanupPredefinedEntities:
+ *
+ * Cleanup up the predefined entities table.
+ */
+void xmlCleanupPredefinedEntities(void) {
+    if (xmlPredefinedEntities == NULL) return;
+
+    xmlFreeEntitiesTable(xmlPredefinedEntities);
+    xmlPredefinedEntities = NULL;
+}
+
+/**
+ * xmlGetPredefinedEntity:
+ * @name: the entity name
+ *
+ * Check whether this name is a predefined entity.
+ *
+ * Returns NULL if not, otherwise the entity
+ */
+xmlEntityPtr
+xmlGetPredefinedEntity(const xmlChar *name) {
+    xmlEntityPtr found;
+
+    /* the predefined table is built lazily, on first lookup */
+    if (xmlPredefinedEntities == NULL) {
+        xmlInitializePredefinedEntities();
+    }
+    found = (xmlEntityPtr) xmlHashLookup(xmlPredefinedEntities, name);
+    return(found);
+}
+
+/**
+ * xmlAddDtdEntity:
+ * @doc:  the document
+ * @name:  the entity name
+ * @type:  the entity type XML_xxx_yyy_ENTITY
+ * @ExternalID:  the entity external ID if available
+ * @SystemID:  the entity system ID if available
+ * @content:  the entity content
+ *
+ * Register a new entity for this document DTD external subset and
+ * append it to the children list of that subset.
+ *
+ * Returns a pointer to the entity or NULL in case of error
+ */
+xmlEntityPtr
+xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
+                const xmlChar *ExternalID, const xmlChar *SystemID,
+                const xmlChar *content) {
+    xmlDtdPtr subset;
+    xmlEntityPtr entity;
+
+    if (doc == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDtdEntity: doc == NULL !\n");
+        return(NULL);
+    }
+    subset = doc->extSubset;
+    if (subset == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDtdEntity: document without external subset !\n");
+        return(NULL);
+    }
+    entity = xmlAddEntity(subset, name, type, ExternalID, SystemID, content);
+    if (entity == NULL)
+        return(NULL);
+
+    /*
+     * Append the new record at the tail of the subset's children.
+     */
+    entity->parent = subset;
+    entity->doc = subset->doc;
+    if (subset->last != NULL) {
+        subset->last->next = (xmlNodePtr) entity;
+        entity->prev = subset->last;
+    } else {
+        subset->children = (xmlNodePtr) entity;
+    }
+    subset->last = (xmlNodePtr) entity;
+    return(entity);
+}
+
+/**
+ * xmlAddDocEntity:
+ * @doc:  the document
+ * @name:  the entity name
+ * @type:  the entity type XML_xxx_yyy_ENTITY
+ * @ExternalID:  the entity external ID if available
+ * @SystemID:  the entity system ID if available
+ * @content:  the entity content
+ *
+ * Register a new entity for this document.
+ *
+ * Returns a pointer to the entity or NULL in case of error
+ */
+xmlEntityPtr
+xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
+                const xmlChar *ExternalID, const xmlChar *SystemID,
+                const xmlChar *content) {
+    xmlEntityPtr ret;
+    xmlDtdPtr dtd;
+
+    if (doc == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDocEntity: document is NULL !\n");
+        return(NULL);
+    }
+    if (doc->intSubset == NULL) {
+        /* the message used to name xmlAddDtdEntity, which made the
+           report point at the wrong entry point */
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlAddDocEntity: document without internal subset !\n");
+        return(NULL);
+    }
+    dtd = doc->intSubset;
+    ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
+    if (ret == NULL) return(NULL);
+
+    /*
+     * Link it to the Dtd: the entity becomes the last child of the
+     * internal subset.
+     */
+    ret->parent = dtd;
+    ret->doc = dtd->doc;
+    if (dtd->last == NULL) {
+        dtd->children = dtd->last = (xmlNodePtr) ret;
+    } else {
+        dtd->last->next = (xmlNodePtr) ret;
+        ret->prev = dtd->last;
+        dtd->last = (xmlNodePtr) ret;
+    }
+    return(ret);
+}
+
+/**
+ * xmlGetEntityFromTable:
+ * @table:  an entity table
+ * @name:  the entity name
+ *
+ * Do an entity lookup in the table and return the corresponding
+ * entity, if found.
+ *
+ * Returns A pointer to the entity structure or NULL if not found.
+ */
+xmlEntityPtr
+xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
+    return((xmlEntityPtr) xmlHashLookup(table, name));
+}
+
+/**
+ * xmlGetParameterEntity:
+ * @doc:  the document referencing the entity
+ * @name:  the entity name
+ *
+ * Do an entity lookup in the internal and external subsets and
+ * returns the corresponding parameter entity, if found.
+ *
+ * Returns A pointer to the entity structure or NULL if not found.
+ */
+xmlEntityPtr
+xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
+    xmlEntitiesTablePtr table;
+    xmlEntityPtr ret;
+
+    /* a missing document has no subsets to search */
+    if (doc == NULL)
+        return(NULL);
+    if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
+        table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
+        ret = xmlGetEntityFromTable(table, name);
+        if (ret != NULL)
+            return(ret);
+    }
+    /* the internal subset takes precedence; fall back to the external one */
+    if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
+        table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
+        return(xmlGetEntityFromTable(table, name));
+    }
+    return(NULL);
+}
+
+/**
+ * xmlGetDtdEntity:
+ * @doc:  the document referencing the entity
+ * @name:  the entity name
+ *
+ * Do an entity lookup in the Dtd entity hash table (the general
+ * entities of the external subset) and returns the corresponding
+ * entity, if found.
+ *
+ * Returns A pointer to the entity structure or NULL if not found,
+ *         also NULL when @doc is NULL.
+ */
+xmlEntityPtr
+xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
+    xmlEntitiesTablePtr table;
+
+    if (doc == NULL)
+        return(NULL);
+    if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
+        table = (xmlEntitiesTablePtr) doc->extSubset->entities;
+        return(xmlGetEntityFromTable(table, name));
+    }
+    return(NULL);
+}
+
+/**
+ * xmlGetDocEntity:
+ * @doc:  the document referencing the entity
+ * @name:  the entity name
+ *
+ * Do an entity lookup in the document entity hash table and
+ * returns the corresponding entity, otherwise a lookup is done
+ * in the predefined entities too.
+ *
+ * Returns A pointer to the entity structure or NULL if not found.
+ */
+xmlEntityPtr
+xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
+    xmlEntityPtr cur;
+    xmlEntitiesTablePtr table;
+
+    /* lookup order: internal subset, external subset, predefined table */
+    if (doc != NULL) {
+        if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
+            table = (xmlEntitiesTablePtr) doc->intSubset->entities;
+            cur = xmlGetEntityFromTable(table, name);
+            if (cur != NULL)
+                return(cur);
+        }
+        if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
+            table = (xmlEntitiesTablePtr) doc->extSubset->entities;
+            cur = xmlGetEntityFromTable(table, name);
+            if (cur != NULL)
+                return(cur);
+        }
+    }
+    if (xmlPredefinedEntities == NULL)
+        xmlInitializePredefinedEntities();
+    table = xmlPredefinedEntities;
+    return(xmlGetEntityFromTable(table, name));
+}
+
+/*
+ * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
+ *                  | [#x10000-#x10FFFF]
+ * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+ *
+ * The macro follows the production literally, so surrogates and values
+ * above #x10FFFF are rejected. The argument is evaluated several times:
+ * do not pass an expression with side effects.
+ */
+#define IS_CHAR(c)							\
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||		\
+     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
+     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
+     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
+
+/*
+ * A buffer used for converting entities to their equivalent and back.
+ */
+static int buffer_size = 0;
+static xmlChar *buffer = NULL;
+
+/*
+ * growBuffer : double the size of the static conversion buffer.
+ *
+ * Returns 0 on success. On failure the problem is reported with
+ * perror(), the old block is released, buffer is left NULL with
+ * buffer_size reset to 0, and -1 is returned.
+ */
+int growBuffer(void) {
+    int new_size = buffer_size * 2;
+    xmlChar *grown;
+
+    grown = (xmlChar *) xmlRealloc(buffer, new_size * sizeof(xmlChar));
+    if (grown == NULL) {
+        perror("realloc failed");
+        /* realloc left the old block untouched: release it instead of
+           losing the only pointer to it */
+        if (buffer != NULL)
+            xmlFree(buffer);
+        buffer = NULL;
+        buffer_size = 0;
+        return(-1);
+    }
+    buffer = grown;
+    buffer_size = new_size;
+    return(0);
+}
+
+
+/**
+ * xmlEncodeEntities:
+ * @doc:  the document containing the string
+ * @input:  A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts.
+ *
+ * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
+ *       compatibility
+ *
+ * People must migrate their code to xmlEncodeEntitiesReentrant !
+ * This routine will issue a warning when encountered.
+ *
+ * Returns A newly allocated string with the substitution done.
+ */
+const xmlChar *
+xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
+    const xmlChar *cur = input;
+    xmlChar *out = buffer;
+    static int warning = 1;
+    int html = 0;
+
+
+    if (warning) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Deprecated API xmlEncodeEntities() used\n");
+        xmlGenericError(xmlGenericErrorContext,
+                " change code to use xmlEncodeEntitiesReentrant()\n");
+        warning = 0;
+    }
+
+    if (input == NULL) return(NULL);
+    if (doc != NULL)
+        html = (doc->type == XML_HTML_DOCUMENT_NODE);
+
+    if (buffer == NULL) {
+        buffer_size = 1000;
+        buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
+        if (buffer == NULL) {
+            perror("malloc failed");
+            return(NULL);
+        }
+        out = buffer;
+    }
+    while (*cur != '\0') {
+        /* keep 100 bytes of headroom: one iteration writes at most a
+           short entity or character reference */
+        if (out - buffer > buffer_size - 100) {
+            int index = out - buffer;
+
+            /* do not go on writing through a buffer that failed to grow */
+            if (growBuffer() < 0)
+                return(NULL);
+            out = &buffer[index];
+        }
+
+        /*
+         * By default one have to encode at least '<', '>', '"' and '&' !
+         */
+        if (*cur == '<') {
+            *out++ = '&';
+            *out++ = 'l';
+            *out++ = 't';
+            *out++ = ';';
+        } else if (*cur == '>') {
+            *out++ = '&';
+            *out++ = 'g';
+            *out++ = 't';
+            *out++ = ';';
+        } else if (*cur == '&') {
+            *out++ = '&';
+            *out++ = 'a';
+            *out++ = 'm';
+            *out++ = 'p';
+            *out++ = ';';
+        } else if (*cur == '"') {
+            *out++ = '&';
+            *out++ = 'q';
+            *out++ = 'u';
+            *out++ = 'o';
+            *out++ = 't';
+            *out++ = ';';
+        } else if ((*cur == '\'') && (!html)) {
+            *out++ = '&';
+            *out++ = 'a';
+            *out++ = 'p';
+            *out++ = 'o';
+            *out++ = 's';
+            *out++ = ';';
+        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
+            (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
+            /*
+             * default case, just copy !
+             */
+            *out++ = *cur;
+#ifndef USE_UTF_8
+        } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
+            char buf[10], *ptr;
+
+#ifdef HAVE_SNPRINTF
+            snprintf(buf, sizeof(buf), "&#%d;", *cur);
+#else
+            sprintf(buf, "&#%d;", *cur);
+#endif
+            buf[sizeof(buf) - 1] = 0;
+            ptr = buf;
+            while (*ptr != 0) *out++ = *ptr++;
+#endif
+        } else if (IS_CHAR(*cur)) {
+            char buf[10], *ptr;
+
+#ifdef HAVE_SNPRINTF
+            snprintf(buf, sizeof(buf), "&#%d;", *cur);
+#else
+            sprintf(buf, "&#%d;", *cur);
+#endif
+            buf[sizeof(buf) - 1] = 0;
+            ptr = buf;
+            while (*ptr != 0) *out++ = *ptr++;
+        }
+#if 0
+        else {
+            /*
+             * default case, this is not a valid char !
+             * Skip it...
+             */
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlEncodeEntities: invalid char %d\n", (int) *cur);
+        }
+#endif
+        cur++;
+    }
+    *out++ = 0;
+    return(buffer);
+}
+
+/*
+ * Macro used to grow the current buffer.
+ * It works on the local variables buffer and buffer_size of the calling
+ * function; when the reallocation fails it releases the old block and
+ * makes the calling function return NULL.
+ */
+#define growBufferReentrant() {						\
+    xmlChar *grown_buffer_;						\
+    grown_buffer_ = (xmlChar *)						\
+        xmlRealloc(buffer, 2 * buffer_size * sizeof(xmlChar));		\
+    if (grown_buffer_ == NULL) {					\
+        perror("realloc failed");					\
+        xmlFree(buffer);						\
+        return(NULL);							\
+    }									\
+    buffer = grown_buffer_;						\
+    buffer_size *= 2;							\
+}
+
+/*
+ * xmlEntitiesAppendCharRef : write a character reference for @val at @out.
+ * The reference is hexadecimal (&#xHHHH;) when @hex is non-zero, decimal
+ * (&#DDD;) otherwise. The longest output for a value accepted by IS_CHAR
+ * is "&#x10FFFF;", 10 characters.
+ *
+ * Returns the position just after the last character written.
+ */
+static xmlChar *
+xmlEntitiesAppendCharRef(xmlChar *out, int hex, int val) {
+    char buf[16], *ptr;
+
+#ifdef HAVE_SNPRINTF
+    if (hex)
+        snprintf(buf, sizeof(buf), "&#x%X;", val);
+    else
+        snprintf(buf, sizeof(buf), "&#%d;", val);
+#else
+    if (hex)
+        sprintf(buf, "&#x%X;", val);
+    else
+        sprintf(buf, "&#%d;", val);
+#endif
+    buf[sizeof(buf) - 1] = 0;
+    for (ptr = buf; *ptr != 0; ptr++)
+        *out++ = (xmlChar) *ptr;
+    return(out);
+}
+
+
+/**
+ * xmlEncodeEntitiesReentrant:
+ * @doc:  the document containing the string, may be NULL
+ * @input:  A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts.
+ * Contrary to xmlEncodeEntities, this routine is reentrant, and result
+ * must be deallocated.
+ *
+ * Bytes >= 0x80 are copied unchanged for HTML documents and for documents
+ * with a declared encoding; otherwise the input is decoded as UTF-8 and
+ * each character is written as a hexadecimal character reference. A byte
+ * that does not start a well formed UTF-8 sequence is reported, written
+ * as a decimal reference, and the document (if any) is marked as
+ * ISO-8859-1.
+ *
+ * Returns A newly allocated string with the substitution done, NULL if
+ *         @input is NULL or memory is exhausted.
+ */
+xmlChar *
+xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
+    const xmlChar *cur = input;
+    xmlChar *buffer = NULL;
+    xmlChar *out = NULL;
+    int buffer_size = 0;
+    int html = 0;
+
+    if (input == NULL) return(NULL);
+    if (doc != NULL)
+        html = (doc->type == XML_HTML_DOCUMENT_NODE);
+
+    /*
+     * allocate an translation buffer.
+     */
+    buffer_size = 1000;
+    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
+    if (buffer == NULL) {
+        perror("malloc failed");
+        return(NULL);
+    }
+    out = buffer;
+
+    while (*cur != '\0') {
+        if (out - buffer > buffer_size - 100) {
+            int index = out - buffer;
+
+            growBufferReentrant();
+            out = &buffer[index];
+        }
+
+        /*
+         * By default one have to encode at least '<', '>', '"' and '&' !
+         */
+        if (*cur == '<') {
+            *out++ = '&';
+            *out++ = 'l';
+            *out++ = 't';
+            *out++ = ';';
+        } else if (*cur == '>') {
+            *out++ = '&';
+            *out++ = 'g';
+            *out++ = 't';
+            *out++ = ';';
+        } else if (*cur == '&') {
+            *out++ = '&';
+            *out++ = 'a';
+            *out++ = 'm';
+            *out++ = 'p';
+            *out++ = ';';
+        } else if (*cur == '"') {
+            *out++ = '&';
+            *out++ = 'q';
+            *out++ = 'u';
+            *out++ = 'o';
+            *out++ = 't';
+            *out++ = ';';
+#if 0
+        } else if ((*cur == '\'') && (!html)) {
+            *out++ = '&';
+            *out++ = 'a';
+            *out++ = 'p';
+            *out++ = 'o';
+            *out++ = 's';
+            *out++ = ';';
+#endif
+        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
+            (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
+            /*
+             * default case, just copy !
+             */
+            *out++ = *cur;
+        } else if (*cur >= 0x80) {
+            /* doc may legitimately be NULL: only look at its encoding
+               when there is a document */
+            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
+                /*
+                 * Bjørn Reese provided the patch
+                xmlChar xc;
+                xc = (*cur & 0x3F) << 6;
+                if (cur[1] != 0) {
+                    xc += *(++cur) & 0x3F;
+                    *out++ = xc;
+                } else
+                 */
+                *out++ = *cur;
+            } else {
+                /*
+                 * We assume we have UTF-8 input.
+                 */
+                int val = 0, l = 1, i;
+
+                if (*cur < 0xC0) {
+                    xmlGenericError(xmlGenericErrorContext,
+                            "xmlEncodeEntitiesReentrant : input not UTF-8\n");
+                    if (doc != NULL)
+                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
+                    out = xmlEntitiesAppendCharRef(out, 0, *cur);
+                    /* consume the offending byte, otherwise the loop
+                       never advances and overruns the buffer */
+                    cur++;
+                    continue;
+                } else if (*cur < 0xE0) {
+                    val = (cur[0]) & 0x1F;
+                    l = 2;
+                } else if (*cur < 0xF0) {
+                    val = (cur[0]) & 0x0F;
+                    l = 3;
+                } else if (*cur < 0xF8) {
+                    val = (cur[0]) & 0x07;
+                    l = 4;
+                }
+                /*
+                 * Accumulate the continuation bytes one at a time and
+                 * stop at the first byte that is not 10xxxxxx; this
+                 * includes the terminating 0, so a truncated sequence
+                 * is never read past the end of the string.
+                 */
+                for (i = 1; i < l; i++) {
+                    if ((cur[i] & 0xC0) != 0x80) {
+                        l = 1;
+                        break;
+                    }
+                    val = (val << 6) | ((cur[i]) & 0x3F);
+                }
+                if ((l == 1) || (!IS_CHAR(val))) {
+                    xmlGenericError(xmlGenericErrorContext,
+                        "xmlEncodeEntitiesReentrant : char out of range\n");
+                    if (doc != NULL)
+                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
+                    out = xmlEntitiesAppendCharRef(out, 0, *cur);
+                    cur++;
+                    continue;
+                }
+                /*
+                 * We could do multiple things here. Just save as a char ref
+                 */
+                out = xmlEntitiesAppendCharRef(out, 1, val);
+                cur += l;
+                continue;
+            }
+        } else if (IS_CHAR(*cur)) {
+            out = xmlEntitiesAppendCharRef(out, 0, *cur);
+        }
+#if 0
+        else {
+            /*
+             * default case, this is not a valid char !
+             * Skip it...
+             */
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlEncodeEntities: invalid char %d\n", (int) *cur);
+        }
+#endif
+        cur++;
+    }
+    *out++ = 0;
+    return(buffer);
+}
+
+/**
+ * xmlEncodeSpecialChars:
+ * @doc:  the document containing the string (currently not used)
+ * @input:  A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * this routine is reentrant, and result must be deallocated.
+ * Only '<', '>', '&' and '"' are replaced, every other byte is copied.
+ *
+ * Returns A newly allocated string with the substitution done, NULL if
+ *         @input is NULL or memory is exhausted.
+ */
+xmlChar *
+xmlEncodeSpecialChars(xmlDocPtr doc, const xmlChar *input) {
+    const xmlChar *cur = input;
+    xmlChar *buffer = NULL;
+    xmlChar *out = NULL;
+    int buffer_size = 0;
+
+    if (input == NULL) return(NULL);
+
+    /*
+     * allocate an translation buffer.
+     */
+    buffer_size = 1000;
+    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
+    if (buffer == NULL) {
+        perror("malloc failed");
+        return(NULL);
+    }
+    out = buffer;
+
+    while (*cur != '\0') {
+        /* at most 6 bytes ("&quot;") are written per input byte */
+        if (out - buffer > buffer_size - 10) {
+            int index = out - buffer;
+
+            growBufferReentrant();
+            out = &buffer[index];
+        }
+
+        /*
+         * By default one have to encode at least '<', '>', '"' and '&' !
+         */
+        if (*cur == '<') {
+            *out++ = '&';
+            *out++ = 'l';
+            *out++ = 't';
+            *out++ = ';';
+        } else if (*cur == '>') {
+            *out++ = '&';
+            *out++ = 'g';
+            *out++ = 't';
+            *out++ = ';';
+        } else if (*cur == '&') {
+            *out++ = '&';
+            *out++ = 'a';
+            *out++ = 'm';
+            *out++ = 'p';
+            *out++ = ';';
+        } else if (*cur == '"') {
+            *out++ = '&';
+            *out++ = 'q';
+            *out++ = 'u';
+            *out++ = 'o';
+            *out++ = 't';
+            *out++ = ';';
+        } else {
+            /*
+             * Works because on UTF-8, all extended sequences cannot
+             * result in bytes in the ASCII range.
+             */
+            *out++ = *cur;
+        }
+        cur++;
+    }
+    *out++ = 0;
+    return(buffer);
+}
+
+/**
+ * xmlCreateEntitiesTable:
+ *
+ * create and initialize an empty entities hash table.
+ *
+ * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
+ */
+xmlEntitiesTablePtr
+xmlCreateEntitiesTable(void) {
+    return((xmlEntitiesTablePtr) xmlHashCreate(0));
+}
+
+/**
+ * xmlFreeEntitiesTable:
+ * @table:  An entity table
+ *
+ * Deallocate the memory used by an entities hash table; every entity
+ * stored in it is released with xmlFreeEntity.
+ */
+void
+xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
+    xmlHashFree(table, (xmlHashDeallocator) xmlFreeEntity);
+}
+
+/**
+ * xmlCopyEntity:
+ * @ent:  An entity
+ *
+ * Build a copy of an entity: the type, the content length and the
+ * name, ExternalID, SystemID, content and orig strings are duplicated.
+ * The tree links and the URI of the copy are left NULL.
+ *
+ * Returns the new xmlEntityPtr or NULL in case of error.
+ */
+xmlEntityPtr
+xmlCopyEntity(xmlEntityPtr ent) {
+    xmlEntityPtr cur;
+
+    if (ent == NULL)
+        return(NULL);
+    cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
+    if (cur == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlCopyEntity: out of memory !\n");
+        return(NULL);
+    }
+    memset(cur, 0, sizeof(xmlEntity));
+    /* an entity record is an XML_ENTITY_DECL node, as set by
+       xmlAddEntity; it used to be tagged XML_ELEMENT_DECL here */
+    cur->type = XML_ENTITY_DECL;
+
+    cur->etype = ent->etype;
+    if (ent->name != NULL)
+        cur->name = xmlStrdup(ent->name);
+    if (ent->ExternalID != NULL)
+        cur->ExternalID = xmlStrdup(ent->ExternalID);
+    if (ent->SystemID != NULL)
+        cur->SystemID = xmlStrdup(ent->SystemID);
+    if (ent->content != NULL) {
+        cur->content = xmlStrdup(ent->content);
+        /* keep the recorded length in step with the copied content */
+        cur->length = ent->length;
+    }
+    if (ent->orig != NULL)
+        cur->orig = xmlStrdup(ent->orig);
+    return(cur);
+}
+
+/**
+ * xmlCopyEntitiesTable:
+ * @table:  An entity table
+ *
+ * Build a copy of an entity table.
+ *
+ * Returns the new xmlEntitiesTablePtr or NULL in case of error.
+ */
+xmlEntitiesTablePtr
+xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
+    return(xmlHashCopy(table, (xmlHashCopier) xmlCopyEntity));
+}
+
+/**
+ * xmlDumpEntityDecl:
+ * @buf:  An XML buffer.
+ * @ent:  An entity
+ *
+ * This will dump the content of the entity as an XML DTD declaration.
+ * Nothing is written when @buf or @ent is NULL.
+ */
+void
+xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
+    if ((buf == NULL) || (ent == NULL))
+        return;
+    switch (ent->etype) {
+        case XML_INTERNAL_GENERAL_ENTITY:
+            xmlBufferWriteChar(buf, "<!ENTITY ");
+            xmlBufferWriteCHAR(buf, ent->name);
+            xmlBufferWriteChar(buf, " ");
+            if (ent->orig != NULL)
+                xmlBufferWriteQuotedString(buf, ent->orig);
+            else
+                xmlBufferWriteQuotedString(buf, ent->content);
+            xmlBufferWriteChar(buf, ">\n");
+            break;
+        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+            xmlBufferWriteChar(buf, "<!ENTITY ");
+            xmlBufferWriteCHAR(buf, ent->name);
+            if (ent->ExternalID != NULL) {
+                 xmlBufferWriteChar(buf, " PUBLIC ");
+                 xmlBufferWriteQuotedString(buf, ent->ExternalID);
+                 xmlBufferWriteChar(buf, " ");
+                 xmlBufferWriteQuotedString(buf, ent->SystemID);
+            } else {
+                 xmlBufferWriteChar(buf, " SYSTEM ");
+                 xmlBufferWriteQuotedString(buf, ent->SystemID);
+            }
+            xmlBufferWriteChar(buf, ">\n");
+            break;
+        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            xmlBufferWriteChar(buf, "<!ENTITY ");
+            xmlBufferWriteCHAR(buf, ent->name);
+            if (ent->ExternalID != NULL) {
+                 xmlBufferWriteChar(buf, " PUBLIC ");
+                 xmlBufferWriteQuotedString(buf, ent->ExternalID);
+                 xmlBufferWriteChar(buf, " ");
+                 xmlBufferWriteQuotedString(buf, ent->SystemID);
+            } else {
+                 xmlBufferWriteChar(buf, " SYSTEM ");
+                 xmlBufferWriteQuotedString(buf, ent->SystemID);
+            }
+            if (ent->content != NULL) { /* Should be true ! */
+                xmlBufferWriteChar(buf, " NDATA ");
+                if (ent->orig != NULL)
+                    xmlBufferWriteCHAR(buf, ent->orig);
+                else
+                    xmlBufferWriteCHAR(buf, ent->content);
+            }
+            xmlBufferWriteChar(buf, ">\n");
+            break;
+        case XML_INTERNAL_PARAMETER_ENTITY:
+            xmlBufferWriteChar(buf, "<!ENTITY % ");
+            xmlBufferWriteCHAR(buf, ent->name);
+            xmlBufferWriteChar(buf, " ");
+            if (ent->orig == NULL)
+                xmlBufferWriteQuotedString(buf, ent->content);
+            else
+                xmlBufferWriteQuotedString(buf, ent->orig);
+            xmlBufferWriteChar(buf, ">\n");
+            break;
+        case XML_EXTERNAL_PARAMETER_ENTITY:
+            xmlBufferWriteChar(buf, "<!ENTITY % ");
+            xmlBufferWriteCHAR(buf, ent->name);
+            if (ent->ExternalID != NULL) {
+                 xmlBufferWriteChar(buf, " PUBLIC ");
+                 xmlBufferWriteQuotedString(buf, ent->ExternalID);
+                 xmlBufferWriteChar(buf, " ");
+                 xmlBufferWriteQuotedString(buf, ent->SystemID);
+            } else {
+                 xmlBufferWriteChar(buf, " SYSTEM ");
+                 xmlBufferWriteQuotedString(buf, ent->SystemID);
+            }
+            xmlBufferWriteChar(buf, ">\n");
+            break;
+        default:
+            xmlGenericError(xmlGenericErrorContext,
+                "xmlDumpEntitiesTable: internal: unknown type %d\n",
+                    ent->etype);
+    }
+}
+
+/*
+ * xmlDumpEntityDeclScan : adapter used when walking an entities table.
+ * NOTE(review): this assumes xmlHashScan calls the scanner with the
+ * stored payload first and the user data second, i.e. (entity, buffer),
+ * which is the reverse of xmlDumpEntityDecl's parameter order - confirm
+ * against hash.h.
+ */
+static void
+xmlDumpEntityDeclScan(xmlEntityPtr ent, xmlBufferPtr buf) {
+    xmlDumpEntityDecl(buf, ent);
+}
+
+/**
+ * xmlDumpEntitiesTable:
+ * @buf:  An XML buffer.
+ * @table:  An entity table
+ *
+ * This will dump the content of the entity table as an XML DTD definition
+ */
+void
+xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
+    xmlHashScan(table, (xmlHashScanner) xmlDumpEntityDeclScan, buf);
+}
diff --git a/entities.h b/entities.h
new file mode 100644
index 00000000..305d043c
--- /dev/null
+++ b/entities.h
@@ -0,0 +1,114 @@
+/*
+ * entities.h : interface for the XML entities handling
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+#ifndef __XML_ENTITIES_H__
+#define __XML_ENTITIES_H__
+
+/* NOTE(review): header name lost in extraction, reconstructed from the
+   tree types used below (xmlDocPtr, xmlChar, xmlBufferPtr) - confirm */
+#include <libxml/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The different valid entity types
+ */
+typedef enum {
+    XML_INTERNAL_GENERAL_ENTITY = 1,
+    XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
+    XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
+    XML_INTERNAL_PARAMETER_ENTITY = 4,
+    XML_EXTERNAL_PARAMETER_ENTITY = 5,
+    XML_INTERNAL_PREDEFINED_ENTITY = 6
+} xmlEntityType;
+
+/*
+ * An unit of storage for an entity, contains the string, the value
+ * and the linking data needed for the linking in the hash table.
+ */
+
+typedef struct _xmlEntity xmlEntity;
+typedef xmlEntity *xmlEntityPtr;
+struct _xmlEntity {
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ENTITY_DECL, must be second ! */
+    const xmlChar          *name;	/* Entity name */
+    struct _xmlNode    *children;	/* NULL */
+    struct _xmlNode        *last;	/* NULL */
+    struct _xmlDtd       *parent;	/* -> DTD */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    xmlChar                *orig;	/* content without ref substitution */
+    xmlChar             *content;	/* content or ndata if unparsed */
+    int                   length;	/* the content length */
+    xmlEntityType          etype;	/* The entity type */
+    const xmlChar    *ExternalID;	/* External identifier for PUBLIC */
+    const xmlChar      *SystemID;	/* URI for a SYSTEM or PUBLIC Entity */
+
+    struct _xmlEntity     *nexte;	/* unused */
+    const xmlChar           *URI;	/* the full URI as computed */
+};
+
+/*
+ * All entities are stored in a hash table;
+ * there are 2 separate hash tables for global and parameter entities
+ */
+
+typedef struct _xmlHashTable xmlEntitiesTable;
+typedef xmlEntitiesTable *xmlEntitiesTablePtr;
+
+/*
+ * External functions :
+ */
+
+void		xmlInitializePredefinedEntities	(void);
+xmlEntityPtr		xmlAddDocEntity		(xmlDocPtr doc,
+						 const xmlChar *name,
+						 int type,
+						 const xmlChar *ExternalID,
+						 const xmlChar *SystemID,
+						 const xmlChar *content);
+xmlEntityPtr		xmlAddDtdEntity		(xmlDocPtr doc,
+						 const xmlChar *name,
+						 int type,
+						 const xmlChar *ExternalID,
+						 const xmlChar *SystemID,
+						 const xmlChar *content);
+xmlEntityPtr		xmlGetPredefinedEntity	(const xmlChar *name);
+xmlEntityPtr		xmlGetDocEntity		(xmlDocPtr doc,
+						 const xmlChar *name);
+xmlEntityPtr		xmlGetDtdEntity		(xmlDocPtr doc,
+						 const xmlChar *name);
+xmlEntityPtr		xmlGetParameterEntity	(xmlDocPtr doc,
+						 const xmlChar *name);
+const xmlChar *		xmlEncodeEntities	(xmlDocPtr doc,
+						 const xmlChar *input);
+xmlChar *		xmlEncodeEntitiesReentrant(xmlDocPtr doc,
+						 const xmlChar *input);
+xmlChar *		xmlEncodeSpecialChars	(xmlDocPtr doc,
+						 const xmlChar *input);
+xmlEntitiesTablePtr	xmlCreateEntitiesTable	(void);
+xmlEntitiesTablePtr	xmlCopyEntitiesTable	(xmlEntitiesTablePtr table);
+void			xmlFreeEntitiesTable	(xmlEntitiesTablePtr table);
+void			xmlDumpEntitiesTable	(xmlBufferPtr buf,
+						 xmlEntitiesTablePtr table);
+void			xmlDumpEntityDecl	(xmlBufferPtr buf,
+						 xmlEntityPtr ent);
+void			xmlCleanupPredefinedEntities(void);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __XML_ENTITIES_H__ */
diff --git a/error.c b/error.c
new file mode 100644
index 00000000..cf76016f
--- /dev/null
+++ b/error.c
@@ -0,0 +1,298 @@
+/*
+ * error.c: module displaying/handling XML parser errors
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard
+ */
+
+#ifdef WIN32
+#include "win32config.h"
+#else
+#include "config.h"
+#endif
+
+/*
+ * NOTE(review): the header names below were lost when this patch was
+ * extracted (only bare "#include" survived); they are reconstructed from
+ * the symbols this file uses - confirm against the upstream tree.
+ */
+#include <stdio.h>
+#include <stdarg.h>
+#include <libxml/parser.h>
+#include <libxml/xmlerror.h>
+
+/************************************************************************
+ *                                                                      *
+ *                  Handling of out of context errors                  *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * xmlGenericErrorDefaultFunc:
+ * @ctx:  an error context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Default handler for out of context error messages.
+ * The message is written to the global xmlGenericErrorContext, used as
+ * a FILE *, which is set to stderr when it is still NULL; the @ctx
+ * argument itself is not used.
+ */
+void
+xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...) {
+    va_list args;
+
+    if (xmlGenericErrorContext == NULL)
+        xmlGenericErrorContext = (void *) stderr;
+
+    va_start(args, msg);
+    vfprintf((FILE *)xmlGenericErrorContext, msg, args);
+    va_end(args);
+}
+
+xmlGenericErrorFunc xmlGenericError = xmlGenericErrorDefaultFunc;
+void *xmlGenericErrorContext = NULL;
+
+
+/**
+ * xmlSetGenericErrorFunc:
+ * @ctx:  the new error handling context
+ * @handler:  the new handler function
+ *
+ * Function to reset the handler and the error context for out of
+ * context error messages.
+ * This simply means that @handler will be called for subsequent
+ * error messages while not parsing nor validating. And @ctx will
+ * be passed as first argument to @handler
+ * One can simply force messages to be emitted to another FILE * than
+ * stderr by setting @ctx to this file handle and @handler to NULL.
+ */
+void
+xmlSetGenericErrorFunc(void *ctx, xmlGenericErrorFunc handler) {
+    xmlGenericErrorContext = ctx;
+    if (handler != NULL)
+        xmlGenericError = handler;
+    else
+        xmlGenericError = xmlGenericErrorDefaultFunc;
+}
+
+/************************************************************************
+ *                                                                      *
+ *                      Handling of parsing errors                     *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * xmlParserPrintFileInfo:
+ * @input:  an xmlParserInputPtr input
+ *
+ * Displays the associated file and line information for the current
+ * input. Nothing is printed when @input is NULL.
+ */
+
+void
+xmlParserPrintFileInfo(xmlParserInputPtr input) {
+    if (input != NULL) {
+        if (input->filename)
+            xmlGenericError(xmlGenericErrorContext,
+                    "%s:%d: ", input->filename,
+                    input->line);
+        else
+            xmlGenericError(xmlGenericErrorContext,
+                    "Entity: line %d: ", input->line);
+    }
+}
+
+/**
+ * xmlParserPrintFileContext:
+ * @input:  an xmlParserInputPtr input
+ *
+ * Displays current context within the input content for error tracking:
+ * the line around the current position (at most 79 characters) followed
+ * by a line holding a '^' under the current position.
+ */
+
+void
+xmlParserPrintFileContext(xmlParserInputPtr input) {
+    const xmlChar *cur, *base, *start;
+    int n;
+
+    if (input == NULL) return;
+    cur = input->cur;
+    start = input->base;
+    base = start;
+    /* step back over the line ending the position may be sitting on */
+    while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) {
+        cur--;
+    }
+    /* then back to the start of that line, 80 characters at most */
+    n = 0;
+    while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r'))
+        cur--;
+    if ((*cur == '\n') || (*cur == '\r')) cur++;
+    base = cur;
+    n = 0;
+    while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) {
+        xmlGenericError(xmlGenericErrorContext,
+                "%c", (unsigned char) *cur++);
+        n++;
+    }
+    xmlGenericError(xmlGenericErrorContext, "\n");
+    cur = input->cur;
+    /* same step back as above; it must not walk before the start of
+       the input buffer when the content begins with line endings */
+    while ((cur > start) && ((*cur == '\n') || (*cur == '\r')))
+        cur--;
+    n = 0;
+    /* pad up to the column of the current position */
+    while ((base < cur) && (n++ < 80)) {
+        xmlGenericError(xmlGenericErrorContext, " ");
+        base++;
+    }
+    xmlGenericError(xmlGenericErrorContext,"^\n");
+}
+
+/**
+ * xmlParserError:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra
parameters for the message display
+ *
+ * Display and format an error messages, gives file, line, position and
+ * extra parameters.
+ */
+
+/* formats and emits the caller supplied part of a message, see below */
+static void xmlParserReportMessage(const char *msg, va_list args);
+
+void
+xmlParserError(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input = NULL;
+    xmlParserInputPtr cur = NULL;
+    va_list args;
+
+    if (ctxt != NULL) {
+        input = ctxt->input;
+        /* an input without file name is reported relative to the
+           input just below it on the stack */
+        if ((input != NULL) && (input->filename == NULL) &&
+            (ctxt->inputNr > 1)) {
+            cur = input;
+            input = ctxt->inputTab[ctxt->inputNr - 2];
+        }
+        xmlParserPrintFileInfo(input);
+    }
+
+    xmlGenericError(xmlGenericErrorContext, "error: ");
+    va_start(args, msg);
+    xmlParserReportMessage(msg, args);
+    va_end(args);
+
+    if (ctxt != NULL) {
+        xmlParserPrintFileContext(input);
+        if (cur != NULL) {
+            xmlParserPrintFileInfo(cur);
+            xmlGenericError(xmlGenericErrorContext, "\n");
+            xmlParserPrintFileContext(cur);
+        }
+    }
+}
+
+/*
+ * xmlParserReportMessage : emit the formatted message @msg / @args.
+ *
+ * With the default handler the message goes straight to the FILE * held
+ * in xmlGenericErrorContext (stderr when it is NULL). With a handler
+ * installed through xmlSetGenericErrorFunc the context is opaque and
+ * must not be used as a FILE *: the message is formatted into a local
+ * buffer (longer messages are truncated) and handed to the handler.
+ */
+static void
+xmlParserReportMessage(const char *msg, va_list args)
+{
+    if (xmlGenericError == xmlGenericErrorDefaultFunc) {
+        FILE *out = (FILE *) xmlGenericErrorContext;
+
+        if (out == NULL)
+            out = stderr;
+        vfprintf(out, msg, args);
+    } else {
+        char str[1024];
+
+        vsnprintf(str, sizeof(str), msg, args);
+        str[sizeof(str) - 1] = 0;
+        xmlGenericError(xmlGenericErrorContext, "%s", str);
+    }
+}
+
+/**
+ * xmlParserWarning:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format a warning messages, gives file, line, position and
+ * extra parameters.
+ */
+void
+xmlParserWarning(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input = NULL;
+    xmlParserInputPtr cur = NULL;
+    va_list args;
+
+    if (ctxt != NULL) {
+        input = ctxt->input;
+        if ((input != NULL) && (input->filename == NULL) &&
+            (ctxt->inputNr > 1)) {
+            cur = input;
+            input = ctxt->inputTab[ctxt->inputNr - 2];
+        }
+        xmlParserPrintFileInfo(input);
+    }
+
+    xmlGenericError(xmlGenericErrorContext, "warning: ");
+    va_start(args, msg);
+    xmlParserReportMessage(msg, args);
+    va_end(args);
+
+
+    if (ctxt != NULL) {
+        xmlParserPrintFileContext(input);
+        if (cur != NULL) {
+            xmlParserPrintFileInfo(cur);
+            xmlGenericError(xmlGenericErrorContext, "\n");
+            xmlParserPrintFileContext(cur);
+        }
+    }
+}
+
+/************************************************************************
+ *                                                                      *
+ *                    Handling of validation errors                    *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * xmlParserValidityError:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format an validity error messages, gives file,
+ * line, position and extra parameters.
+ */
+void
+xmlParserValidityError(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input = NULL;
+    va_list args;
+
+    if (ctxt != NULL) {
+        input = ctxt->input;
+        /* the context may have no current input: test it before
+           looking at its file name, as xmlParserError does */
+        if ((input != NULL) && (input->filename == NULL) &&
+            (ctxt->inputNr > 1))
+            input = ctxt->inputTab[ctxt->inputNr - 2];
+
+        xmlParserPrintFileInfo(input);
+    }
+
+    xmlGenericError(xmlGenericErrorContext, "validity error: ");
+    va_start(args, msg);
+    xmlParserReportMessage(msg, args);
+    va_end(args);
+
+    if (ctxt != NULL) {
+        xmlParserPrintFileContext(input);
+    }
+}
+
+/**
+ * xmlParserValidityWarning:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format a validity warning messages, gives file, line,
+ * position and extra parameters.
+ */
+void
+xmlParserValidityWarning(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input = NULL;
+    va_list args;
+
+    if (ctxt != NULL) {
+        input = ctxt->input;
+        if ((input != NULL) && (input->filename == NULL) &&
+            (ctxt->inputNr > 1))
+            input = ctxt->inputTab[ctxt->inputNr - 2];
+
+        xmlParserPrintFileInfo(input);
+    }
+
+    xmlGenericError(xmlGenericErrorContext, "validity warning: ");
+    va_start(args, msg);
+    xmlParserReportMessage(msg, args);
+    va_end(args);
+
+    if (ctxt != NULL) {
+        xmlParserPrintFileContext(input);
+    }
+}
+
+
diff --git a/example/Makefile.am b/example/Makefile.am
index 00a53e9e..67d2a284 100644
--- a/example/Makefile.am
+++ b/example/Makefile.am
@@ -1,8 +1,8 @@
 noinst_PROGRAMS = gjobread
 
-DEFS =
-INCLUDES = -I$(top_builddir)
+INCLUDES = \
+	-I$(top_builddir) -I$(top_srcdir) \
+	-I@srcdir@
 
-DEPS = $(top_builddir)/libxml/libxml2.la
-LDADD = $(top_builddir)/libxml/libxml2.la
+LDADD = $(top_builddir)/libxml2.la @Z_LIBS@
 
diff --git a/hash.c b/hash.c
new file mode 100644
index 00000000..7881fc64
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,620 @@
+/*
+ * hash.c: chained hash tables
+ *
+ * Reference: Your favorite introductory book on algorithms
+ * + * Copyright (C) 2000 Bjorn Reese and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. + * + * Author: bjorn.reese@systematic.dk + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include +#include +#include + +/* + * A single entry in the hash table + */ +typedef struct _xmlHashEntry xmlHashEntry; +typedef xmlHashEntry *xmlHashEntryPtr; +struct _xmlHashEntry { + struct _xmlHashEntry *next; + xmlChar *name; + xmlChar *name2; + xmlChar *name3; + void *payload; +}; + +/* + * The entire hash table + */ +struct _xmlHashTable { + struct _xmlHashEntry **table; + int size; + int nbElems; +}; + +/* + * xmlHashComputeKey: + * Calculate the hash key + */ +static unsigned long +xmlHashComputeKey(xmlHashTablePtr table, const xmlChar *string) { + unsigned long value = 0L; + char ch; + + while ((ch = *string++) != 0) { + /* value *= 31; */ + value += (unsigned long)ch; + } + return (value % table->size); +} + +/** + * xmlHashCreate: + * @size: the size of the hash table + * + * Create a new xmlHashTablePtr. + * + * Returns the newly created object, or NULL if an error occured. 
+ */ +xmlHashTablePtr +xmlHashCreate(int size) { + xmlHashTablePtr table; + + if (size <= 0) + size = 256; + + table = xmlMalloc(sizeof(xmlHashTable)); + if (table) { + table->size = size; + table->nbElems = 0; + table->table = xmlMalloc(size * sizeof(xmlHashEntry)); + if (table->table) { + memset(table->table, 0, size * sizeof(xmlHashEntry)); + return(table); + } + xmlFree(table); + } + return(NULL); +} + +/** + * xmlHashFree: + * @table: the hash table + * @f: the deallocator function for items in the hash + * + * Free the hash table and its contents. The userdata is + * deallocated with f if provided. + */ +void +xmlHashFree(xmlHashTablePtr table, xmlHashDeallocator f) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + + if (table == NULL) + return; + if (table->table) { + for(i = 0; i < table->size; i++) { + iter = table->table[i]; + while (iter) { + next = iter->next; + if (iter->name) + xmlFree(iter->name); + if (iter->name2) + xmlFree(iter->name2); + if (iter->name3) + xmlFree(iter->name3); + if (f) + f(iter->payload, iter->name); + iter->payload = NULL; + xmlFree(iter); + iter = next; + } + table->table[i] = NULL; + } + xmlFree(table->table); + } + xmlFree(table); +} + +/** + * xmlHashAddEntry: + * @table: the hash table + * @name: the name of the userdata + * @userdata: a pointer to the userdata + * + * Add the userdata to the hash table. This can later be retrieved + * by using the name. Duplicate names generate errors. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashAddEntry(xmlHashTablePtr table, const xmlChar *name, void *userdata) { + return(xmlHashAddEntry3(table, name, NULL, NULL, userdata)); +} + +/** + * xmlHashAddEntry2: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @userdata: a pointer to the userdata + * + * Add the userdata to the hash table. This can later be retrieved + * by using the (name, name2) tuple. 
Duplicate tuples generate errors.
+ *
+ * Thin wrapper around xmlHashAddEntry3() with a NULL third name.
+ *
+ * Returns 0 if the addition succeeded and -1 in case of error.
+ */
+int
+xmlHashAddEntry2(xmlHashTablePtr table, const xmlChar *name,
+                 const xmlChar *name2, void *userdata) {
+    return(xmlHashAddEntry3(table, name, name2, NULL, userdata));
+}
+
+/**
+ * xmlHashUpdateEntry:
+ * @table: the hash table
+ * @name: the name of the userdata
+ * @userdata: a pointer to the userdata
+ * @f: the deallocator function for replaced item (if any)
+ *
+ * Add the userdata to the hash table. This can later be retrieved
+ * by using the name. If an entry already exists for this name, its
+ * old userdata is passed to @f (when provided) and replaced.
+ *
+ * Thin wrapper around xmlHashUpdateEntry3() with NULL second and
+ * third names.
+ *
+ * Returns 0 if the addition succeeded and -1 in case of error.
+ */
+int
+xmlHashUpdateEntry(xmlHashTablePtr table, const xmlChar *name,
+                   void *userdata, xmlHashDeallocator f) {
+    return(xmlHashUpdateEntry3(table, name, NULL, NULL, userdata, f));
+}
+
+/**
+ * xmlHashUpdateEntry2:
+ * @table: the hash table
+ * @name: the name of the userdata
+ * @name2: a second name of the userdata
+ * @userdata: a pointer to the userdata
+ * @f: the deallocator function for replaced item (if any)
+ *
+ * Add the userdata to the hash table. This can later be retrieved
+ * by using the (name, name2) tuple. If an entry already exists for
+ * this tuple, its old userdata is passed to @f (when provided) and
+ * replaced.
+ *
+ * Thin wrapper around xmlHashUpdateEntry3() with a NULL third name.
+ *
+ * Returns 0 if the addition succeeded and -1 in case of error.
+ */
+int
+xmlHashUpdateEntry2(xmlHashTablePtr table, const xmlChar *name,
+                   const xmlChar *name2, void *userdata,
+                   xmlHashDeallocator f) {
+    return(xmlHashUpdateEntry3(table, name, name2, NULL, userdata, f));
+}
+
+/**
+ * xmlHashLookup:
+ * @table: the hash table
+ * @name: the name of the userdata
+ *
+ * Find the userdata specified by the name.
+ * + * Returns the a pointer to the userdata + */ +void * +xmlHashLookup(xmlHashTablePtr table, const xmlChar *name) { + return(xmlHashLookup3(table, name, NULL, NULL)); +} + +/** + * xmlHashLookup2: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * + * Find the userdata specified by the (name, name2) tuple. + * + * Returns the a pointer to the userdata + */ +void * +xmlHashLookup2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2) { + return(xmlHashLookup3(table, name, name2, NULL)); +} + +/** + * xmlHashAddEntry3: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * @userdata: a pointer to the userdata + * + * Add the userdata to the hash table. This can later be retrieved + * by using the tuple (name, name2, name3). Duplicate entries generate + * errors. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashAddEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + void *userdata) { + unsigned long key; + xmlHashEntryPtr entry; + xmlHashEntryPtr insert; + + if ((table == NULL) || name == NULL) + return(-1); + + /* + * Check for duplicate and insertion location. 
+ */ + key = xmlHashComputeKey(table, name); + if (table->table[key] == NULL) { + insert = NULL; + } else { + for (insert = table->table[key]; insert->next != NULL; + insert = insert->next) { + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) + return(-1); + } + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) + return(-1); + } + + entry = xmlMalloc(sizeof(xmlHashEntry)); + if (entry == NULL) + return(-1); + entry->name = xmlStrdup(name); + entry->name2 = xmlStrdup(name2); + entry->name3 = xmlStrdup(name3); + entry->payload = userdata; + entry->next = NULL; + + + if (insert == NULL) { + table->table[key] = entry; + } else { + insert->next = entry; + } + table->nbElems++; + return(0); +} + +/** + * xmlHashUpdateEntry3: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * @userdata: a pointer to the userdata + * @f: the deallocator function for replaced item (if any) + * + * Add the userdata to the hash table. This can later be retrieved + * by using the tuple (name, name2, name3). Existing entry for this tuple + * will be removed and freed with @f if found. + * + * Returns 0 the addition succeeded and -1 in case of error. + */ +int +xmlHashUpdateEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + void *userdata, xmlHashDeallocator f) { + unsigned long key; + xmlHashEntryPtr entry; + xmlHashEntryPtr insert; + + if ((table == NULL) || name == NULL) + return(-1); + + /* + * Check for duplicate and insertion location. 
+ */ + key = xmlHashComputeKey(table, name); + if (table->table[key] == NULL) { + insert = NULL; + } else { + for (insert = table->table[key]; insert->next != NULL; + insert = insert->next) { + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) { + if (f) + f(insert->payload, insert->name); + insert->payload = userdata; + return(0); + } + } + if ((xmlStrEqual(insert->name, name)) && + (xmlStrEqual(insert->name2, name2)) && + (xmlStrEqual(insert->name3, name3))) { + if (f) + f(insert->payload, insert->name); + insert->payload = userdata; + return(0); + } + } + + entry = xmlMalloc(sizeof(xmlHashEntry)); + if (entry == NULL) + return(-1); + entry->name = xmlStrdup(name); + entry->name2 = xmlStrdup(name2); + entry->name3 = xmlStrdup(name3); + entry->payload = userdata; + entry->next = NULL; + table->nbElems++; + + + if (insert == NULL) { + table->table[key] = entry; + } else { + insert->next = entry; + } + return(0); +} + +/** + * xmlHashLookup: + * @table: the hash table + * @name: the name of the userdata + * @name2: a second name of the userdata + * @name3: a third name of the userdata + * + * Find the userdata specified by the (name, name2, name3) tuple. 
+ * + * Returns the a pointer to the userdata + */ +void * +xmlHashLookup3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3) { + unsigned long key; + xmlHashEntryPtr entry; + + if (table == NULL) + return(NULL); + if (name == NULL) + return(NULL); + key = xmlHashComputeKey(table, name); + for (entry = table->table[key]; entry != NULL; entry = entry->next) { + if ((xmlStrEqual(entry->name, name)) && + (xmlStrEqual(entry->name2, name2)) && + (xmlStrEqual(entry->name3, name3))) + return(entry->payload); + } + return(NULL); +} + +/** + * xmlHashScan: + * @table: the hash table + * @f: the scanner function for items in the hash + * @data: extra data passed to f + * + * Scan the hash table and applied f to each value. + */ +void +xmlHashScan(xmlHashTablePtr table, xmlHashScanner f, void *data) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + + if (table == NULL) + return; + if (f == NULL) + return; + + if (table->table) { + for(i = 0; i < table->size; i++) { + iter = table->table[i]; + while (iter) { + next = iter->next; + if (f) + f(iter->payload, data, iter->name); + iter = next; + } + } + } +} + +/** + * xmlHashScan3: + * @table: the hash table + * @name: the name of the userdata or NULL + * @name2: a second name of the userdata or NULL + * @name3: a third name of the userdata or NULL + * @f: the scanner function for items in the hash + * @data: extra data passed to f + * + * Scan the hash table and applied f to each value matching + * (name, name2, name3) tuple. If one of the names is null, + * the comparison is considered to match. 
+ */ +void +xmlHashScan3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashScanner f, void *data) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + + if (table == NULL) + return; + if (f == NULL) + return; + + if (table->table) { + for(i = 0; i < table->size; i++) { + iter = table->table[i]; + while (iter) { + next = iter->next; + if (((name == NULL) || (xmlStrEqual(name, iter->name))) && + ((name2 == NULL) || (xmlStrEqual(name2, iter->name2))) && + ((name3 == NULL) || (xmlStrEqual(name3, iter->name3)))) { + f(iter->payload, data, iter->name); + } + iter = next; + } + } + } +} + +/** + * xmlHashCopy: + * @table: the hash table + * @f: the copier function for items in the hash + * + * Scan the hash table and applied f to each value. + * + * Returns the new table or NULL in case of error. + */ +xmlHashTablePtr +xmlHashCopy(xmlHashTablePtr table, xmlHashCopier f) { + int i; + xmlHashEntryPtr iter; + xmlHashEntryPtr next; + xmlHashTablePtr ret; + + if (table == NULL) + return(NULL); + if (f == NULL) + return(NULL); + + ret = xmlHashCreate(table->size); + if (table->table) { + for(i = 0; i < table->size; i++) { + iter = table->table[i]; + while (iter) { + next = iter->next; + xmlHashAddEntry3(ret, iter->name, iter->name2, + iter->name3, f(iter->payload, iter->name)); + iter = next; + } + } + } + ret->nbElems = table->nbElems; + return(ret); +} + +/** + * xmlHashSize: + * @table: the hash table + * + * Returns the number of elements in the hash table or + * -1 in case of error + */ +int +xmlHashSize(xmlHashTablePtr table) { + if (table == NULL) + return(-1); + return(table->nbElems); +} + +/** + * @table: the hash table + * @name: the name of the userdata + * @f: the deallocator function for removed item (if any) + * + * Find the userdata specified by the (name, name2, name3) tuple and remove + * it from the hash table. Existing userdata for this tuple will be removed + * and freed with @f. 
+ *
+ * Thin wrapper around xmlHashRemoveEntry3() with NULL second and
+ * third names.
+ *
+ * Returns 0 if the removal succeeded and -1 in case of error or not found.
+ */
+int xmlHashRemoveEntry(xmlHashTablePtr table, const xmlChar *name,
+                       xmlHashDeallocator f) {
+    return(xmlHashRemoveEntry3(table, name, NULL, NULL, f));
+}
+
+/**
+ * xmlHashRemoveEntry2:
+ * @table: the hash table
+ * @name: the name of the userdata
+ * @name2: a second name of the userdata
+ * @f: the deallocator function for removed item (if any)
+ *
+ * Find the userdata specified by the (name, name2) tuple and remove
+ * it from the hash table. Existing userdata for this tuple will be removed
+ * and freed with @f.
+ *
+ * Thin wrapper around xmlHashRemoveEntry3() with a NULL third name.
+ *
+ * Returns 0 if the removal succeeded and -1 in case of error or not found.
+ */
+int xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name,
+                        const xmlChar *name2, xmlHashDeallocator f) {
+    return(xmlHashRemoveEntry3(table, name, name2, NULL, f));
+}
+
+/**
+ * xmlHashRemoveEntry3:
+ * @table: the hash table
+ * @name: the name of the userdata
+ * @name2: a second name of the userdata
+ * @name3: a third name of the userdata
+ * @f: the deallocator function for removed item (if any)
+ *
+ * Find the userdata specified by the (name, name2, name3) tuple and remove
+ * it from the hash table. Existing userdata for this tuple will be removed
+ * and freed with @f.
+ *
+ * Returns 0 if the removal succeeded and -1 in case of error or not found.
+ */ +int xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashDeallocator f) { + unsigned long key; + xmlHashEntryPtr entry; + xmlHashEntryPtr prev = NULL; + + if (table == NULL || name == NULL) + return(-1); + + key = xmlHashComputeKey(table, name); + if (table->table[key] == NULL) { + return(-1); + } else { + for (entry = table->table[key]; entry != NULL; entry = entry->next) { + if (xmlStrEqual(entry->name, name) && + xmlStrEqual(entry->name2, name2) && + xmlStrEqual(entry->name3, name3)) { + if(f) + f(entry->payload, entry->name); + entry->payload = NULL; + if(entry->name) + xmlFree(entry->name); + if(entry->name2) + xmlFree(entry->name2); + if(entry->name3) + xmlFree(entry->name3); + if(prev) + prev->next = entry->next; + else + table->table[key] = entry->next; + xmlFree(entry); + table->nbElems--; + return(0); + } + prev = entry; + } + return(-1); + } +} \ No newline at end of file diff --git a/hash.h b/hash.h new file mode 100644 index 00000000..c88fffd1 --- /dev/null +++ b/hash.h @@ -0,0 +1,129 @@ +/* + * hash.c: chained hash tables + * + * Copyright (C) 2000 Bjorn Reese and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: bjorn.reese@systematic.dk + */ + +#ifndef __XML_HASH_H__ +#define __XML_HASH_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The hash table + */ +typedef struct _xmlHashTable xmlHashTable; +typedef xmlHashTable *xmlHashTablePtr; + +/* + * function types: + */ +typedef void (*xmlHashDeallocator)(void *payload, xmlChar *name); +typedef void *(*xmlHashCopier)(void *payload, xmlChar *name); +typedef void *(*xmlHashScanner)(void *payload, void *data, xmlChar *name); + +/* + * Constructor and destructor + */ +xmlHashTablePtr xmlHashCreate (int size); +void xmlHashFree (xmlHashTablePtr table, + xmlHashDeallocator f); + +/* + * Add a new entry to the hash table + */ +int xmlHashAddEntry (xmlHashTablePtr table, + const xmlChar *name, + void *userdata); +int xmlHashUpdateEntry(xmlHashTablePtr table, + const xmlChar *name, + void *userdata, + xmlHashDeallocator f); +int xmlHashAddEntry2(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + void *userdata); +int xmlHashUpdateEntry2(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + void *userdata, + xmlHashDeallocator f); +int xmlHashAddEntry3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata); +int xmlHashUpdateEntry3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata, + xmlHashDeallocator f); + +/* + * Remove an entry from the hash table + */ +int xmlHashRemoveEntry(xmlHashTablePtr table, const xmlChar *name, + xmlHashDeallocator f); +int xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, xmlHashDeallocator f); +int xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashDeallocator f); + +/* + * Retrieve the userdata + */ +void * xmlHashLookup (xmlHashTablePtr table, + const xmlChar *name); +void * xmlHashLookup2 (xmlHashTablePtr 
table, + const xmlChar *name, + const xmlChar *name2); +void * xmlHashLookup3 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3); + +/* + * Helpers + */ +xmlHashTablePtr xmlHashCopy (xmlHashTablePtr table, + xmlHashCopier f); +int xmlHashSize (xmlHashTablePtr); +void xmlHashScan (xmlHashTablePtr table, + xmlHashScanner f, + void *data); +void xmlHashScan1 (xmlHashTablePtr table, + const xmlChar *name, + xmlHashScanner f, + void *data); +void xmlHashScan2 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + xmlHashScanner f, + void *data); +void xmlHashScan3 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashScanner f, + void *data); +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_HASH_H__ */ diff --git a/include/.cvsignore b/include/.cvsignore new file mode 100644 index 00000000..70845e08 --- /dev/null +++ b/include/.cvsignore @@ -0,0 +1 @@ +Makefile.in diff --git a/include/Makefile.am b/include/Makefile.am new file mode 100644 index 00000000..f4878bed --- /dev/null +++ b/include/Makefile.am @@ -0,0 +1,33 @@ +## Process this file with automake to produce Makefile.in + +xmlincdir = $(includedir) + +xmlinc_HEADERS = \ + libxml/SAX.h \ + libxml/entities.h \ + libxml/encoding.h \ + libxml/parser.h \ + libxml/parserInternals.h \ + libxml/xmlerror.h \ + libxml/HTMLparser.h \ + libxml/HTMLtree.h \ + libxml/debugXML.h \ + libxml/tree.h \ + libxml/hash.h \ + libxml/xpath.h \ + libxml/xpathInternals.h \ + libxml/xpointer.h \ + libxml/xinclude.h \ + libxml/xmlIO.h \ + libxml/xmlmemory.h \ + libxml/nanohttp.h \ + libxml/nanoftp.h \ + libxml/uri.h \ + libxml/valid.h \ + libxml/xlink.h \ + libxml/xmlversion.h + +install-exec-hook: + $(mkinstalldirs) $(DESTDIR)$(xmlincdir) $(DESTDIR)$(xmlincdir)/libxml + +EXTRA_DIST = win32config.h libxml/xmlversion.h.in diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h new file mode 100644 index 
00000000..c79ad09c --- /dev/null +++ b/include/libxml/HTMLparser.h @@ -0,0 +1,115 @@ +/* + * HTMLparser.h : inf=terface for an HTML 4.0 non-verifying parser + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __HTML_PARSER_H__ +#define __HTML_PARSER_H__ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Most of the back-end structures from XML and HTML are shared + */ +typedef xmlParserCtxt htmlParserCtxt; +typedef xmlParserCtxtPtr htmlParserCtxtPtr; +typedef xmlParserNodeInfo htmlParserNodeInfo; +typedef xmlSAXHandler htmlSAXHandler; +typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; +typedef xmlParserInput htmlParserInput; +typedef xmlParserInputPtr htmlParserInputPtr; +typedef xmlDocPtr htmlDocPtr; +typedef xmlNodePtr htmlNodePtr; + +/* + * Internal description of an HTML element + */ +typedef struct _htmlElemDesc htmlElemDesc; +typedef htmlElemDesc *htmlElemDescPtr; +struct _htmlElemDesc { + const char *name; /* The tag name */ + char startTag; /* Whether the start tag can be implied */ + char endTag; /* Whether the end tag can be implied */ + char saveEndTag; /* Whether the end tag should be saved */ + char empty; /* Is this an empty element ? */ + char depr; /* Is this a deprecated element ? */ + char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ + const char *desc; /* the description */ +}; + +/* + * Internal description of an HTML entity + */ +typedef struct _htmlEntityDesc htmlEntityDesc; +typedef htmlEntityDesc *htmlEntityDescPtr; +struct _htmlEntityDesc { + int value; /* the UNICODE value for the character */ + const char *name; /* The entity name */ + const char *desc; /* the description */ +}; + +/* + * There is only few public functions. 
+ */ +htmlElemDescPtr htmlTagLookup (const xmlChar *tag); +htmlEntityDescPtr htmlEntityLookup(const xmlChar *name); +htmlEntityDescPtr htmlEntityValueLookup(int value); + +int htmlIsAutoClosed(htmlDocPtr doc, + htmlNodePtr elem); +int htmlAutoCloseTag(htmlDocPtr doc, + const xmlChar *name, + htmlNodePtr elem); +htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt, + xmlChar **str); +int htmlParseCharRef(htmlParserCtxtPtr ctxt); +void htmlParseElement(htmlParserCtxtPtr ctxt); + +htmlDocPtr htmlSAXParseDoc (xmlChar *cur, + const char *encoding, + htmlSAXHandlerPtr sax, + void *userData); +htmlDocPtr htmlParseDoc (xmlChar *cur, + const char *encoding); +htmlDocPtr htmlSAXParseFile(const char *filename, + const char *encoding, + htmlSAXHandlerPtr sax, + void *userData); +htmlDocPtr htmlParseFile (const char *filename, + const char *encoding); +int UTF8ToHtml (unsigned char* out, + int *outlen, + const unsigned char* in, + int *inlen); +int htmlEncodeEntities(unsigned char* out, + int *outlen, + const unsigned char* in, + int *inlen, int quoteChar); +int htmlIsScriptAttribute(const xmlChar *name); +int htmlHandleOmittedElem(int val); + +/** + * Interfaces for the Push mode + */ +void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); +htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename, + xmlCharEncoding enc); +int htmlParseChunk (htmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); +#ifdef __cplusplus +} +#endif + +#endif /* __HTML_PARSER_H__ */ diff --git a/include/libxml/HTMLtree.h b/include/libxml/HTMLtree.h new file mode 100644 index 00000000..543d693d --- /dev/null +++ b/include/libxml/HTMLtree.h @@ -0,0 +1,61 @@ +/* + * tree.h : describes the structures found in an tree resulting + * from an XML parsing. + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __HTML_TREE_H__ +#define __HTML_TREE_H__ + +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +#define HTML_TEXT_NODE XML_TEXT_NODE +#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE +#define HTML_COMMENT_NODE XML_COMMENT_NODE +#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE + +htmlDocPtr htmlNewDoc (const xmlChar *URI, + const xmlChar *ExternalID); +htmlDocPtr htmlNewDocNoDtD (const xmlChar *URI, + const xmlChar *ExternalID); +const xmlChar * htmlGetMetaEncoding (htmlDocPtr doc); +int htmlSetMetaEncoding (htmlDocPtr doc, + const xmlChar *encoding); +void htmlDocDumpMemory (xmlDocPtr cur, + xmlChar**mem, + int *size); +int htmlDocDump (FILE *f, + xmlDocPtr cur); +int htmlSaveFile (const char *filename, + xmlDocPtr cur); +void htmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur); +void htmlNodeDumpFile (FILE *out, + xmlDocPtr doc, + xmlNodePtr cur); +int htmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); + +/* This one is imported from xmlIO.h +void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); + */ +#ifdef __cplusplus +} +#endif + +#endif /* __HTML_TREE_H__ */ + diff --git a/include/libxml/SAX.h b/include/libxml/SAX.h new file mode 100644 index 00000000..4fabbdf1 --- /dev/null +++ b/include/libxml/SAX.h @@ -0,0 +1,120 @@ +/* + * SAX.h : Default SAX handler interfaces. + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard + */ + + +#ifndef __XML_SAX_H__ +#define __XML_SAX_H__ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +const xmlChar * getPublicId (void *ctx); +const xmlChar * getSystemId (void *ctx); +void setDocumentLocator (void *ctx, + xmlSAXLocatorPtr loc); + +int getLineNumber (void *ctx); +int getColumnNumber (void *ctx); + +int isStandalone (void *ctx); +int hasInternalSubset (void *ctx); +int hasExternalSubset (void *ctx); + +void internalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +void externalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlEntityPtr getEntity (void *ctx, + const xmlChar *name); +xmlEntityPtr getParameterEntity (void *ctx, + const xmlChar *name); +xmlParserInputPtr resolveEntity (void *ctx, + const xmlChar *publicId, + const xmlChar *systemId); + +void entityDecl (void *ctx, + const xmlChar *name, + int type, + const xmlChar *publicId, + const xmlChar *systemId, + xmlChar *content); +void attributeDecl (void *ctx, + const xmlChar *elem, + const xmlChar *name, + int type, + int def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +void elementDecl (void *ctx, + const xmlChar *name, + int type, + xmlElementContentPtr content); +void notationDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId); +void unparsedEntityDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName); + +void startDocument (void *ctx); +void endDocument (void *ctx); +void attribute (void *ctx, + const xmlChar *fullname, + const xmlChar *value); +void startElement (void *ctx, + const xmlChar *fullname, + const xmlChar **atts); +void endElement (void *ctx, + const xmlChar *name); +void reference (void *ctx, + const xmlChar *name); +void characters (void *ctx, + const xmlChar *ch, + int len); +void 
ignorableWhitespace (void *ctx, + const xmlChar *ch, + int len); +void processingInstruction (void *ctx, + const xmlChar *target, + const xmlChar *data); +void globalNamespace (void *ctx, + const xmlChar *href, + const xmlChar *prefix); +void setNamespace (void *ctx, + const xmlChar *name); +xmlNsPtr getNamespace (void *ctx); +int checkNamespace (void *ctx, + xmlChar *nameSpace); +void namespaceDecl (void *ctx, + const xmlChar *href, + const xmlChar *prefix); +void comment (void *ctx, + const xmlChar *value); +void cdataBlock (void *ctx, + const xmlChar *value, + int len); + +void xmlDefaultSAXHandlerInit (void); +void htmlDefaultSAXHandlerInit (void); +void sgmlDefaultSAXHandlerInit (void); +#ifdef __cplusplus +} +#endif +#endif /* __XML_SAX_H__ */ diff --git a/include/libxml/debugXML.h b/include/libxml/debugXML.h new file mode 100644 index 00000000..4a55fa8d --- /dev/null +++ b/include/libxml/debugXML.h @@ -0,0 +1,113 @@ +/* + * debugXML.h : Interfaces to a set of routines used for debugging the tree + * produced by the XML parser. 
+ * + * Daniel Veillard + */ + +#ifndef __DEBUG_XML__ +#define __DEBUG_XML__ +#include +#include + +#ifdef LIBXML_DEBUG_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The standard Dump routines + */ +void xmlDebugDumpString (FILE *output, + const xmlChar *str); +void xmlDebugDumpAttr (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpAttrList (FILE *output, + xmlAttrPtr attr, + int depth); +void xmlDebugDumpOneNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNode (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpNodeList (FILE *output, + xmlNodePtr node, + int depth); +void xmlDebugDumpDocumentHead(FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDocument (FILE *output, + xmlDocPtr doc); +void xmlDebugDumpDTD (FILE *output, + xmlDtdPtr doc); +void xmlDebugDumpEntities (FILE *output, + xmlDocPtr doc); +void xmlLsOneNode (FILE *output, + xmlNodePtr node); + +/**************************************************************** + * * + * The XML shell related structures and functions * + * * + ****************************************************************/ + +/** + * xmlShellReadlineFunc: + * @prompt: a string prompt + * + * This is a generic signature for the XML shell input function + * + * Returns a string which will be freed by the Shell + */ +typedef char * (* xmlShellReadlineFunc)(char *prompt); + +/* + * The shell context itself + * TODO: add the defined function tables. 
+ */ +typedef struct _xmlShellCtxt xmlShellCtxt; +typedef xmlShellCtxt *xmlShellCtxtPtr; +struct _xmlShellCtxt { + char *filename; + xmlDocPtr doc; + xmlNodePtr node; + xmlXPathContextPtr pctxt; + int loaded; + FILE *output; + xmlShellReadlineFunc input; +}; + +/** + * xmlShellCmd: + * @ctxt: a shell context + * @arg: a string argument + * @node: a first node + * @node2: a second node + * + * This is a generic signature for the XML shell functions + * + * Returns an int, negative returns indicating errors + */ +typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); + +/* + * The Shell interface. + */ +void xmlShell (xmlDocPtr doc, + char *filename, + xmlShellReadlineFunc input, + FILE *output); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_DEBUG_ENABLED */ +#endif /* __DEBUG_XML__ */ diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h new file mode 100644 index 00000000..62e81e3d --- /dev/null +++ b/include/libxml/encoding.h @@ -0,0 +1,187 @@ +/* + * encoding.h : interface for the encoding conversion functions needed for + * XML + * + * Related specs: + * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. + * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_CHAR_ENCODING_H__ +#define __XML_CHAR_ENCODING_H__ + +#include +#ifdef LIBXML_ICONV_ENABLED +#include +#endif +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Predefined values for some standard encodings + * Libxml doesn't do beforehand translation on UTF8, ISOLatinX + * It also supports UTF16 (LE and BE) by default. + * + * Anything else would have to be translated to UTF8 before being + * given to the parser itself. The BOM for UTF16 and the encoding + * declaration are looked at and a converter is looked for at that + * point. If not found the parser stops here as asked by the XML REC. + * Converters can be registered by the user using xmlRegisterCharEncodingHandler + * but the current form doesn't allow stateful transcoding (a serious + * problem agreed !). If iconv has been found it will be used + * automatically and allow stateful transcoding, the simplest is then + * to be sure to enable iconv and to provide iconv libs for the encoding + * support needed. + */ +typedef enum { + XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ + XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ + XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ + XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ + XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ + XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ + XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ + XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh!
*/ + XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ + XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ + XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ + XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ + XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ + XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ + XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ + XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ + XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ + XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ + XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ + XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ + XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ + XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ +} xmlCharEncoding; + +/** + * xmlCharEncodingInputFunc: + * @out: a pointer to an array of bytes to store the UTF-8 result + * @outlen: the length of @out + * @in: a pointer to an array of chars in the original encoding + * @inlen: the length of @in + * + * Take a block of chars in the original encoding and try to convert + * it to an UTF-8 block of chars out. + * + * Returns the number of bytes written, or -1 by lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * if the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. + */ +typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen); + + +/** + * xmlCharEncodingOutputFunc: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to another + * encoding. + * Note: a first call designed to produce heading info is called with + * in = NULL.
If stateful this should also initialize the encoder state + * + * Returns the number of bytes written, or -1 by lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * if the return value is positive, else unpredictable. + * The value of @outlen after return is the number of octets consumed. + */ +typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen, + const unsigned char* in, int *inlen); + + +/* + * Block defining the handlers for non UTF-8 encodings. + * If iconv is supported, there are two extra fields + */ + +typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; +typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; +struct _xmlCharEncodingHandler { + char *name; + xmlCharEncodingInputFunc input; + xmlCharEncodingOutputFunc output; +#ifdef LIBXML_ICONV_ENABLED + iconv_t iconv_in; + iconv_t iconv_out; +#endif /* LIBXML_ICONV_ENABLED */ +}; + +/* + * Interfaces for encoding handlers + */ +void xmlInitCharEncodingHandlers (void); +void xmlCleanupCharEncodingHandlers (void); +void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); +xmlCharEncodingHandlerPtr + xmlGetCharEncodingHandler (xmlCharEncoding enc); +xmlCharEncodingHandlerPtr + xmlFindCharEncodingHandler (const char *name); + + +/* + * Interfaces for encoding names and aliases + */ +int xmlAddEncodingAlias (const char *name, + const char *alias); +int xmlDelEncodingAlias (const char *alias); +const char * + xmlGetEncodingAlias (const char *alias); +void xmlCleanupEncodingAliases (void); +xmlCharEncoding + xmlParseCharEncoding (const char* name); +const char* + xmlGetCharEncodingName (xmlCharEncoding enc); + +/* + * Interfaces directly used by the parsers.
+ */ +xmlCharEncoding + xmlDetectCharEncoding (const unsigned char* in, + int len); + +int xmlCheckUTF8 (const unsigned char *utf); + +int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); + +int xmlCharEncInFunc (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); +int xmlCharEncFirstLine (xmlCharEncodingHandler *handler, + xmlBufferPtr out, + xmlBufferPtr in); +int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_CHAR_ENCODING_H__ */ diff --git a/include/libxml/entities.h b/include/libxml/entities.h new file mode 100644 index 00000000..305d043c --- /dev/null +++ b/include/libxml/entities.h @@ -0,0 +1,114 @@ +/* + * entities.h : interface for the XML entities handking + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_ENTITIES_H__ +#define __XML_ENTITIES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The different valid entity types + */ +typedef enum { + XML_INTERNAL_GENERAL_ENTITY = 1, + XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2, + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3, + XML_INTERNAL_PARAMETER_ENTITY = 4, + XML_EXTERNAL_PARAMETER_ENTITY = 5, + XML_INTERNAL_PREDEFINED_ENTITY = 6 +} xmlEntityType; + +/* + * An unit of storage for an entity, contains the string, the value + * and the linkind data needed for the linking in the hash table. + */ + +typedef struct _xmlEntity xmlEntity; +typedef xmlEntity *xmlEntityPtr; +struct _xmlEntity { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ENTITY_DECL, must be second ! 
*/ + const xmlChar *name; /* Attribute name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + xmlChar *orig; /* content without ref substitution */ + xmlChar *content; /* content or ndata if unparsed */ + int length; /* the content length */ + xmlEntityType etype; /* The entity type */ + const xmlChar *ExternalID; /* External identifier for PUBLIC */ + const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + + struct _xmlEntity *nexte; /* unused */ + const xmlChar *URI; /* the full URI as computed */ +}; + +/* + * ALl entities are stored in an hash table + * there is 2 separate hash tables for global and parmeter entities + */ + +typedef struct _xmlHashTable xmlEntitiesTable; +typedef xmlEntitiesTable *xmlEntitiesTablePtr; + +/* + * External functions : + */ + +void xmlInitializePredefinedEntities (void); +xmlEntityPtr xmlAddDocEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +xmlEntityPtr xmlAddDtdEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +xmlEntityPtr xmlGetPredefinedEntity (const xmlChar *name); +xmlEntityPtr xmlGetDocEntity (xmlDocPtr doc, + const xmlChar *name); +xmlEntityPtr xmlGetDtdEntity (xmlDocPtr doc, + const xmlChar *name); +xmlEntityPtr xmlGetParameterEntity (xmlDocPtr doc, + const xmlChar *name); +const xmlChar * xmlEncodeEntities (xmlDocPtr doc, + const xmlChar *input); +xmlChar * xmlEncodeEntitiesReentrant(xmlDocPtr doc, + const xmlChar *input); +xmlChar * xmlEncodeSpecialChars (xmlDocPtr doc, + const xmlChar *input); +xmlEntitiesTablePtr xmlCreateEntitiesTable (void); +xmlEntitiesTablePtr xmlCopyEntitiesTable 
(xmlEntitiesTablePtr table); +void xmlFreeEntitiesTable (xmlEntitiesTablePtr table); +void xmlDumpEntitiesTable (xmlBufferPtr buf, + xmlEntitiesTablePtr table); +void xmlDumpEntityDecl (xmlBufferPtr buf, + xmlEntityPtr ent); +xmlEntitiesTablePtr xmlCopyEntitiesTable (xmlEntitiesTablePtr table); +void xmlCleanupPredefinedEntities(void); + + +#ifdef __cplusplus +} +#endif + +# endif /* __XML_ENTITIES_H__ */ diff --git a/include/libxml/hash.h b/include/libxml/hash.h new file mode 100644 index 00000000..c88fffd1 --- /dev/null +++ b/include/libxml/hash.h @@ -0,0 +1,129 @@ +/* + * hash.c: chained hash tables + * + * Copyright (C) 2000 Bjorn Reese and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: bjorn.reese@systematic.dk + */ + +#ifndef __XML_HASH_H__ +#define __XML_HASH_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The hash table + */ +typedef struct _xmlHashTable xmlHashTable; +typedef xmlHashTable *xmlHashTablePtr; + +/* + * function types: + */ +typedef void (*xmlHashDeallocator)(void *payload, xmlChar *name); +typedef void *(*xmlHashCopier)(void *payload, xmlChar *name); +typedef void *(*xmlHashScanner)(void *payload, void *data, xmlChar *name); + +/* + * Constructor and destructor + */ +xmlHashTablePtr xmlHashCreate (int size); +void xmlHashFree (xmlHashTablePtr table, + xmlHashDeallocator f); + +/* + * Add a new entry to the hash table + */ +int xmlHashAddEntry (xmlHashTablePtr table, + const xmlChar *name, + void *userdata); +int xmlHashUpdateEntry(xmlHashTablePtr table, + const xmlChar *name, + void *userdata, + xmlHashDeallocator f); +int xmlHashAddEntry2(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + void *userdata); +int xmlHashUpdateEntry2(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + void *userdata, + xmlHashDeallocator f); +int xmlHashAddEntry3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata); +int xmlHashUpdateEntry3(xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata, + xmlHashDeallocator f); + +/* + * Remove an entry from the hash table + */ +int xmlHashRemoveEntry(xmlHashTablePtr table, const xmlChar *name, + xmlHashDeallocator f); +int xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, xmlHashDeallocator f); +int xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name, + const xmlChar *name2, const xmlChar *name3, + xmlHashDeallocator f); + +/* + * Retrieve the userdata + */ +void * xmlHashLookup (xmlHashTablePtr table, + const xmlChar *name); +void * xmlHashLookup2 (xmlHashTablePtr 
table, + const xmlChar *name, + const xmlChar *name2); +void * xmlHashLookup3 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3); + +/* + * Helpers + */ +xmlHashTablePtr xmlHashCopy (xmlHashTablePtr table, + xmlHashCopier f); +int xmlHashSize (xmlHashTablePtr); +void xmlHashScan (xmlHashTablePtr table, + xmlHashScanner f, + void *data); +void xmlHashScan1 (xmlHashTablePtr table, + const xmlChar *name, + xmlHashScanner f, + void *data); +void xmlHashScan2 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + xmlHashScanner f, + void *data); +void xmlHashScan3 (xmlHashTablePtr table, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashScanner f, + void *data); +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_HASH_H__ */ diff --git a/include/libxml/list.h b/include/libxml/list.h new file mode 100644 index 00000000..a708ef20 --- /dev/null +++ b/include/libxml/list.h @@ -0,0 +1,81 @@ +/* + * list.h: lists interfaces + * + * Copyright (C) 2000 Gary Pennington and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: Gary.Pennington@uk.sun.com + */ + +typedef struct _xmlLink xmlLink; +typedef xmlLink *xmlLinkPtr; + +typedef struct _xmlList xmlList; +typedef xmlList *xmlListPtr; + +typedef void (*xmlListDeallocator) (xmlLinkPtr lk); +typedef int (*xmlListDataCompare) (const void *data0, const void *data1); +typedef int (*xmlListWalker) (const void *data, const void *user); + +/* Creation/Deletion */ +xmlListPtr xmlListCreate (xmlListDeallocator deallocator, + xmlListDataCompare compare); +void xmlListDelete (xmlListPtr l); + +/* Basic Operators */ +void * xmlListSearch (xmlListPtr l, + void *data); +void * xmlListReverseSearch (xmlListPtr l, + void *data); +int xmlListInsert (xmlListPtr l, + void *data) ; +int xmlListAppend (xmlListPtr l, + void *data) ; +int xmlListRemoveFirst (xmlListPtr l, + void *data); +int xmlListRemoveLast (xmlListPtr l, + void *data); +int xmlListRemoveAll (xmlListPtr l, + void *data); +void xmlListClear (xmlListPtr l); +int xmlListEmpty (xmlListPtr l); +xmlLinkPtr xmlListFront (xmlListPtr l); +xmlLinkPtr xmlListEnd (xmlListPtr l); +int xmlListSize (xmlListPtr l); + +void xmlListPopFront (xmlListPtr l); +void xmlListPopBack (xmlListPtr l); +int xmlListPushFront (xmlListPtr l, + void *data); +int xmlListPushBack (xmlListPtr l, + void *data); + +/* Advanced Operators */ +void xmlListReverse (xmlListPtr l); +void xmlListSort (xmlListPtr l); +void xmlListWalk (xmlListPtr l, + xmlListWalker walker, + const void *user); +void xmlListReverseWalk (xmlListPtr l, + xmlListWalker walker, + const void *user); +void xmlListMerge (xmlListPtr l1, + xmlListPtr l2); +xmlListPtr xmlListDup (const xmlListPtr old); +int xmlListCopy (xmlListPtr cur, + const xmlListPtr old); +/* Link operators */ +void * xmlLinkGetData (xmlLinkPtr lk); + +/* xmlListUnique() */ +/* xmlListSwap */ + + diff --git a/include/libxml/nanoftp.h b/include/libxml/nanoftp.h new file mode 100644 index 00000000..53465280 --- /dev/null +++ b/include/libxml/nanoftp.h @@ -0,0 +1,110 @@ +/* + 
* nanohttp.c: minimalist FTP implementation to fetch external subsets. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __NANO_FTP_H__ +#define __NANO_FTP_H__ + +#include +#ifdef LIBXML_FTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ftpListCallback: + * @userData: user provided data for the callback + * @filename: the file name (including "->" when links are shown) + * @attrib: the attribute string + * @owner: the owner string + * @group: the group string + * @size: the file size + * @links: the link count + * @year: the year + * @month: the month + * @day: the day + * @hour: the hour + * @minute: the minute + * + * A callback for the xmlNanoFTPList command + * Note that only one of year and day:minute are specified + */ +typedef void (*ftpListCallback) (void *userData, + const char *filename, const char* attrib, + const char *owner, const char *group, + unsigned long size, int links, int year, + const char *month, int day, int hour, + int minute); +/** + * ftpDataCallback: + * A callback for the xmlNanoFTPGet command + */ +typedef void (*ftpDataCallback) (void *userData, const char *data, int len); + +/* + * Init + */ +void xmlNanoFTPInit (void); +void xmlNanoFTPCleanup (void); + +/* + * Creating/freeing contexts + */ +void * xmlNanoFTPNewCtxt (const char *URL); +void xmlNanoFTPFreeCtxt (void * ctx); +void * xmlNanoFTPConnectTo (const char *server, + int port); +/* + * Opening/closing session connections + */ +void * xmlNanoFTPOpen (const char *URL); +int xmlNanoFTPConnect (void *ctx); +int xmlNanoFTPClose (void *ctx); +int xmlNanoFTPQuit (void *ctx); +void xmlNanoFTPScanProxy (const char *URL); +void xmlNanoFTPProxy (const char *host, + int port, + const char *user, + const char *passwd, + int type); +int xmlNanoFTPUpdateURL (void *ctx, + const char *URL); + +/* + * Rathern internal commands + */ +int xmlNanoFTPGetResponse (void *ctx); +int xmlNanoFTPCheckResponse (void *ctx); + +/* + * 
CD/DIR/GET handlers + */ +int xmlNanoFTPCwd (void *ctx, + char *directory); + +int xmlNanoFTPGetConnection (void *ctx); +int xmlNanoFTPCloseConnection(void *ctx); +int xmlNanoFTPList (void *ctx, + ftpListCallback callback, + void *userData, + char *filename); +int xmlNanoFTPGetSocket (void *ctx, + const char *filename); +int xmlNanoFTPGet (void *ctx, + ftpDataCallback callback, + void *userData, + const char *filename); +int xmlNanoFTPRead (void *ctx, + void *dest, + int len); + +#ifdef __cplusplus +} +#endif /* LIBXML_FTP_ENABLED */ +#endif +#endif /* __NANO_FTP_H__ */ diff --git a/include/libxml/nanohttp.h b/include/libxml/nanohttp.h new file mode 100644 index 00000000..78d1c44d --- /dev/null +++ b/include/libxml/nanohttp.h @@ -0,0 +1,44 @@ +/* + * nanohttp.c: minimalist HTTP implementation to fetch external subsets. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __NANO_HTTP_H__ +#define __NANO_HTTP_H__ + +#include +#ifdef LIBXML_HTTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif +void xmlNanoHTTPInit (void); +void xmlNanoHTTPCleanup (void); +void xmlNanoHTTPScanProxy (const char *URL); +int xmlNanoHTTPFetch (const char *URL, + const char *filename, + char **contentType); +void * xmlNanoHTTPMethod (const char *URL, + const char *method, + const char *input, + char **contentType, + const char *headers); +void * xmlNanoHTTPOpen (const char *URL, + char **contentType); +int xmlNanoHTTPReturnCode (void *ctx); +const char * xmlNanoHTTPAuthHeader(void *ctx); +int xmlNanoHTTPRead (void *ctx, + void *dest, + int len); +int xmlNanoHTTPSave (void *ctxt, + const char *filename); +void xmlNanoHTTPClose (void *ctx); +#ifdef __cplusplus +} + +#endif /* LIBXML_HTTP_ENABLED */ +#endif +#endif /* __NANO_HTTP_H__ */ diff --git a/include/libxml/parser.h b/include/libxml/parser.h new file mode 100644 index 00000000..b98f2a33 --- /dev/null +++ b/include/libxml/parser.h @@ -0,0 +1,527 @@ +/* + * parser.h : Interfaces, 
constants and types related to the XML parser. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_PARSER_H__ +#define __XML_PARSER_H__ + +#include +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Constants. + */ +#define XML_DEFAULT_VERSION "1.0" + +/** + * an xmlParserInput is an input flow for the XML processor. + * Each entity parsed is associated an xmlParserInput (except the + * few predefined ones). This is the case both for internal entities + * - in which case the flow is already completely in memory - or + * external entities - in which case we use the buf structure for + * progressive reading and I18N conversions to the internal UTF-8 format. + */ + +typedef void (* xmlParserInputDeallocate)(xmlChar *); +typedef struct _xmlParserInput xmlParserInput; +typedef xmlParserInput *xmlParserInputPtr; +struct _xmlParserInput { + /* Input buffer */ + xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ + + const char *filename; /* The file analyzed, if any */ + const char *directory; /* the directory/base of teh file */ + const xmlChar *base; /* Base of the array to parse */ + const xmlChar *cur; /* Current char being parsed */ + int length; /* length if known */ + int line; /* Current line */ + int col; /* Current column */ + int consumed; /* How many xmlChars already consumed */ + xmlParserInputDeallocate free; /* function to deallocate the base */ + const xmlChar *encoding; /* the encoding string for entity */ + const xmlChar *version; /* the version string for entity */ + int standalone; /* Was that entity marked standalone */ +}; + +/** + * the parser can be asked to collect Node informations, i.e. at what + * place in the file they were detected. + * NOTE: This is off by default and not very well tested. 
+ */ +typedef struct _xmlParserNodeInfo xmlParserNodeInfo; +typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; + +struct _xmlParserNodeInfo { + const struct _xmlNode* node; + /* Position & line # that text that created the node begins & ends on */ + unsigned long begin_pos; + unsigned long begin_line; + unsigned long end_pos; + unsigned long end_line; +}; + +typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; +typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; +struct _xmlParserNodeInfoSeq { + unsigned long maximum; + unsigned long length; + xmlParserNodeInfo* buffer; +}; + +/** + * The parser is now working also as a state based parser + * The recursive one use the stagte info for entities processing + */ +typedef enum { + XML_PARSER_EOF = -1, /* nothing is to be parsed */ + XML_PARSER_START = 0, /* nothing has been parsed */ + XML_PARSER_MISC, /* Misc* before int subset */ + XML_PARSER_PI, /* Whithin a processing instruction */ + XML_PARSER_DTD, /* within some DTD content */ + XML_PARSER_PROLOG, /* Misc* after internal subset */ + XML_PARSER_COMMENT, /* within a comment */ + XML_PARSER_START_TAG, /* within a start tag */ + XML_PARSER_CONTENT, /* within the content */ + XML_PARSER_CDATA_SECTION, /* within a CDATA section */ + XML_PARSER_END_TAG, /* within a closing tag */ + XML_PARSER_ENTITY_DECL, /* within an entity declaration */ + XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ + XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ + XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ + XML_PARSER_EPILOG, /* the Misc* after the last end tag */ + XML_PARSER_IGNORE /* within an IGNORED section */ +} xmlParserInputState; + +/** + * The parser context. + * NOTE This doesn't completely defines the parser state, the (current ?) + * design of the parser uses recursive function calls since this allow + * and easy mapping from the production rules of the specification + * to the actual code. 
The drawback is that the actual function call + * also reflect the parser state. However most of the parsing routines + * takes as the only argument the parser context pointer, so migrating + * to a state based parser for progressive parsing shouldn't be too hard. + */ +typedef struct _xmlParserCtxt xmlParserCtxt; +typedef xmlParserCtxt *xmlParserCtxtPtr; +struct _xmlParserCtxt { + struct _xmlSAXHandler *sax; /* The SAX handler */ + void *userData; /* For SAX interface only, used by DOM build */ + xmlDocPtr myDoc; /* the document being built */ + int wellFormed; /* is the document well formed */ + int replaceEntities; /* shall we replace entities ? */ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* the declared encoding, if any */ + int standalone; /* standalone document */ + int html; /* an HTML(1)/Docbook(2) document */ + + /* Input stream stack */ + xmlParserInputPtr input; /* Current input stream */ + int inputNr; /* Number of current input streams */ + int inputMax; /* Max number of input streams */ + xmlParserInputPtr *inputTab; /* stack of inputs */ + + /* Node analysis stack only used for DOM building */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + int record_info; /* Whether node info should be kept */ + xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ + + int errNo; /* error code */ + + int hasExternalSubset; /* reference and external subset */ + int hasPErefs; /* the internal subset has PE refs */ + int external; /* are we parsing an external entity */ + + int valid; /* is the document valid */ + int validate; /* shall we try to validate ? 
*/ + xmlValidCtxt vctxt; /* The validity context */ + + xmlParserInputState instate; /* current type of input */ + int token; /* next char look-ahead */ + + char *directory; /* the data directory */ + + /* Node name stack */ + xmlChar *name; /* Current parsed Node */ + int nameNr; /* Depth of the parsing stack */ + int nameMax; /* Max depth of the parsing stack */ + xmlChar * *nameTab; /* array of nodes */ + + long nbChars; /* number of xmlChar processed */ + long checkIndex; /* used by progressive parsing lookup */ + int keepBlanks; /* ugly but ... */ + int disableSAX; /* SAX callbacks are disabled */ + int inSubset; /* Parsing is in int 1/ext 2 subset */ + xmlChar * intSubName; /* name of subset */ + xmlChar * extSubURI; /* URI of external subset */ + xmlChar * extSubSystem; /* SYSTEM ID of external subset */ + + /* xml:space values */ + int * space; /* Should the parser preserve spaces */ + int spaceNr; /* Depth of the parsing stack */ + int spaceMax; /* Max depth of the parsing stack */ + int * spaceTab; /* array of space infos */ + + int depth; /* to prevent entity substitution loops */ + xmlParserInputPtr entity; /* used to check entities boundaries */ + int charset; /* encoding of the in-memory content + actually an xmlCharEncoding */ + int nodelen; /* Those two fields are there to */ + int nodemem; /* Speed up large node parsing */ + int pedantic; /* signal pedantic warnings */ + void *_private; /* For user data, libxml won't touch it */ + + int loadsubset; /* should the external subset be loaded */ +}; + +/** + * a SAX Locator. + */ +typedef struct _xmlSAXLocator xmlSAXLocator; +typedef xmlSAXLocator *xmlSAXLocatorPtr; +struct _xmlSAXLocator { + const xmlChar *(*getPublicId)(void *ctx); + const xmlChar *(*getSystemId)(void *ctx); + int (*getLineNumber)(void *ctx); + int (*getColumnNumber)(void *ctx); +}; + +/** + * a SAX handler is bunch of callbacks called by the parser when processing + * of the input generate data or structure informations. 
+ */ + +typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, + const xmlChar *publicId, const xmlChar *systemId); +typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID); +typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID); +typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, + const xmlChar *name); +typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, + const xmlChar *name); +typedef void (*entityDeclSAXFunc) (void *ctx, + const xmlChar *name, int type, const xmlChar *publicId, + const xmlChar *systemId, xmlChar *content); +typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId); +typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem, + const xmlChar *name, int type, int def, + const xmlChar *defaultValue, xmlEnumerationPtr tree); +typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name, + int type, xmlElementContentPtr content); +typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, + const xmlChar *name, const xmlChar *publicId, + const xmlChar *systemId, const xmlChar *notationName); +typedef void (*setDocumentLocatorSAXFunc) (void *ctx, + xmlSAXLocatorPtr loc); +typedef void (*startDocumentSAXFunc) (void *ctx); +typedef void (*endDocumentSAXFunc) (void *ctx); +typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name, + const xmlChar **atts); +typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name); +typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name, + const xmlChar *value); +typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name); +typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch, + int len); +typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, + const xmlChar *ch, int len); +typedef void (*processingInstructionSAXFunc) (void *ctx, + const xmlChar 
*target, const xmlChar *data); +typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value); +typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len); +typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); +typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...); +typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); +typedef int (*isStandaloneSAXFunc) (void *ctx); +typedef int (*hasInternalSubsetSAXFunc) (void *ctx); +typedef int (*hasExternalSubsetSAXFunc) (void *ctx); + +typedef struct _xmlSAXHandler xmlSAXHandler; +typedef xmlSAXHandler *xmlSAXHandlerPtr; +struct _xmlSAXHandler { + internalSubsetSAXFunc internalSubset; + isStandaloneSAXFunc isStandalone; + hasInternalSubsetSAXFunc hasInternalSubset; + hasExternalSubsetSAXFunc hasExternalSubset; + resolveEntitySAXFunc resolveEntity; + getEntitySAXFunc getEntity; + entityDeclSAXFunc entityDecl; + notationDeclSAXFunc notationDecl; + attributeDeclSAXFunc attributeDecl; + elementDeclSAXFunc elementDecl; + unparsedEntityDeclSAXFunc unparsedEntityDecl; + setDocumentLocatorSAXFunc setDocumentLocator; + startDocumentSAXFunc startDocument; + endDocumentSAXFunc endDocument; + startElementSAXFunc startElement; + endElementSAXFunc endElement; + referenceSAXFunc reference; + charactersSAXFunc characters; + ignorableWhitespaceSAXFunc ignorableWhitespace; + processingInstructionSAXFunc processingInstruction; + commentSAXFunc comment; + warningSAXFunc warning; + errorSAXFunc error; + fatalErrorSAXFunc fatalError; + getParameterEntitySAXFunc getParameterEntity; + cdataBlockSAXFunc cdataBlock; + externalSubsetSAXFunc externalSubset; +}; + +/** + * External entity loaders types + */ +typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL, + const char *ID, + xmlParserCtxtPtr context); + +/** + * Global variables: just the default SAX interface tables and XML + * version infos. 
+ */ +LIBXML_DLL_IMPORT extern const char *xmlParserVersion; + +LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator; +LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler; +LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler; +LIBXML_DLL_IMPORT extern xmlSAXHandler sgmlDefaultSAXHandler; + +/** + * entity substitution default behaviour. + */ + +#ifdef VMS +LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultVal; +#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal +#else +LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue; +#endif +LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue; + + +/** + * Init/Cleanup + */ +void xmlInitParser (void); +void xmlCleanupParser (void); + +/** + * Input functions + */ +int xmlParserInputRead (xmlParserInputPtr in, + int len); +int xmlParserInputGrow (xmlParserInputPtr in, + int len); + +/** + * xmlChar handling + */ +xmlChar * xmlStrdup (const xmlChar *cur); +xmlChar * xmlStrndup (const xmlChar *cur, + int len); +xmlChar * xmlStrsub (const xmlChar *str, + int start, + int len); +const xmlChar * xmlStrchr (const xmlChar *str, + xmlChar val); +const xmlChar * xmlStrstr (const xmlChar *str, + xmlChar *val); +const xmlChar * xmlStrcasestr (const xmlChar *str, + xmlChar *val); +int xmlStrcmp (const xmlChar *str1, + const xmlChar *str2); +int xmlStrncmp (const xmlChar *str1, + const xmlChar *str2, + int len); +int xmlStrcasecmp (const xmlChar *str1, + const xmlChar *str2); +int xmlStrncasecmp (const xmlChar *str1, + const xmlChar *str2, + int len); +int xmlStrEqual (const xmlChar *str1, + const xmlChar *str2); +int xmlStrlen (const xmlChar *str); +xmlChar * xmlStrcat (xmlChar *cur, + const xmlChar *add); +xmlChar * xmlStrncat (xmlChar *cur, + const xmlChar *add, + int len); + +/** + * Basic parsing Interfaces + */ +xmlDocPtr xmlParseDoc (xmlChar *cur); +xmlDocPtr xmlParseMemory (char *buffer, + int size); +xmlDocPtr xmlParseFile (const char *filename); +int 
xmlSubstituteEntitiesDefault(int val); +int xmlKeepBlanksDefault (int val); +void xmlStopParser (xmlParserCtxtPtr ctxt); +int xmlPedanticParserDefault(int val); + +/** + * Recovery mode + */ +xmlDocPtr xmlRecoverDoc (xmlChar *cur); +xmlDocPtr xmlRecoverMemory (char *buffer, + int size); +xmlDocPtr xmlRecoverFile (const char *filename); + +/** + * Less common routines and SAX interfaces + */ +int xmlParseDocument (xmlParserCtxtPtr ctxt); +int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); +xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax, + xmlChar *cur, + int recovery); +int xmlSAXUserParseFile (xmlSAXHandlerPtr sax, + void *user_data, + const char *filename); +int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, + void *user_data, + char *buffer, + int size); +xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax, + char *buffer, + int size, + int recovery); +xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax, + const char *filename, + int recovery); +xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax, + const char *filename); +xmlDocPtr xmlParseEntity (const char *filename); +xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax, + xmlParserInputBufferPtr input, + xmlCharEncoding enc); +int xmlParseBalancedChunkMemory(xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *string, + xmlNodePtr *list); +int xmlParseExternalEntity (xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *URL, + const xmlChar *ID, + xmlNodePtr *list); +int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, + const xmlChar *URL, + const xmlChar *ID, + xmlNodePtr *list); + +/** + * SAX initialization routines + */ +void xmlDefaultSAXHandlerInit(void); +void htmlDefaultSAXHandlerInit(void); + +/** + * Parser contexts handling. 
+ */ +void xmlInitParserCtxt (xmlParserCtxtPtr ctxt); +void xmlClearParserCtxt (xmlParserCtxtPtr ctxt); +void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); +void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, + const xmlChar* buffer, + const char* filename); +xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); + +/** + * Reading/setting optional parsing features. + */ + +int xmlGetFeaturesList (int *len, + const char **result); +int xmlGetFeature (xmlParserCtxtPtr ctxt, + const char *name, + void *result); +int xmlSetFeature (xmlParserCtxtPtr ctxt, + const char *name, + void *value); + +/** + * Interfaces for the Push mode + */ +xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename); +int xmlParseChunk (xmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); + +/** + * Special I/O mode + */ + +xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, + void *user_data, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); + +xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt, + xmlParserInputBufferPtr input, + xmlCharEncoding enc); + +/** + * Node infos + */ +const xmlParserNodeInfo* + xmlParserFindNodeInfo (const xmlParserCtxt* ctxt, + const xmlNode* node); +void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); +void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); +unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq, + const xmlNode* node); +void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, + const xmlParserNodeInfo* info); + +/* + * External entities handling actually implemented in xmlIO + */ + +void xmlSetExternalEntityLoader(xmlExternalEntityLoader f); +xmlExternalEntityLoader + xmlGetExternalEntityLoader(void); +xmlParserInputPtr + xmlLoadExternalEntity (const char *URL, + const char *ID, + xmlParserCtxtPtr context); + +#ifdef __cplusplus +} +#endif + +#endif /* 
__XML_PARSER_H__ */ + diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h new file mode 100644 index 00000000..3fdb8f6e --- /dev/null +++ b/include/libxml/parserInternals.h @@ -0,0 +1,314 @@ +/* + * parserInternals.h : internal routines exported by the parser. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + * + * 14 Nov 2000 ht - truncated declaration of xmlParseElementChildrenContentDecl + * for VMS + * + */ + +#ifndef __XML_PARSER_INTERNALS_H__ +#define __XML_PARSER_INTERNALS_H__ + +#include <libxml/parser.h> + +#ifdef __cplusplus +extern "C" { +#endif + + /* + * Identifiers can be longer, but this will be more costly + * at runtime. + */ +#define XML_MAX_NAMELEN 100 + +/* + * The parser tries to always have that amount of input ready; + * one of the points is to provide context when reporting errors + */ +#define INPUT_CHUNK 250 + +/************************************************************************ + * * + * UNICODE version of the macros. * + * * + ************************************************************************/ +/* + * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] + * | [#x10000-#x10FFFF] + * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + */ +#define IS_CHAR(c) \ + ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \ + ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \ + (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \ + (((c) >= 0x10000) && ((c) <= 0x10FFFF))) + +/* + * [3] S ::= (#x20 | #x9 | #xD | #xA)+ + */ +#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \ + ((c) == 0x0D)) + +/* + * [85] BaseChar ::= ... long list see REC ... + */ +#define IS_BASECHAR(c) xmlIsBaseChar(c) + +/* + * [88] Digit ::= ... long list see REC ... + */ +#define IS_DIGIT(c) xmlIsDigit(c) + +/* + * [87] CombiningChar ::= ... long list see REC ... 
+ */ +#define IS_COMBINING(c) xmlIsCombining(c) + +/* + * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | + * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | + * [#x309D-#x309E] | [#x30FC-#x30FE] + */ +#define IS_EXTENDER(c) xmlIsExtender(c) + +/* + * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] + */ +#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c) + +/* + * [84] Letter ::= BaseChar | Ideographic + */ +#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) + + +/* + * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + */ +#define IS_PUBIDCHAR(c) xmlIsPubidChar(c) + +/* + * SKIP_EOL: advance p past one end-of-line sequence: CR (0x0D), LF (0x0A), + * CR LF or LF CR. p must be a modifiable pointer lvalue; it is evaluated + * more than once. Use it as a complete statement. + */ +#define SKIP_EOL(p) \ + if (*(p) == 0x0D) { (p)++ ; if (*(p) == 0x0A) (p)++; } \ + else if (*(p) == 0x0A) { (p)++ ; if (*(p) == 0x0D) (p)++; } + +/* + * MOVETO_ENDTAG: advance p to the next '>' or to the terminating 0. + */ +#define MOVETO_ENDTAG(p) \ + while ((*(p)) && (*(p) != '>')) (p)++ + +/* + * MOVETO_STARTTAG: advance p to the next '<' or to the terminating 0. + */ +#define MOVETO_STARTTAG(p) \ + while ((*(p)) && (*(p) != '<')) (p)++ + +/** + * Global variables affecting the default parser behaviour. + */ + +LIBXML_DLL_IMPORT extern int xmlParserDebugEntities; +LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue; +LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue; +LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue; +LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue; +LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue; +LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue; +LIBXML_DLL_IMPORT extern xmlChar xmlStringText[]; +LIBXML_DLL_IMPORT extern xmlChar xmlStringTextNoenc[]; +LIBXML_DLL_IMPORT extern xmlChar xmlStringComment[]; + +/* + * Functions to finish the work of the macros where needed + */ +int xmlIsBaseChar (int c); +int xmlIsBlank (int c); +int xmlIsPubidChar (int c); +int xmlIsLetter (int c); +int xmlIsDigit (int c); +int xmlIsIdeographic(int c); +int xmlIsCombining (int c); +int xmlIsExtender (int c); +int xmlIsChar (int c); + +/** + * Parser context + */ +xmlParserCtxtPtr 
xmlCreateDocParserCtxt (xmlChar *cur); +xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename); +xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer, + int size); +xmlParserCtxtPtr xmlNewParserCtxt (void); +xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL, + const xmlChar *ID, + const xmlChar *base); +int xmlSwitchEncoding (xmlParserCtxtPtr ctxt, + xmlCharEncoding enc); +int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, + xmlCharEncodingHandlerPtr handler); +void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); + +/** + * Entities + */ +void xmlHandleEntity (xmlParserCtxtPtr ctxt, + xmlEntityPtr entity); + +/** + * Input Streams + */ +xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, + xmlEntityPtr entity); +void xmlPushInput (xmlParserCtxtPtr ctxt, + xmlParserInputPtr input); +xmlChar xmlPopInput (xmlParserCtxtPtr ctxt); +void xmlFreeInputStream (xmlParserInputPtr input); +xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt, + const char *filename); +xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt); + +/** + * Namespaces. 
+ */ +xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt, + const xmlChar *name, + xmlChar **prefix); +xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); +xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, + xmlChar **prefix); +xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt); +void xmlParseNamespace (xmlParserCtxtPtr ctxt); + +/** + * Generic production rules + */ +xmlChar * xmlScanName (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseName (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt, + xmlChar **orig); +xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); +xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); +void xmlParseCharData (xmlParserCtxtPtr ctxt, + int cdata); +xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt, + xmlChar **publicID, + int strict); +void xmlParseComment (xmlParserCtxtPtr ctxt); +xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt); +void xmlParsePI (xmlParserCtxtPtr ctxt); +void xmlParseNotationDecl (xmlParserCtxtPtr ctxt); +void xmlParseEntityDecl (xmlParserCtxtPtr ctxt); +int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, + xmlChar **value); +xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt); +xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt); +int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, + xmlEnumerationPtr *tree); +int xmlParseAttributeType (xmlParserCtxtPtr ctxt, + xmlEnumerationPtr *tree); +void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); +xmlElementContentPtr xmlParseElementMixedContentDecl + (xmlParserCtxtPtr ctxt); +#ifdef VMS +xmlElementContentPtr xmlParseElementChildrenContentD + (xmlParserCtxtPtr ctxt); +#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD +#else +xmlElementContentPtr xmlParseElementChildrenContentDecl + (xmlParserCtxtPtr ctxt); +#endif 
+int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, + xmlChar *name, + xmlElementContentPtr *result); +int xmlParseElementDecl (xmlParserCtxtPtr ctxt); +void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); +int xmlParseCharRef (xmlParserCtxtPtr ctxt); +xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt); +void xmlParseReference (xmlParserCtxtPtr ctxt); +void xmlParsePEReference (xmlParserCtxtPtr ctxt); +void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt, + xmlChar **value); +xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt); +void xmlParseEndTag (xmlParserCtxtPtr ctxt); +void xmlParseCDSect (xmlParserCtxtPtr ctxt); +void xmlParseContent (xmlParserCtxtPtr ctxt); +void xmlParseElement (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt); +xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); +int xmlParseSDDecl (xmlParserCtxtPtr ctxt); +void xmlParseXMLDecl (xmlParserCtxtPtr ctxt); +void xmlParseTextDecl (xmlParserCtxtPtr ctxt); +void xmlParseMisc (xmlParserCtxtPtr ctxt); +void xmlParseExternalSubset (xmlParserCtxtPtr ctxt, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/* + * Entities substitution + */ +#define XML_SUBSTITUTE_NONE 0 +#define XML_SUBSTITUTE_REF 1 +#define XML_SUBSTITUTE_PEREF 2 +#define XML_SUBSTITUTE_BOTH 3 + +xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt, + int len, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); +xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, + const xmlChar *str, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); + +/* + * Generated by MACROS on top of parser.c c.f. 
PUSH_AND_POP + */ +int nodePush (xmlParserCtxtPtr ctxt, + xmlNodePtr value); +xmlNodePtr nodePop (xmlParserCtxtPtr ctxt); +int inputPush (xmlParserCtxtPtr ctxt, + xmlParserInputPtr value); +xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt); + +/* + * other commodities shared between parser.c and parserInternals + */ +int xmlSkipBlankChars (xmlParserCtxtPtr ctxt); +int xmlStringCurrentChar (xmlParserCtxtPtr ctxt, + const xmlChar *cur, + int *len); +void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); +void xmlParserHandleReference(xmlParserCtxtPtr ctxt); +xmlChar *namePop (xmlParserCtxtPtr ctxt); +int xmlCheckLanguageID (const xmlChar *lang); + +/* + * Really core functions shared with the HTML parser + */ +int xmlCurrentChar (xmlParserCtxtPtr ctxt, + int *len); +int xmlCopyChar (int len, + xmlChar *out, + int val); +void xmlNextChar (xmlParserCtxtPtr ctxt); +void xmlParserInputShrink (xmlParserInputPtr in); + +#ifdef LIBXML_HTML_ENABLED +/* + * Actually comes from the HTML parser but launched from the init stuff + */ +void htmlInitAutoClose (void); +#endif +#ifdef __cplusplus +} +#endif +#endif /* __XML_PARSER_INTERNALS_H__ */ diff --git a/include/libxml/tree.h b/include/libxml/tree.h new file mode 100644 index 00000000..648817d0 --- /dev/null +++ b/include/libxml/tree.h @@ -0,0 +1,701 @@ +/* + * tree.h : describes the structures found in a tree resulting + * from XML parsing. + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + * + * 14 Nov 2000 ht - added redefinition of xmlBufferWriteChar for VMS + * + */ + +#ifndef __XML_TREE_H__ +#define __XML_TREE_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define XML_XML_NAMESPACE \ + (const xmlChar *) "http://www.w3.org/XML/1998/namespace" + +/* + * The different element types carried by an XML tree + * + * NOTE: This is synchronized with DOM Level1 values + * See http://www.w3.org/TR/REC-DOM-Level-1/ + * + * Actually this had diverged a bit, and now XML_DOCUMENT_TYPE_NODE should + * be deprecated to use an XML_DTD_NODE. + */ +typedef enum { + XML_ELEMENT_NODE= 1, + XML_ATTRIBUTE_NODE= 2, + XML_TEXT_NODE= 3, + XML_CDATA_SECTION_NODE= 4, + XML_ENTITY_REF_NODE= 5, + XML_ENTITY_NODE= 6, + XML_PI_NODE= 7, + XML_COMMENT_NODE= 8, + XML_DOCUMENT_NODE= 9, + XML_DOCUMENT_TYPE_NODE= 10, + XML_DOCUMENT_FRAG_NODE= 11, + XML_NOTATION_NODE= 12, + XML_HTML_DOCUMENT_NODE= 13, + XML_DTD_NODE= 14, + XML_ELEMENT_DECL= 15, + XML_ATTRIBUTE_DECL= 16, + XML_ENTITY_DECL= 17, + XML_NAMESPACE_DECL= 18, + XML_XINCLUDE_START= 19, + XML_XINCLUDE_END= 20 +#ifdef LIBXML_SGML_ENABLED + ,XML_SGML_DOCUMENT_NODE= 21 +#endif +} xmlElementType; + +/* + * Size of an internal character representation. + * + * We use 8bit chars internal representation for memory efficiency, + * Note that with 8 bits wide xmlChars one can still use UTF-8 to handle + * correctly non ISO-Latin input. 
+ */ + +typedef unsigned char xmlChar; + +#ifndef WIN32 +#ifndef CHAR +#define CHAR xmlChar +#endif +#endif + +#define BAD_CAST (xmlChar *) + +/* + * a DTD Notation definition + */ + +typedef struct _xmlNotation xmlNotation; +typedef xmlNotation *xmlNotationPtr; +struct _xmlNotation { + const xmlChar *name; /* Notation name */ + const xmlChar *PublicID; /* Public identifier, if any */ + const xmlChar *SystemID; /* System identifier, if any */ +}; + +/* + * a DTD Attribute definition + */ + +typedef enum { + XML_ATTRIBUTE_CDATA = 1, + XML_ATTRIBUTE_ID, + XML_ATTRIBUTE_IDREF , + XML_ATTRIBUTE_IDREFS, + XML_ATTRIBUTE_ENTITY, + XML_ATTRIBUTE_ENTITIES, + XML_ATTRIBUTE_NMTOKEN, + XML_ATTRIBUTE_NMTOKENS, + XML_ATTRIBUTE_ENUMERATION, + XML_ATTRIBUTE_NOTATION +} xmlAttributeType; + +typedef enum { + XML_ATTRIBUTE_NONE = 1, + XML_ATTRIBUTE_REQUIRED, + XML_ATTRIBUTE_IMPLIED, + XML_ATTRIBUTE_FIXED +} xmlAttributeDefault; + +typedef struct _xmlEnumeration xmlEnumeration; +typedef xmlEnumeration *xmlEnumerationPtr; +struct _xmlEnumeration { + struct _xmlEnumeration *next; /* next one */ + const xmlChar *name; /* Enumeration name */ +}; + +typedef struct _xmlAttribute xmlAttribute; +typedef xmlAttribute *xmlAttributePtr; +struct _xmlAttribute { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! 
*/ + const xmlChar *name; /* Attribute name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + struct _xmlAttribute *nexth; /* next in hash table */ + xmlAttributeType atype; /* The attribute type */ + xmlAttributeDefault def; /* the default */ + const xmlChar *defaultValue; /* or the default value */ + xmlEnumerationPtr tree; /* or the enumeration tree if any */ + const xmlChar *prefix; /* the namespace prefix if any */ + const xmlChar *elem; /* Element holding the attribute */ +}; + +/* + * a DTD Element definition. + */ +typedef enum { + XML_ELEMENT_CONTENT_PCDATA = 1, + XML_ELEMENT_CONTENT_ELEMENT, + XML_ELEMENT_CONTENT_SEQ, + XML_ELEMENT_CONTENT_OR +} xmlElementContentType; + +typedef enum { + XML_ELEMENT_CONTENT_ONCE = 1, + XML_ELEMENT_CONTENT_OPT, + XML_ELEMENT_CONTENT_MULT, + XML_ELEMENT_CONTENT_PLUS +} xmlElementContentOccur; + +typedef struct _xmlElementContent xmlElementContent; +typedef xmlElementContent *xmlElementContentPtr; +struct _xmlElementContent { + xmlElementContentType type; /* PCDATA, ELEMENT, SEQ or OR */ + xmlElementContentOccur ocur; /* ONCE, OPT, MULT or PLUS */ + const xmlChar *name; /* Element name */ + struct _xmlElementContent *c1; /* first child */ + struct _xmlElementContent *c2; /* second child */ +}; + +typedef enum { + XML_ELEMENT_TYPE_EMPTY = 1, + XML_ELEMENT_TYPE_ANY, + XML_ELEMENT_TYPE_MIXED, + XML_ELEMENT_TYPE_ELEMENT +} xmlElementTypeVal; + +typedef struct _xmlElement xmlElement; +typedef xmlElement *xmlElementPtr; +struct _xmlElement { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ELEMENT_DECL, must be second ! 
*/ + const xmlChar *name; /* Element name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + xmlElementTypeVal etype; /* The type */ + xmlElementContentPtr content; /* the allowed element content */ + xmlAttributePtr attributes; /* List of the declared attributes */ + const xmlChar *prefix; /* the namespace prefix if any */ +}; + +/* + * An XML namespace. + * Note that prefix == NULL is valid, it defines the default namespace + * within the subtree (until overridden). + * + * XML_GLOBAL_NAMESPACE is now deprecated for good + * xmlNsType is unified with xmlElementType + */ + +#define XML_LOCAL_NAMESPACE XML_NAMESPACE_DECL +typedef xmlElementType xmlNsType; + +typedef struct _xmlNs xmlNs; +typedef xmlNs *xmlNsPtr; +struct _xmlNs { + struct _xmlNs *next; /* next Ns link for this node */ + xmlNsType type; /* global or local */ + const xmlChar *href; /* URL for the namespace */ + const xmlChar *prefix; /* prefix for the namespace */ +}; + +/* + * An XML DtD, as defined by a DOCTYPE declaration. + * The leading fields follow the same common layout as the other tree + * structures in this header. + * NOTE(review): the head of this declaration (typedefs through the parent + * field) was restored from that common layout; confirm against upstream. + */ +typedef struct _xmlDtd xmlDtd; +typedef xmlDtd *xmlDtdPtr; +struct _xmlDtd { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_DTD_NODE, must be second ! */ + const xmlChar *name; /* Name of the DTD */ + struct _xmlNode *children; /* first child link */ + struct _xmlNode *last; /* last child link */ + struct _xmlDoc *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + /* End of common part */ + void *notations; /* Hash table for notations if any */ + void *elements; /* Hash table for elements if any */ + void *attributes; /* Hash table for attributes if any */ + void *entities; /* Hash table for entities if any */ + const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */ + const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */ + void *pentities; /* Hash table for param entities if any */ +}; + +/* + * An attribute of an XML node. 
+ */ +typedef struct _xmlAttr xmlAttr; +typedef xmlAttr *xmlAttrPtr; +struct _xmlAttr { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */ + const xmlChar *name; /* the name of the property */ + struct _xmlNode *children; /* the value of the property */ + struct _xmlNode *last; /* NULL */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlAttr *next; /* next sibling link */ + struct _xmlAttr *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + xmlNs *ns; /* pointer to the associated namespace */ + xmlAttributeType atype; /* the attribute type if validating */ +}; + +/* + * An XML ID instance. + */ + +typedef struct _xmlID xmlID; +typedef xmlID *xmlIDPtr; +struct _xmlID { + struct _xmlID *next; /* next ID */ + const xmlChar *value; /* The ID name */ + xmlAttrPtr attr; /* The attribute holding it */ +}; + +/* + * An XML IDREF instance. + */ + +typedef struct _xmlRef xmlRef; +typedef xmlRef *xmlRefPtr; +struct _xmlRef { + struct _xmlRef *next; /* next Ref */ + const xmlChar *value; /* The Ref name */ + xmlAttrPtr attr; /* The attribute holding it */ +}; + +/* + * A buffer structure + */ + +typedef enum { + XML_BUFFER_ALLOC_DOUBLEIT, + XML_BUFFER_ALLOC_EXACT +} xmlBufferAllocationScheme; + +typedef struct _xmlBuffer xmlBuffer; +typedef xmlBuffer *xmlBufferPtr; +struct _xmlBuffer { + xmlChar *content; /* The buffer content UTF8 */ + unsigned int use; /* The buffer size used */ + unsigned int size; /* The buffer size */ + xmlBufferAllocationScheme alloc; /* The realloc method */ +}; + +/* + * A node in an XML tree. + */ +typedef struct _xmlNode xmlNode; +typedef xmlNode *xmlNodePtr; +struct _xmlNode { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* type number, must be second ! 
*/ + const xmlChar *name; /* the name of the node, or the entity */ + struct _xmlNode *children; /* parent->childs link */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + xmlNs *ns; /* pointer to the associated namespace */ +#ifndef XML_USE_BUFFER_CONTENT + xmlChar *content; /* the content */ +#else + xmlBufferPtr content; /* the content in a buffer */ +#endif + + /* End of common part */ + struct _xmlAttr *properties;/* properties list */ + xmlNs *nsDef; /* namespace definitions on this node */ +}; + +/* + * An XML document. + */ +typedef struct _xmlDoc xmlDoc; +typedef xmlDoc *xmlDocPtr; +struct _xmlDoc { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */ + char *name; /* name/filename/URI of the document */ + struct _xmlNode *children; /* the document tree */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* autoreference to itself */ + + /* End of common part */ + int compression;/* level of zlib compression */ + int standalone; /* standalone document (no external refs) */ + struct _xmlDtd *intSubset; /* the document internal subset */ + struct _xmlDtd *extSubset; /* the document external subset */ + struct _xmlNs *oldNs; /* Global namespace, the old way */ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* external initial encoding, if any */ + void *ids; /* Hash table for ID attributes if any */ + void *refs; /* Hash table for IDREFs attributes if any */ + const xmlChar *URL; /* The URI for that document */ + int charset; /* encoding of the in-memory 
content + actually an xmlCharEncoding */ +}; + +/* + * Compatibility naming layer with libxml1 + */ +#ifndef xmlChildrenNode +#define xmlChildrenNode children +#define xmlRootNode children +#endif + +/* + * Variables. + */ +LIBXML_DLL_IMPORT extern xmlNsPtr baseDTD; +LIBXML_DLL_IMPORT extern int oldXMLWDcompatibility;/* maintain compatibility with old WD */ +LIBXML_DLL_IMPORT extern int xmlIndentTreeOutput; /* try to indent the tree dumps */ +LIBXML_DLL_IMPORT extern xmlBufferAllocationScheme xmlBufferAllocScheme; /* alloc scheme to use */ +LIBXML_DLL_IMPORT extern int xmlSaveNoEmptyTags; /* save empty tags as */ + +/* + * Handling Buffers. + */ + +xmlBufferPtr xmlBufferCreate (void); +xmlBufferPtr xmlBufferCreateSize (size_t size); +void xmlBufferFree (xmlBufferPtr buf); +int xmlBufferDump (FILE *file, + xmlBufferPtr buf); +void xmlBufferAdd (xmlBufferPtr buf, + const xmlChar *str, + int len); +void xmlBufferAddHead (xmlBufferPtr buf, + const xmlChar *str, + int len); +void xmlBufferCat (xmlBufferPtr buf, + const xmlChar *str); +void xmlBufferCCat (xmlBufferPtr buf, + const char *str); +int xmlBufferShrink (xmlBufferPtr buf, + unsigned int len); +int xmlBufferGrow (xmlBufferPtr buf, + unsigned int len); +void xmlBufferEmpty (xmlBufferPtr buf); +const xmlChar* xmlBufferContent (const xmlBufferPtr buf); +int xmlBufferUse (const xmlBufferPtr buf); +void xmlBufferSetAllocationScheme(xmlBufferPtr buf, + xmlBufferAllocationScheme scheme); +int xmlBufferLength (const xmlBufferPtr buf); + +/* + * Creating/freeing new structures + */ +xmlDtdPtr xmlCreateIntSubset (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlNewDtd (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc); +void xmlFreeDtd (xmlDtdPtr cur); +xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc, + const xmlChar *href, + const xmlChar *prefix); +xmlNsPtr xmlNewNs (xmlNodePtr node, 
+ const xmlChar *href, + const xmlChar *prefix); +void xmlFreeNs (xmlNsPtr cur); +xmlDocPtr xmlNewDoc (const xmlChar *version); +void xmlFreeDoc (xmlDocPtr cur); +xmlAttrPtr xmlNewDocProp (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +void xmlFreePropList (xmlAttrPtr cur); +void xmlFreeProp (xmlAttrPtr cur); +xmlAttrPtr xmlCopyProp (xmlNodePtr target, + xmlAttrPtr cur); +xmlAttrPtr xmlCopyPropList (xmlNodePtr target, + xmlAttrPtr cur); +xmlDtdPtr xmlCopyDtd (xmlDtdPtr dtd); +xmlDocPtr xmlCopyDoc (xmlDocPtr doc, + int recursive); + +/* + * Creating new nodes + */ +xmlNodePtr xmlNewDocNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocRawNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewNode (xmlNsPtr ns, + const xmlChar *name); +xmlNodePtr xmlNewChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewTextChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocText (xmlDocPtr doc, + const xmlChar *content); +xmlNodePtr xmlNewText (const xmlChar *content); +xmlNodePtr xmlNewPI (const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocTextLen (xmlDocPtr doc, + const xmlChar *content, + int len); +xmlNodePtr xmlNewTextLen (const xmlChar *content, + int len); +xmlNodePtr xmlNewDocComment (xmlDocPtr doc, + const xmlChar *content); +xmlNodePtr xmlNewComment (const xmlChar *content); +xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc, + const xmlChar *content, + int len); +xmlNodePtr xmlNewCharRef (xmlDocPtr doc, + const xmlChar *name); +xmlNodePtr xmlNewReference (xmlDocPtr doc, + const xmlChar *name); +xmlNodePtr xmlCopyNode (xmlNodePtr node, 
+ int recursive); +xmlNodePtr xmlCopyNodeList (xmlNodePtr node); +xmlNodePtr xmlNewDocFragment (xmlDocPtr doc); + +/* + * Navigating + */ +xmlNodePtr xmlDocGetRootElement (xmlDocPtr doc); +xmlNodePtr xmlGetLastChild (xmlNodePtr parent); +int xmlNodeIsText (xmlNodePtr node); +int xmlIsBlankNode (xmlNodePtr node); + +/* + * Changing the structure + */ +xmlNodePtr xmlDocSetRootElement (xmlDocPtr doc, + xmlNodePtr root); +void xmlNodeSetName (xmlNodePtr cur, + const xmlChar *name); +xmlNodePtr xmlAddChild (xmlNodePtr parent, + xmlNodePtr cur); +xmlNodePtr xmlAddChildList (xmlNodePtr parent, + xmlNodePtr cur); +xmlNodePtr xmlReplaceNode (xmlNodePtr old, + xmlNodePtr cur); +xmlNodePtr xmlAddSibling (xmlNodePtr cur, + xmlNodePtr elem); +xmlNodePtr xmlAddPrevSibling (xmlNodePtr cur, + xmlNodePtr elem); +xmlNodePtr xmlAddNextSibling (xmlNodePtr cur, + xmlNodePtr elem); +void xmlUnlinkNode (xmlNodePtr cur); +xmlNodePtr xmlTextMerge (xmlNodePtr first, + xmlNodePtr second); +void xmlTextConcat (xmlNodePtr node, + const xmlChar *content, + int len); +void xmlFreeNodeList (xmlNodePtr cur); +void xmlFreeNode (xmlNodePtr cur); +void xmlSetTreeDoc (xmlNodePtr tree, + xmlDocPtr doc); +void xmlSetListDoc (xmlNodePtr list, + xmlDocPtr doc); + +/* + * Namespaces + */ +xmlNsPtr xmlSearchNs (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *nameSpace); +xmlNsPtr xmlSearchNsByHref (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *href); +xmlNsPtr * xmlGetNsList (xmlDocPtr doc, + xmlNodePtr node); +void xmlSetNs (xmlNodePtr node, + xmlNsPtr ns); +xmlNsPtr xmlCopyNamespace (xmlNsPtr cur); +xmlNsPtr xmlCopyNamespaceList (xmlNsPtr cur); + +/* + * Changing the content. 
+ */ +xmlAttrPtr xmlSetProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlGetProp (xmlNodePtr node, + const xmlChar *name); +xmlAttrPtr xmlHasProp (xmlNodePtr node, + const xmlChar *name); +xmlAttrPtr xmlSetNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlGetNsProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *nameSpace); +xmlNodePtr xmlStringGetNodeList (xmlDocPtr doc, + const xmlChar *value); +xmlNodePtr xmlStringLenGetNodeList (xmlDocPtr doc, + const xmlChar *value, + int len); +xmlChar * xmlNodeListGetString (xmlDocPtr doc, + xmlNodePtr list, + int inLine); +xmlChar * xmlNodeListGetRawString (xmlDocPtr doc, + xmlNodePtr list, + int inLine); +void xmlNodeSetContent (xmlNodePtr cur, + const xmlChar *content); +void xmlNodeSetContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +void xmlNodeAddContent (xmlNodePtr cur, + const xmlChar *content); +void xmlNodeAddContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +xmlChar * xmlNodeGetContent (xmlNodePtr cur); +xmlChar * xmlNodeGetLang (xmlNodePtr cur); +void xmlNodeSetLang (xmlNodePtr cur, + const xmlChar *lang); +int xmlNodeGetSpacePreserve (xmlNodePtr cur); +void xmlNodeSetSpacePreserve (xmlNodePtr cur, int + val); +xmlChar * xmlNodeGetBase (xmlDocPtr doc, + xmlNodePtr cur); +void xmlNodeSetBase (xmlNodePtr cur, + xmlChar *uri); + +/* + * Removing content. 
+ */ +int xmlRemoveProp (xmlAttrPtr attr); +int xmlRemoveNode (xmlNodePtr node); /* TODO */ + +/* + * Internal, don't use + */ +#ifdef VMS +void xmlBufferWriteXmlCHAR (xmlBufferPtr buf, + const xmlChar *string); +#define xmlBufferWriteCHAR xmlBufferWriteXmlCHAR +#else +void xmlBufferWriteCHAR (xmlBufferPtr buf, + const xmlChar *string); +#endif +void xmlBufferWriteChar (xmlBufferPtr buf, + const char *string); +void xmlBufferWriteQuotedString(xmlBufferPtr buf, + const xmlChar *string); + +/* + * Namespace handling + */ +int xmlReconciliateNs (xmlDocPtr doc, + xmlNodePtr tree); + +/* + * Saving + */ +void xmlDocDumpFormatMemory (xmlDocPtr cur, + xmlChar**mem, + int *size, + int format); +void xmlDocDumpMemory (xmlDocPtr cur, + xmlChar**mem, + int *size); +void xmlDocDumpMemoryEnc (xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding); +void xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding, + int format); +int xmlDocDump (FILE *f, + xmlDocPtr cur); +void xmlElemDump (FILE *f, + xmlDocPtr doc, + xmlNodePtr cur); +int xmlSaveFile (const char *filename, + xmlDocPtr cur); +void xmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format); + +/* This one is exported from xmlIO.h + +int xmlSaveFileTo (xmlOutputBuffer *buf, + xmlDocPtr cur, + const char *encoding); + */ + +int xmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); + +/* + * Compression + */ +int xmlGetDocCompressMode (xmlDocPtr doc); +void xmlSetDocCompressMode (xmlDocPtr doc, + int mode); +int xmlGetCompressMode (void); +void xmlSetCompressMode (int mode); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_TREE_H__ */ + diff --git a/include/libxml/uri.h b/include/libxml/uri.h new file mode 100644 index 00000000..e7aeda41 --- /dev/null +++ b/include/libxml/uri.h @@ -0,0 +1,61 @@ +/** + * uri.c: library of generic URI related routines + * + * 
Reference: RFC 2396 + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_URI_H__ +#define __XML_URI_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlURI: the components of a URI kept as separate fields (scheme, opaque, authority, server, user, port, path, query, fragment) + */ +typedef struct _xmlURI xmlURI; +typedef xmlURI *xmlURIPtr; +struct _xmlURI { + char *scheme; + char *opaque; + char *authority; + char *server; + char *user; + int port; + char *path; + char *query; + char *fragment; +}; + +/* + * This function is in tree.h: + * xmlChar * xmlNodeGetBase (xmlDocPtr doc, + * xmlNodePtr cur); + */ +xmlURIPtr xmlCreateURI (void); +xmlChar * xmlBuildURI (const xmlChar *URI, + const xmlChar *base); +xmlURIPtr xmlParseURI (const char *URI); +int xmlParseURIReference (xmlURIPtr uri, + const char *str); +xmlChar * xmlSaveUri (xmlURIPtr uri); +void xmlPrintURI (FILE *stream, + xmlURIPtr uri); +char * xmlURIUnescapeString (const char *str, + int len, + char *target); +int xmlNormalizeURIPath (char *path); +xmlChar * xmlURIEscape (const xmlChar *str); +void xmlFreeURI (xmlURIPtr uri); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_URI_H__ */ diff --git a/include/libxml/valid.h b/include/libxml/valid.h new file mode 100644 index 00000000..a7eb675d --- /dev/null +++ b/include/libxml/valid.h @@ -0,0 +1,236 @@ +/* + * valid.h : interface to the DTD handling and the validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_VALID_H__ +#define __XML_VALID_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * an xmlValidCtxt is used for error reporting when validating + */ + +typedef void (*xmlValidityErrorFunc) (void *ctx, const char *msg, ...); +typedef void (*xmlValidityWarningFunc) (void *ctx, const char *msg, ...); + +typedef struct _xmlValidCtxt xmlValidCtxt; +typedef xmlValidCtxt *xmlValidCtxtPtr; +struct _xmlValidCtxt { + void *userData; /* user specific data block */ + xmlValidityErrorFunc error; /* the callback in case of errors */ + xmlValidityWarningFunc warning; /* the callback in case of warning */ + + /* Node analysis stack used when validating within entities */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + int finishDtd; /* finished validating the Dtd ? */ + xmlDocPtr doc; /* the document */ + int valid; /* temporary validity check result */ +}; + +/* + * All notation declarations are stored in a table + * there is one table per DTD + */ + +typedef struct _xmlHashTable xmlNotationTable; +typedef xmlNotationTable *xmlNotationTablePtr; + +/* + * All element declarations are stored in a table + * there is one table per DTD + */ + +typedef struct _xmlHashTable xmlElementTable; +typedef xmlElementTable *xmlElementTablePtr; + +/* + * All attribute declarations are stored in a table + * there is one table per DTD + */ + +typedef struct _xmlHashTable xmlAttributeTable; +typedef xmlAttributeTable *xmlAttributeTablePtr; + +/* + * All IDs attributes are stored in a table + * there is one table per document + */ + +typedef struct _xmlHashTable xmlIDTable; +typedef xmlIDTable *xmlIDTablePtr; + +/* + * All Refs attributes are stored in a table + * there is one table per document + */ + +typedef struct _xmlHashTable xmlRefTable; +typedef xmlRefTable *xmlRefTablePtr; + 
+/* helper */ +xmlChar * xmlSplitQName2 (const xmlChar *name, + xmlChar **prefix); + +/* Notation */ +xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar *PublicID, + const xmlChar *SystemID); +xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table); +void xmlFreeNotationTable(xmlNotationTablePtr table); +void xmlDumpNotationDecl (xmlBufferPtr buf, + xmlNotationPtr nota); +void xmlDumpNotationTable(xmlBufferPtr buf, + xmlNotationTablePtr table); + +/* Element Content */ +xmlElementContentPtr xmlNewElementContent (xmlChar *name, + xmlElementContentType type); +xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content); +void xmlFreeElementContent(xmlElementContentPtr cur); +void xmlSprintfElementContent(char *buf, + xmlElementContentPtr content, + int glob); + +/* Element */ +xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + xmlElementTypeVal type, + xmlElementContentPtr content); +xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table); +void xmlFreeElementTable (xmlElementTablePtr table); +void xmlDumpElementTable (xmlBufferPtr buf, + xmlElementTablePtr table); +void xmlDumpElementDecl (xmlBufferPtr buf, + xmlElementPtr elem); + +/* Enumeration */ +xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name); +void xmlFreeEnumeration (xmlEnumerationPtr cur); +xmlEnumerationPtr xmlCopyEnumeration (xmlEnumerationPtr cur); + +/* Attribute */ +xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name, + const xmlChar *ns, + xmlAttributeType type, + xmlAttributeDefault def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table); +void xmlFreeAttributeTable (xmlAttributeTablePtr table); +void xmlDumpAttributeTable (xmlBufferPtr buf, + xmlAttributeTablePtr table); +void xmlDumpAttributeDecl 
(xmlBufferPtr buf, + xmlAttributePtr attr); + +/* IDs */ +xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +xmlIDTablePtr xmlCopyIDTable (xmlIDTablePtr table); +void xmlFreeIDTable (xmlIDTablePtr table); +xmlAttrPtr xmlGetID (xmlDocPtr doc, + const xmlChar *ID); +int xmlIsID (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +int xmlRemoveID (xmlDocPtr doc, xmlAttrPtr attr); + +/* IDREFs */ +xmlRefPtr xmlAddRef (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +xmlRefTablePtr xmlCopyRefTable (xmlRefTablePtr table); +void xmlFreeRefTable (xmlRefTablePtr table); +int xmlIsRef (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +int xmlRemoveRef (xmlDocPtr doc, xmlAttrPtr attr); + +/** + * The public function calls related to validity checking + */ + +int xmlValidateRoot (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateElementDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlElementPtr elem); +xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); +int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlAttributePtr attr); +int xmlValidateAttributeValue(xmlAttributeType type, + const xmlChar *value); +int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNotationPtr nota); +int xmlValidateDtd (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlDtdPtr dtd); +int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateDocument (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlValidateOneElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlValidateOneAttribute (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr, + const xmlChar *value); +int xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int 
xmlValidateNotationUse (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *notationName); +int xmlIsMixedElement (xmlDocPtr doc, + const xmlChar *name); +xmlAttributePtr xmlGetDtdAttrDesc (xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name); +xmlNotationPtr xmlGetDtdNotationDesc (xmlDtdPtr dtd, + const xmlChar *name); +xmlElementPtr xmlGetDtdElementDesc (xmlDtdPtr dtd, + const xmlChar *name); + +int xmlValidGetValidElements(xmlNode *prev, + xmlNode *next, + const xmlChar **list, + int max); +int xmlValidGetPotentialChildren(xmlElementContent *ctree, + const xmlChar **list, + int *len, + int max); +#ifdef __cplusplus +} +#endif +#endif /* __XML_VALID_H__ */ diff --git a/include/libxml/xinclude.h b/include/libxml/xinclude.h new file mode 100644 index 00000000..eca4588c --- /dev/null +++ b/include/libxml/xinclude.h @@ -0,0 +1,26 @@ +/* + * xinclude.c : API to handle XInclude processing + * + * World Wide Web Consortium Working Draft 26 October 2000 + * http://www.w3.org/TR/2000/WD-xinclude-20001026 + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_XINCLUDE_H__ +#define __XML_XINCLUDE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int xmlXIncludeProcess (xmlDocPtr doc); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XINCLUDE_H__ */ diff --git a/include/libxml/xlink.h b/include/libxml/xlink.h new file mode 100644 index 00000000..37a54151 --- /dev/null +++ b/include/libxml/xlink.h @@ -0,0 +1,182 @@ +/* + * xlink.h : interfaces to the hyperlinks detection module + * + * See Copyright for the status of this software. + * + * Related specification: http://www.w3.org/TR/xlink + * http://www.w3.org/HTML/ + * and XBase + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_XLINK_H__ +#define __XML_XLINK_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif +/** + * Various defines for the various Link properties. 
+ * + * NOTE: the link detection layer will try to resolve QName expansion + * of namespaces, if "foo" is the prefix for "http://foo.com/" + * then the link detection layer will expand role="foo:myrole" + * to "http://foo.com/:myrole" + * NOTE: the link detection layer will expand URI-Refences found on + * href attributes by using the base mechanism if found. + */ +typedef xmlChar *xlinkHRef; +typedef xmlChar *xlinkRole; +typedef xmlChar *xlinkTitle; + +typedef enum { + XLINK_TYPE_NONE = 0, + XLINK_TYPE_SIMPLE, + XLINK_TYPE_EXTENDED, + XLINK_TYPE_EXTENDED_SET +} xlinkType; + +typedef enum { + XLINK_SHOW_NONE = 0, + XLINK_SHOW_NEW, + XLINK_SHOW_EMBED, + XLINK_SHOW_REPLACE +} xlinkShow; + +typedef enum { + XLINK_ACTUATE_NONE = 0, + XLINK_ACTUATE_AUTO, + XLINK_ACTUATE_ONREQUEST +} xlinkActuate; + +/** + * xlinkNodeDetectFunc: + * @ctx: user data pointer + * @node: the node to check + * + * This is the prototype for the link detection routine + * It calls the default link detection callbacks upon link detection. + */ +typedef void +(*xlinkNodeDetectFunc) (void *ctx, + xmlNodePtr node); + +/** + * The link detection module interacts with the upper layers using + * a set of callbacks registered at parsing time. + */ + +/** + * xlinkSimpleLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @href: the target of the link + * @role: the role string + * @title: the link title + * + * This is the prototype for a simple link detection callback. 
+ */ +typedef void +(*xlinkSimpleLinkFunk) (void *ctx, + xmlNodePtr node, + const xlinkHRef href, + const xlinkRole role, + const xlinkTitle title); + +/** + * xlinkExtendedLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbArcs: the number of arcs detected on the link + * @from: pointer to the array of source roles found on the arcs + * @to: pointer to the array of target roles found on the arcs + * @show: array of values for the show attributes found on the arcs + * @actuate: array of values for the actuate attributes found on the arcs + * @nbTitles: the number of titles detected on the link + * @titles: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for an extended link detection callback. + */ +typedef void +(*xlinkExtendedLinkFunk)(void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbArcs, + const xlinkRole *from, + const xlinkRole *to, + xlinkShow *show, + xlinkActuate *actuate, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * xlinkExtendedLinkSetFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbTitles: the number of titles detected on the link + * @title: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for a extended link set detection callback. 
+ */ +typedef void +(*xlinkExtendedLinkSetFunk) (void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * This is the structure containing a set of link detection callbacks. + * + * There are no default xlink callbacks; if one wants to get link + * recognition activated, those callbacks must be provided before parsing. + */ +typedef struct _xlinkHandler xlinkHandler; +typedef xlinkHandler *xlinkHandlerPtr; +struct _xlinkHandler { + xlinkSimpleLinkFunk simple; + xlinkExtendedLinkFunk extended; + xlinkExtendedLinkSetFunk set; +}; + +/** + * The default detection routine can be overridden; it calls the default + * detection callbacks. + */ + +xlinkNodeDetectFunc xlinkGetDefaultDetect (void); +void xlinkSetDefaultDetect (xlinkNodeDetectFunc func); + +/** + * Routines to set/get the default handlers. + */ +xlinkHandlerPtr xlinkGetDefaultHandler (void); +void xlinkSetDefaultHandler (xlinkHandlerPtr handler); + +/* + * Link detection module itself. + */ +xlinkType xlinkIsLink (xmlDocPtr doc, + xmlNodePtr node); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XLINK_H__ */ diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h new file mode 100644 index 00000000..ecff73bc --- /dev/null +++ b/include/libxml/xmlIO.h @@ -0,0 +1,178 @@ +/* + * xmlIO.h : interface for the I/O interfaces used by the parser + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + * + * 15 Nov 2000 ht - modified for VMS + */ + +#ifndef __XML_IO_H__ +#define __XML_IO_H__ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Those are the functions and datatypes for the parser input + * I/O structures. 
+ */ + +typedef int (*xmlInputMatchCallback) (char const *filename); +typedef void * (*xmlInputOpenCallback) (char const *filename); +typedef int (*xmlInputReadCallback) (void * context, char * buffer, int len); +typedef void (*xmlInputCloseCallback) (void * context); + +typedef struct _xmlParserInputBuffer xmlParserInputBuffer; +typedef xmlParserInputBuffer *xmlParserInputBufferPtr; +struct _xmlParserInputBuffer { + void* context; + xmlInputReadCallback readcallback; + xmlInputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */ + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */ + xmlBufferPtr raw; /* if encoder != NULL buffer for raw input */ +}; + + +/* + * Those are the functions and datatypes for the library output + * I/O structures. + */ + +typedef int (*xmlOutputMatchCallback) (char const *filename); +typedef void * (*xmlOutputOpenCallback) (char const *filename); +typedef int (*xmlOutputWriteCallback) (void * context, const char * buffer, + int len); +typedef void (*xmlOutputCloseCallback) (void * context); + +typedef struct _xmlOutputBuffer xmlOutputBuffer; +typedef xmlOutputBuffer *xmlOutputBufferPtr; +struct _xmlOutputBuffer { + void* context; + xmlOutputWriteCallback writecallback; + xmlOutputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions on output; presumably from UTF-8 - TODO confirm */ + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 or ISOLatin */ + xmlBufferPtr conv; /* if encoder != NULL buffer for output */ + int written; /* total number of bytes written */ +}; + +/* + * Interfaces for input + */ + +void xmlRegisterDefaultInputCallbacks (void); +xmlParserInputBufferPtr + xmlAllocParserInputBuffer (xmlCharEncoding enc); + +#ifdef VMS +xmlParserInputBufferPtr + xmlParserInputBufferCreateFname (const char *URI, + xmlCharEncoding enc); +#define xmlParserInputBufferCreateFilename xmlParserInputBufferCreateFname +#else +xmlParserInputBufferPtr + xmlParserInputBufferCreateFilename 
(const char *URI, + xmlCharEncoding enc); +#endif + +xmlParserInputBufferPtr + xmlParserInputBufferCreateFile (FILE *file, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateFd (int fd, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateMem (const char *mem, int size, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); +int xmlParserInputBufferRead (xmlParserInputBufferPtr in, + int len); +int xmlParserInputBufferGrow (xmlParserInputBufferPtr in, + int len); +int xmlParserInputBufferPush (xmlParserInputBufferPtr in, + int len, + const char *buf); +void xmlFreeParserInputBuffer (xmlParserInputBufferPtr in); +char * xmlParserGetDirectory (const char *filename); + +int xmlRegisterInputCallbacks (xmlInputMatchCallback match, + xmlInputOpenCallback open, + xmlInputReadCallback read, + xmlInputCloseCallback close); +/* + * Interfaces for output + */ +void xmlRegisterDefaultOutputCallbacks(void); +xmlOutputBufferPtr + xmlAllocOutputBuffer (xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateFilename (const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression); + +xmlOutputBufferPtr + xmlOutputBufferCreateFile (FILE *file, + xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateFd (int fd, + xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateIO (xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, + void *ioctx, + xmlCharEncodingHandlerPtr encoder); + +int xmlOutputBufferWrite (xmlOutputBufferPtr out, + int len, + const char *buf); +int xmlOutputBufferWriteString (xmlOutputBufferPtr out, + const char *str); + +int xmlOutputBufferFlush (xmlOutputBufferPtr out); +int xmlOutputBufferClose (xmlOutputBufferPtr out); + +int xmlRegisterOutputCallbacks (xmlOutputMatchCallback match, + 
xmlOutputOpenCallback open, + xmlOutputWriteCallback write, + xmlOutputCloseCallback close); + +/* + * These save functions are actually part of tree.h and HTMLtree.h + */ +int xmlSaveFileTo (xmlOutputBuffer *buf, + xmlDocPtr cur, + const char *encoding); +void xmlNodeDumpOutput (xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format, + const char *encoding); +void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_IO_H__ */ diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h new file mode 100644 index 00000000..53c57518 --- /dev/null +++ b/include/libxml/xmlerror.h @@ -0,0 +1,180 @@ +#ifndef __XML_ERROR_H__ +#define __XML_ERROR_H__ + +#include +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_ERR_OK = 0, + XML_ERR_INTERNAL_ERROR, + XML_ERR_NO_MEMORY, + + XML_ERR_DOCUMENT_START, /* 3 */ + XML_ERR_DOCUMENT_EMPTY, + XML_ERR_DOCUMENT_END, + + XML_ERR_INVALID_HEX_CHARREF, /* 6 */ + XML_ERR_INVALID_DEC_CHARREF, + XML_ERR_INVALID_CHARREF, + XML_ERR_INVALID_CHAR, + + XML_ERR_CHARREF_AT_EOF, /* 10 */ + XML_ERR_CHARREF_IN_PROLOG, + XML_ERR_CHARREF_IN_EPILOG, + XML_ERR_CHARREF_IN_DTD, + XML_ERR_ENTITYREF_AT_EOF, + XML_ERR_ENTITYREF_IN_PROLOG, + XML_ERR_ENTITYREF_IN_EPILOG, + XML_ERR_ENTITYREF_IN_DTD, + XML_ERR_PEREF_AT_EOF, + XML_ERR_PEREF_IN_PROLOG, + XML_ERR_PEREF_IN_EPILOG, + XML_ERR_PEREF_IN_INT_SUBSET, + + XML_ERR_ENTITYREF_NO_NAME, /* 22 */ + XML_ERR_ENTITYREF_SEMICOL_MISSING, + + XML_ERR_PEREF_NO_NAME, /* 24 */ + XML_ERR_PEREF_SEMICOL_MISSING, + + XML_ERR_UNDECLARED_ENTITY, /* 26 */ + XML_WAR_UNDECLARED_ENTITY, + XML_ERR_UNPARSED_ENTITY, + XML_ERR_ENTITY_IS_EXTERNAL, + XML_ERR_ENTITY_IS_PARAMETER, + + XML_ERR_UNKNOWN_ENCODING, /* 31 */ + XML_ERR_UNSUPPORTED_ENCODING, + + XML_ERR_STRING_NOT_STARTED, /* 33 */ + XML_ERR_STRING_NOT_CLOSED, + XML_ERR_NS_DECL_ERROR, + + XML_ERR_ENTITY_NOT_STARTED, /* 36 */ + 
XML_ERR_ENTITY_NOT_FINISHED, + + XML_ERR_LT_IN_ATTRIBUTE, /* 38 */ + XML_ERR_ATTRIBUTE_NOT_STARTED, + XML_ERR_ATTRIBUTE_NOT_FINISHED, + XML_ERR_ATTRIBUTE_WITHOUT_VALUE, + XML_ERR_ATTRIBUTE_REDEFINED, + + XML_ERR_LITERAL_NOT_STARTED, /* 43 */ + XML_ERR_LITERAL_NOT_FINISHED, + + XML_ERR_COMMENT_NOT_FINISHED, /* 45 */ + + XML_ERR_PI_NOT_STARTED, /* 46 */ + XML_ERR_PI_NOT_FINISHED, + + XML_ERR_NOTATION_NOT_STARTED, /* 48 */ + XML_ERR_NOTATION_NOT_FINISHED, + + XML_ERR_ATTLIST_NOT_STARTED, /* 50 */ + XML_ERR_ATTLIST_NOT_FINISHED, + + XML_ERR_MIXED_NOT_STARTED, /* 52 */ + XML_ERR_MIXED_NOT_FINISHED, + + XML_ERR_ELEMCONTENT_NOT_STARTED, /* 54 */ + XML_ERR_ELEMCONTENT_NOT_FINISHED, + + XML_ERR_XMLDECL_NOT_STARTED, /* 56 */ + XML_ERR_XMLDECL_NOT_FINISHED, + + XML_ERR_CONDSEC_NOT_STARTED, /* 58 */ + XML_ERR_CONDSEC_NOT_FINISHED, + + XML_ERR_EXT_SUBSET_NOT_FINISHED, /* 60 */ + + XML_ERR_DOCTYPE_NOT_FINISHED, /* 61 */ + + XML_ERR_MISPLACED_CDATA_END, /* 62 */ + XML_ERR_CDATA_NOT_FINISHED, + + XML_ERR_RESERVED_XML_NAME, /* 64 */ + + XML_ERR_SPACE_REQUIRED, /* 65 */ + XML_ERR_SEPARATOR_REQUIRED, + XML_ERR_NMTOKEN_REQUIRED, + XML_ERR_NAME_REQUIRED, + XML_ERR_PCDATA_REQUIRED, + XML_ERR_URI_REQUIRED, + XML_ERR_PUBID_REQUIRED, + XML_ERR_LT_REQUIRED, + XML_ERR_GT_REQUIRED, + XML_ERR_LTSLASH_REQUIRED, + XML_ERR_EQUAL_REQUIRED, + + XML_ERR_TAG_NAME_MISMATCH, /* 76 */ + XML_ERR_TAG_NOT_FINISED, + + XML_ERR_STANDALONE_VALUE, /* 78 */ + + XML_ERR_ENCODING_NAME, /* 79 */ + + XML_ERR_HYPHEN_IN_COMMENT, /* 80 */ + + XML_ERR_INVALID_ENCODING, /* 81 */ + + XML_ERR_EXT_ENTITY_STANDALONE, /* 82 */ + + XML_ERR_CONDSEC_INVALID, /* 83 */ + + XML_ERR_VALUE_REQUIRED, /* 84 */ + + XML_ERR_NOT_WELL_BALANCED, /* 85 */ + XML_ERR_EXTRA_CONTENT, /* 86 */ + XML_ERR_ENTITY_CHAR_ERROR, /* 87 */ + XML_ERR_ENTITY_PE_INTERNAL, /* 88 */ + XML_ERR_ENTITY_LOOP, /* 89 */ + XML_ERR_ENTITY_BOUNDARY, /* 90 */ + XML_ERR_INVALID_URI, /* 91 */ + XML_ERR_URI_FRAGMENT /* 92 */ +}xmlParserErrors; + +/* + * Signature of the 
function to use when there is an error and + * no parsing or validity context available + */ +typedef void (*xmlGenericErrorFunc) (void *ctx, const char *msg, ...); + +/* + * These are the default error function and associated context to use + * when there is an error and no parsing or validity context available + */ + +LIBXML_DLL_IMPORT extern xmlGenericErrorFunc xmlGenericError; +LIBXML_DLL_IMPORT extern void *xmlGenericErrorContext; + +/* + * Use the following function to reset the two previous global variables. + */ +void xmlSetGenericErrorFunc (void *ctx, + xmlGenericErrorFunc handler); + +/* + * Default message routines used by SAX and Valid context for error + * and warning reporting + */ +void xmlParserError (void *ctx, + const char *msg, + ...); +void xmlParserWarning (void *ctx, + const char *msg, + ...); +void xmlParserValidityError (void *ctx, + const char *msg, + ...); +void xmlParserValidityWarning(void *ctx, + const char *msg, + ...); +void xmlParserPrintFileInfo (xmlParserInputPtr input); +void xmlParserPrintFileContext(xmlParserInputPtr input); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_ERROR_H__ */ diff --git a/include/libxml/xmlmemory.h b/include/libxml/xmlmemory.h new file mode 100644 index 00000000..1e533d15 --- /dev/null +++ b/include/libxml/xmlmemory.h @@ -0,0 +1,91 @@ +/* + * xmlmemory.h: interface for the memory allocation debug. + * + * Daniel.Veillard@w3.org + */ + + +#ifndef _DEBUG_MEMORY_ALLOC_ +#define _DEBUG_MEMORY_ALLOC_ + +#include +#include + +/* + * DEBUG_MEMORY_LOCATION should be activated only done when debugging + * libxml. 
+ */ +/* #define DEBUG_MEMORY_LOCATION */ + +#ifdef DEBUG +#ifndef DEBUG_MEMORY +#define DEBUG_MEMORY +#endif +#endif + +#ifdef DEBUG_MEMORY_LOCATION +#define MEM_LIST /* keep a list of all the allocated memory blocks */ +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The XML memory wrapper supports 4 basic overloadable functions + */ +typedef void (*xmlFreeFunc)(void *); +typedef void *(*xmlMallocFunc)(int); +typedef void *(*xmlReallocFunc)(void *, int); +typedef char *(*xmlStrdupFunc)(const char *); + +/* + * The 4 interfaces used for all memory handling within libxml + */ +LIBXML_DLL_IMPORT extern xmlFreeFunc xmlFree; +LIBXML_DLL_IMPORT extern xmlMallocFunc xmlMalloc; +LIBXML_DLL_IMPORT extern xmlReallocFunc xmlRealloc; +LIBXML_DLL_IMPORT extern xmlStrdupFunc xmlMemStrdup; + +/* + * The way to overload the existing functions + */ +int xmlMemSetup (xmlFreeFunc freeFunc, + xmlMallocFunc mallocFunc, + xmlReallocFunc reallocFunc, + xmlStrdupFunc strdupFunc); +int xmlMemGet (xmlFreeFunc *freeFunc, + xmlMallocFunc *mallocFunc, + xmlReallocFunc *reallocFunc, + xmlStrdupFunc *strdupFunc); + +/* + * Initialization of the memory layer + */ +int xmlInitMemory (void); + +/* + * Those are specific to the XML debug memory wrapper + */ +int xmlMemUsed (void); +void xmlMemDisplay (FILE *fp); +void xmlMemShow (FILE *fp, int nr); +void xmlMemoryDump (void); +int xmlInitMemory (void); /* NOTE(review): duplicate of the declaration above */ + +#ifdef DEBUG_MEMORY_LOCATION +#define xmlMalloc(x) xmlMallocLoc((x), __FILE__, __LINE__) +#define xmlRealloc(p, x) xmlReallocLoc((p), (x), __FILE__, __LINE__) +#define xmlMemStrdup(x) xmlMemStrdupLoc((x), __FILE__, __LINE__) + +void * xmlMallocLoc(int size, const char *file, int line); +void * xmlReallocLoc(void *ptr,int size, const char *file, int line); +char * xmlMemStrdupLoc(const char *str, const char *file, int line); +#endif /* DEBUG_MEMORY_LOCATION */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _DEBUG_MEMORY_ALLOC_ */ + diff --git 
a/include/libxml/xmlversion.h.in b/include/libxml/xmlversion.h.in new file mode 100644 index 00000000..71ab184c --- /dev/null +++ b/include/libxml/xmlversion.h.in @@ -0,0 +1,129 @@ +/* + * xmlversion.h : compile-time version information for the XML parser. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +extern void xmlCheckVersion(int version); +#define LIBXML_DOTTED_VERSION "@VERSION@" +#define LIBXML_VERSION @LIBXML_VERSION_NUMBER@ +#define LIBXML_VERSION_STRING "@LIBXML_VERSION_NUMBER@" +#define LIBXML_TEST_VERSION xmlCheckVersion(@LIBXML_VERSION_NUMBER@); + +/* + * Whether the FTP support is configured in + */ +#if @WITH_FTP@ +#define LIBXML_FTP_ENABLED +#else +#define LIBXML_FTP_DISABLED +#endif + +/* + * Whether the HTTP support is configured in + */ +#if @WITH_HTTP@ +#define LIBXML_HTTP_ENABLED +#else +#define LIBXML_HTTP_DISABLED +#endif + +/* + * Whether the HTML support is configured in + */ +#if @WITH_HTML@ +#define LIBXML_HTML_ENABLED +#else +#define LIBXML_HTML_DISABLED +#endif + +/* + * Whether the Docbook support is configured in +#if @WITH_SGML@ +#define LIBXML_SGML_ENABLED +#else +#define LIBXML_SGML_DISABLED +#endif + */ + +/* + * Whether XPath is configured in + */ +#if @WITH_XPATH@ +#define LIBXML_XPATH_ENABLED +#else +#define LIBXML_XPATH_DISABLED +#endif + +/* + * Whether XPointer is configured in + */ +#if @WITH_XPTR@ +#define LIBXML_XPTR_ENABLED +#else +#define LIBXML_XPTR_DISABLED +#endif + +/* + * Whether XInclude is configured in + */ +#if @WITH_XINCLUDE@ +#define LIBXML_XINCLUDE_ENABLED +#else +#define LIBXML_XINCLUDE_DISABLED +#endif + +/* + * Whether iconv support is available + */ +#ifndef WIN32 +#if @WITH_ICONV@ +#define LIBXML_ICONV_ENABLED +#else +#define LIBXML_ICONV_DISABLED +#endif 
+#endif + +/* + * Whether Debugging module is configured in + */ +#if @WITH_DEBUG@ +#define LIBXML_DEBUG_ENABLED +#else +#define LIBXML_DEBUG_DISABLED +#endif + +/* + * Whether the memory debugging is configured in + */ +#if @WITH_MEM_DEBUG@ +#define DEBUG_MEMORY_LOCATION +#endif + +#ifndef LIBXML_DLL_IMPORT +#if defined(WIN32) && !defined(STATIC) +#define LIBXML_DLL_IMPORT __declspec(dllimport) +#else +#define LIBXML_DLL_IMPORT +#endif +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/include/libxml/xpath.h b/include/libxml/xpath.h new file mode 100644 index 00000000..f8fd861e --- /dev/null +++ b/include/libxml/xpath.h @@ -0,0 +1,278 @@ +/* + * xpath.c: interface for XML Path Language implementation + * + * Reference: W3C Working Draft 5 July 1999 + * http://www.w3.org/Style/XSL/Group/1999/07/xpath-19990705.html + * + * See COPYRIGHT for the status of this software + * + * Author: Daniel.Veillard@w3.org + */ + +#ifndef __XML_XPATH_H__ +#define __XML_XPATH_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlXPathContext xmlXPathContext; +typedef xmlXPathContext *xmlXPathContextPtr; +typedef struct _xmlXPathParserContext xmlXPathParserContext; +typedef xmlXPathParserContext *xmlXPathParserContextPtr; + +/** + * The set of XPath error codes + */ + +typedef enum { + XPATH_EXPRESSION_OK = 0, + XPATH_NUMBER_ERROR, + XPATH_UNFINISHED_LITERAL_ERROR, + XPATH_START_LITERAL_ERROR, + XPATH_VARIABLE_REF_ERROR, + XPATH_UNDEF_VARIABLE_ERROR, + XPATH_INVALID_PREDICATE_ERROR, + XPATH_EXPR_ERROR, + XPATH_UNCLOSED_ERROR, + XPATH_UNKNOWN_FUNC_ERROR, + XPATH_INVALID_OPERAND, + XPATH_INVALID_TYPE, + XPATH_INVALID_ARITY, + XPATH_INVALID_CTXT_SIZE, + XPATH_INVALID_CTXT_POSITION, + XPATH_MEMORY_ERROR, + XPTR_SYNTAX_ERROR, + XPTR_RESOURCE_ERROR, + XPTR_SUB_RESOURCE_ERROR, + XPATH_UNDEF_PREFIX_ERROR +} xmlXPathError; + +/* + * A node-set (an unordered collection of nodes without duplicates) + */ +typedef struct 
_xmlNodeSet xmlNodeSet; +typedef xmlNodeSet *xmlNodeSetPtr; +struct _xmlNodeSet { + int nodeNr; /* number of nodes in the set */ + int nodeMax; /* size of the array as allocated */ + xmlNodePtr *nodeTab; /* array of nodes in no particular order */ +}; + +/* + * An expression is evaluated to yield an object, which + * has one of the following four basic types: + * - node-set + * - boolean + * - number + * - string + * + * @@ XPointer will add more types ! + */ + +typedef enum { + XPATH_UNDEFINED = 0, + XPATH_NODESET = 1, + XPATH_BOOLEAN = 2, + XPATH_NUMBER = 3, + XPATH_STRING = 4, + XPATH_POINT = 5, + XPATH_RANGE = 6, + XPATH_LOCATIONSET = 7, + XPATH_USERS = 8, + XPATH_XSLT_TREE = 9 /* An XSLT value tree, non modifiable */ +} xmlXPathObjectType; + +typedef struct _xmlXPathObject xmlXPathObject; +typedef xmlXPathObject *xmlXPathObjectPtr; +struct _xmlXPathObject { + xmlXPathObjectType type; + xmlNodeSetPtr nodesetval; + int boolval; + double floatval; + xmlChar *stringval; + void *user; + int index; + void *user2; + int index2; +}; + +/* + * A conversion function is associated to a type and used to cast + * the new type to primitive values. + */ +typedef int (*xmlXPathConvertFunc) (xmlXPathObjectPtr obj, int type); + +/* + * Extra type: a name and a conversion function. + */ + +typedef struct _xmlXPathType xmlXPathType; +typedef xmlXPathType *xmlXPathTypePtr; +struct _xmlXPathType { + const xmlChar *name; /* the type name */ + xmlXPathConvertFunc func; /* the conversion function */ +}; + +/* + * Extra variable: a name and a value. 
+ */ + +typedef struct _xmlXPathVariable xmlXPathVariable; +typedef xmlXPathVariable *xmlXPathVariablePtr; +struct _xmlXPathVariable { + const xmlChar *name; /* the variable name */ + xmlXPathObjectPtr value; /* the value */ +}; + +/* + * an evaluation function, the parameters are on the context stack + */ + +typedef void (*xmlXPathEvalFunc)(xmlXPathParserContextPtr ctxt, int nargs); + +/* + * Extra function: a name and an evaluation function. + */ + +typedef struct _xmlXPathFunct xmlXPathFunct; +typedef xmlXPathFunct *xmlXPathFuncPtr; +struct _xmlXPathFunct { + const xmlChar *name; /* the function name */ + xmlXPathEvalFunc func; /* the evaluation function */ +}; + +/* + * An axis traversal function. To traverse an axis, the engine calls + * the first time with cur == NULL and repeats until the function returns + * NULL indicating the end of the axis traversal. + */ + +typedef xmlXPathObjectPtr (*xmlXPathAxisFunc) (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr cur); + +/* + * Extra axis: a name and an axis function. + */ + +typedef struct _xmlXPathAxis xmlXPathAxis; +typedef xmlXPathAxis *xmlXPathAxisPtr; +struct _xmlXPathAxis { + const xmlChar *name; /* the axis name */ + xmlXPathAxisFunc func; /* the search function */ +}; + +/* + * Expression evaluation occurs with respect to a context. + * The context consists of: + * - a node (the context node) + * - a node list (the context node list) + * - a set of variable bindings + * - a function library + * - the set of namespace declarations in scope for the expression + * Following the switch to hash tables, this needs to be trimmed up at + * the next binary incompatible release.
+ */ + +struct _xmlXPathContext { + xmlDocPtr doc; /* The current document */ + xmlNodePtr node; /* The current node */ + + int nb_variables_unused; /* unused (hash table) */ + int max_variables_unused; /* unused (hash table) */ + xmlHashTablePtr varHash; /* Hash table of defined variables */ + + int nb_types; /* number of defined types */ + int max_types; /* max number of types */ + xmlXPathTypePtr types; /* Array of defined types */ + + int nb_funcs_unused; /* unused (hash table) */ + int max_funcs_unused; /* unused (hash table) */ + xmlHashTablePtr funcHash; /* Hash table of defined funcs */ + + int nb_axis; /* number of defined axis */ + int max_axis; /* max number of axis */ + xmlXPathAxisPtr axis; /* Array of defined axis */ + + /* the namespace nodes of the context node */ + xmlNsPtr *namespaces; /* Array of namespaces */ + int nsNr; /* number of namespace in scope */ + void *user; /* function to free */ + + /* extra variables */ + int contextSize; /* the context size */ + int proximityPosition; /* the proximity position */ + + /* extra stuff for XPointer */ + int xptr; /* it this an XPointer context */ + xmlNodePtr here; /* for here() */ + xmlNodePtr origin; /* for origin() */ + + /* the set of namespace declarations in scope for the expression */ + xmlHashTablePtr nsHash; /* The namespaces hash table */ + void *varLookupFunc; /* variable lookup func */ + void *varLookupData; /* variable lookup data */ + + /* Possibility to link in an extra item */ + void *extra; /* needed for XSLT */ +}; + +/* + * An XPath parser context, it contains pure parsing informations, + * an xmlXPathContext, and the stack of objects. 
+ */ +struct _xmlXPathParserContext { + const xmlChar *cur; /* the current char being parsed */ + const xmlChar *base; /* the full expression */ + + int error; /* error code */ + + xmlXPathContextPtr context; /* the evaluation context */ + xmlXPathObjectPtr value; /* the current value */ + int valueNr; /* number of values stacked */ + int valueMax; /* max number of values stacked */ + xmlXPathObjectPtr *valueTab; /* stack of values */ +}; + +/* + * An XPath function + * The arguments (if any) are popped out of the context stack + * and the result is pushed on the stack. + */ + +typedef void (*xmlXPathFunction) (xmlXPathParserContextPtr ctxt, int nargs); + +/************************************************************************ + * * + * Public API * + * * + ************************************************************************/ + +/** + * Evaluation functions. + */ +void xmlXPathInit (void); +xmlXPathContextPtr xmlXPathNewContext (xmlDocPtr doc); +void xmlXPathFreeContext (xmlXPathContextPtr ctxt); +xmlXPathObjectPtr xmlXPathEval (const xmlChar *str, + xmlXPathContextPtr ctxt); +xmlXPathObjectPtr xmlXPathEvalXPtrExpr (const xmlChar *str, + xmlXPathContextPtr ctxt); +void xmlXPathFreeObject (xmlXPathObjectPtr obj); +xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str, + xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val); +void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj); +void xmlXPathFreeNodeSet (xmlNodeSetPtr obj); +xmlXPathObjectPtr xmlXPathObjectCopy (xmlXPathObjectPtr val); +int xmlXPathCmpNodes (xmlNodePtr node1, + xmlNodePtr node2); + + +#ifdef __cplusplus +} +#endif +#endif /* ! 
__XML_XPATH_H__ */ diff --git a/include/libxml/xpathInternals.h b/include/libxml/xpathInternals.h new file mode 100644 index 00000000..51f6ad55 --- /dev/null +++ b/include/libxml/xpathInternals.h @@ -0,0 +1,236 @@ +/* + * xpathInternals.h: internal interfaces for XML Path Language implementation + * used to build new modules on top of XPath + * + * See COPYRIGHT for the status of this software + * + * Author: Daniel.Veillard@w3.org + */ + +#ifndef __XML_XPATH_INTERNALS_H__ +#define __XML_XPATH_INTERNALS_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/************************************************************************ + * * + * Helpers * + * * + ************************************************************************/ + +#define CHECK_ERROR \ + if (ctxt->error != XPATH_EXPRESSION_OK) return + +#define CHECK_ERROR0 \ + if (ctxt->error != XPATH_EXPRESSION_OK) return(0) + +#define XP_ERROR(X) \ + { xmlXPatherror(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return; } + +#define XP_ERROR0(X) \ + { xmlXPatherror(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return(0); } + +#define CHECK_TYPE(typeval) \ + if ((ctxt->value == NULL) || (ctxt->value->type != typeval)) \ + XP_ERROR(XPATH_INVALID_TYPE) + +#define CHECK_ARITY(x) \ + if (nargs != (x)) \ + XP_ERROR(XPATH_INVALID_ARITY); + +#define CAST_TO_STRING \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_STRING)) \ + xmlXPathStringFunction(ctxt, 1); + +#define CAST_TO_NUMBER \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_NUMBER)) \ + xmlXPathNumberFunction(ctxt, 1); + +#define CAST_TO_BOOLEAN \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_BOOLEAN)) \ + xmlXPathBooleanFunction(ctxt, 1); + +/* + * Variable lookup forwarding + */ +typedef xmlXPathObjectPtr + (*xmlXPathVariableLookupFunc) (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +void xmlXPathRegisterVariableLookup (xmlXPathContextPtr ctxt, + xmlXPathVariableLookupFunc f, + void *varCtxt);
+ +/* + * Error reporting + */ +void xmlXPatherror (xmlXPathParserContextPtr ctxt, + const char *file, + int line, + int no); + +void xmlXPathDebugDumpObject (FILE *output, + xmlXPathObjectPtr cur, + int depth); + +/** + * Extending a context + */ + +int xmlXPathRegisterNs (xmlXPathContextPtr ctxt, + const xmlChar *prefix, + const xmlChar *ns_uri); +const xmlChar * xmlXPathNsLookup (xmlXPathContextPtr ctxt, + const xmlChar *ns_uri); +void xmlXPathRegisteredNsCleanup (xmlXPathContextPtr ctxt); + +int xmlXPathRegisterFunc (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathFunction f); +int xmlXPathRegisterFuncNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathFunction f); +int xmlXPathRegisterVariable (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathObjectPtr value); +int xmlXPathRegisterVariableNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathObjectPtr value); +xmlXPathFunction xmlXPathFunctionLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +xmlXPathFunction xmlXPathFunctionLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +void xmlXPathRegisteredFuncsCleanup(xmlXPathContextPtr ctxt); +xmlXPathObjectPtr xmlXPathVariableLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +xmlXPathObjectPtr xmlXPathVariableLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +void xmlXPathRegisteredVariablesCleanup(xmlXPathContextPtr ctxt); + +/** + * Utilities to extend XPath + */ +xmlXPathParserContextPtr + xmlXPathNewParserContext (const xmlChar *str, + xmlXPathContextPtr ctxt); +void xmlXPathFreeParserContext (xmlXPathParserContextPtr ctxt); + +/* TODO: remap to xmlXPathValuePop and Push */ +xmlXPathObjectPtr valuePop (xmlXPathParserContextPtr ctxt); +int valuePush (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr value); + +xmlXPathObjectPtr xmlXPathNewString (const xmlChar *val); +xmlXPathObjectPtr 
xmlXPathNewCString (const char *val); +xmlXPathObjectPtr xmlXPathNewFloat (double val); +xmlXPathObjectPtr xmlXPathNewBoolean (int val); +xmlXPathObjectPtr xmlXPathNewNodeSet (xmlNodePtr val); +xmlXPathObjectPtr xmlXPathNewValueTree (xmlNodePtr val); +void xmlXPathNodeSetAdd (xmlNodeSetPtr cur, + xmlNodePtr val); + + +void xmlXPathIdFunction (xmlXPathParserContextPtr ctxt, + int nargs); +void xmlXPathRoot (xmlXPathParserContextPtr ctxt); +void xmlXPathEvalExpr (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathParseName (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathParseNCName (xmlXPathParserContextPtr ctxt); + +/* + * Debug + */ +#ifdef LIBXML_DEBUG_ENABLED +double xmlXPathStringEvalNumber(const xmlChar *str); +void xmlXPathDebugDumpObject(FILE *output, xmlXPathObjectPtr cur, int depth); +#endif +/* + * Existing functions + */ + +int xmlXPathEvaluatePredicateResult(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr res); +void xmlXPathInit(void); +void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetCreate(xmlNodePtr val); +void xmlXPathNodeSetAdd(xmlNodeSetPtr cur, xmlNodePtr val); +xmlNodeSetPtr xmlXPathNodeSetMerge(xmlNodeSetPtr val1, xmlNodeSetPtr val2); +void xmlXPathNodeSetDel(xmlNodeSetPtr cur, xmlNodePtr val); +void xmlXPathNodeSetRemove(xmlNodeSetPtr cur, int val); +void xmlXPathFreeNodeSet(xmlNodeSetPtr obj); +xmlXPathObjectPtr xmlXPathNewNodeSet(xmlNodePtr val); +xmlXPathObjectPtr xmlXPathNewNodeSetList(xmlNodeSetPtr val); +xmlXPathObjectPtr xmlXPathWrapNodeSet(xmlNodeSetPtr val); +void xmlXPathFreeNodeSetList(xmlXPathObjectPtr obj); + + +xmlXPathObjectPtr xmlXPathNewFloat(double val); +xmlXPathObjectPtr xmlXPathNewBoolean(int val); +xmlXPathObjectPtr xmlXPathNewString(const xmlChar *val); +xmlXPathObjectPtr xmlXPathNewCString(const char *val); +void xmlXPathFreeObject(xmlXPathObjectPtr obj); +xmlXPathContextPtr 
xmlXPathNewContext(xmlDocPtr doc); +void xmlXPathFreeContext(xmlXPathContextPtr ctxt); + +int xmlXPathEqualValues(xmlXPathParserContextPtr ctxt); +int xmlXPathCompareValues(xmlXPathParserContextPtr ctxt, int inf, int strict); +void xmlXPathValueFlipSign(xmlXPathParserContextPtr ctxt); +void xmlXPathAddValues(xmlXPathParserContextPtr ctxt); +void xmlXPathSubValues(xmlXPathParserContextPtr ctxt); +void xmlXPathMultValues(xmlXPathParserContextPtr ctxt); +void xmlXPathDivValues(xmlXPathParserContextPtr ctxt); +void xmlXPathModValues(xmlXPathParserContextPtr ctxt); + + +/* + * Some of the axis navigation routines + */ +xmlNodePtr xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur); +xmlNodePtr xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur); +xmlNodePtr xmlXPathNextPrecedingSibling(xmlXPathParserContextPtr ctxt, xmlNodePtr cur); +/* + * The official core of XPath functions + */ +void xmlXPathRoot(xmlXPathParserContextPtr ctxt); +void xmlXPathLastFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathPositionFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathCountFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathLocalNameFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNamespaceURIFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStringLengthFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathContainsFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStartsWithFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringBeforeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr 
ctxt, int nargs); +void xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathTranslateFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNotFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathTrueFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathFalseFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathLangFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSumFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathFloorFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathCeilingFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathRoundFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathBooleanFunction(xmlXPathParserContextPtr ctxt, int nargs); +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_XPATH_INTERNALS_H__ */ diff --git a/include/libxml/xpointer.h b/include/libxml/xpointer.h new file mode 100644 index 00000000..786fb5a0 --- /dev/null +++ b/include/libxml/xpointer.h @@ -0,0 +1,57 @@ +/* + * xpointer.h : API to handle XML Pointers + * + * World Wide Web Consortium Working Draft 03-March-1998 + * http://www.w3.org/TR/1998/WD-xptr-19980303 + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_XPTR_H__ +#define __XML_XPTR_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A Location Set + */ +typedef struct _xmlLocationSet xmlLocationSet; +typedef xmlLocationSet *xmlLocationSetPtr; +struct _xmlLocationSet { + int locNr; /* number of locations in the set */ + int locMax; /* size of the array as allocated */ + xmlXPathObjectPtr *locTab;/* array of locations */ +}; + +/* + * Handling of location sets + */ + +void xmlXPtrFreeLocationSet (xmlLocationSetPtr obj); +xmlLocationSetPtr xmlXPtrLocationSetMerge (xmlLocationSetPtr val1, + xmlLocationSetPtr val2); + +/* + * Functions + */ +xmlXPathContextPtr xmlXPtrNewContext (xmlDocPtr doc, + xmlNodePtr here, + xmlNodePtr origin); +xmlXPathObjectPtr xmlXPtrEval (const xmlChar *str, + xmlXPathContextPtr ctx); +void xmlXPtrRangeToFunction (xmlXPathParserContextPtr ctxt, + int nargs); +xmlNodePtr xmlXPtrBuildNodeList (xmlXPathObjectPtr obj); +void xmlXPtrEvalRangePredicate (xmlXPathParserContextPtr ctxt); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XPTR_H__ */ diff --git a/include/win32config.h b/include/win32config.h new file mode 100644 index 00000000..ea2cd506 --- /dev/null +++ b/include/win32config.h @@ -0,0 +1,92 @@ +#define HAVE_CTYPE_H +#define HAVE_STDLIB_H +#define HAVE_MALLOC_H +#define HAVE_TIME_H +#define HAVE_FCNTL_H + +#include + +#define LIBXML_DLL_IMPORT +#define SOCKLEN_T int + +#ifdef INCLUDE_WINSOCK +#include + +#define EWOULDBLOCK WSAEWOULDBLOCK +#define EINPROGRESS WSAEINPROGRESS +#define EALREADY WSAEALREADY +#define ENOTSOCK WSAENOTSOCK +#define EDESTADDRREQ WSAEDESTADDRREQ +#define EMSGSIZE WSAEMSGSIZE +#define EPROTOTYPE WSAEPROTOTYPE +#define ENOPROTOOPT WSAENOPROTOOPT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT +#define EOPNOTSUPP WSAEOPNOTSUPP +#define EPFNOSUPPORT WSAEPFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#define EADDRINUSE WSAEADDRINUSE +#define 
EADDRNOTAVAIL WSAEADDRNOTAVAIL +#define ENETDOWN WSAENETDOWN +#define ENETUNREACH WSAENETUNREACH +#define ENETRESET WSAENETRESET +#define ECONNABORTED WSAECONNABORTED +#define ECONNRESET WSAECONNRESET +#define ENOBUFS WSAENOBUFS +#define EISCONN WSAEISCONN +#define ENOTCONN WSAENOTCONN +#define ESHUTDOWN WSAESHUTDOWN +#define ETOOMANYREFS WSAETOOMANYREFS +#define ETIMEDOUT WSAETIMEDOUT +#define ECONNREFUSED WSAECONNREFUSED +#define ELOOP WSAELOOP +#define ENAMETOOLONG WSAENAMETOOLONG +#define EHOSTDOWN WSAEHOSTDOWN +#define EHOSTUNREACH WSAEHOSTUNREACH +#define ENOTEMPTY WSAENOTEMPTY +#define EPROCLIM WSAEPROCLIM +#define EUSERS WSAEUSERS +#define EDQUOT WSAEDQUOT +#define ESTALE WSAESTALE +#define EREMOTE WSAEREMOTE +#endif /* INCLUDE_WINSOCK */ + +#define HAVE_ISINF +#define HAVE_ISNAN + +#include +static int isinf (double d) { + int expon = 0; + double val = frexp (d, &expon); + if (expon == 1025) { + if (val == 0.5) { + return 1; + } else if (val == -0.5) { + return -1; + } else { + return 0; + } + } else { + return 0; + } +} +static int isnan (double d) { + int expon = 0; + double val = frexp (d, &expon); + if (expon == 1025) { + if (val == 0.5) { + return 0; + } else if (val == -0.5) { + return 0; + } else { + return 1; + } + } else { + return 0; + } +} + +#include + +#define HAVE_SYS_STAT_H #define HAVE__STAT + diff --git a/libxml-2.0.pc.in b/libxml-2.0.pc.in new file mode 100644 index 00000000..d93b3e77 --- /dev/null +++ b/libxml-2.0.pc.in @@ -0,0 +1,12 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + + +Name: libXML +Version: @VERSION@ +Description: libXML library. 
+Requires: +Libs: -L${libdir} -lxml2 @Z_LIBS@ @M_LIBS@ @LIBS@ +Cflags: @XML_INCLUDEDIR@ @XML_CFLAGS@ diff --git a/libxml.4 b/libxml.4 new file mode 100644 index 00000000..5aa868f3 --- /dev/null +++ b/libxml.4 @@ -0,0 +1,66 @@ +.TH libxml 4 "12 April 2000" +.SH NAME +libxml \- library used to parse XML files +.SH DESCRIPTION +The +.I libxml +library is used to parse XML files. +Its internal document representation is as close as possible to the +.I DOM +(Document Object Model) interface, +an API for accessing XML or HTML structured documents. +.LP +The +.I libxml +library also has a +.IR SAX -like +interface, +which is designed to be compatible with +.IR expat (1). +NOTE: +.IR SAX , +the Simple API for XML, +is a standard interface for event-based XML parsing, +developed collaboratively by the members of the XML-DEV mailing list, +currently hosted by OASIS. +The +.I expat +library is an XML 1.0 parser written in C, +which aims to be fully conforming. +It is currently not a validating XML processor. +.LP +The +.I libxml +library now includes a nearly complete +.I XPath +implementation. +The +.I XPath +(XML Path Language) is a language for addressing parts of an +XML document, +designed to be used by both +.I XSLT +and +.IR XPointer . +.LP +The +.I libxml +library exports Push and Pull type parser interfaces for both XML and +.IR html . +.SH FILES +.TP 2.2i +.B /depot/lib/libxml_2.0.0/libxml.a +static library +.TP +.B /depot/lib/libxml_2.0.0/libxml.so +shareable library +.TP +.B /depot/package/libxml_2.0.0/bin/xmllint +binary application for parsing XML files +.SH AUTHORS +Daniel Veillard (Daniel.Veillard@w3.org). +If you download and install this package please send the author email. +Manual page by Ziying Sherwin (sherwin@nlm.nih.gov), +Lister Hill National Center for Biomedical Communications, +U.S. National Library of Medicine.
+.\" end of manual page diff --git a/libxml.m4 b/libxml.m4 new file mode 100644 index 00000000..1401af53 --- /dev/null +++ b/libxml.m4 @@ -0,0 +1,148 @@ +dnl Code shamelessly stolen from glib-config by Sebastian Rittau +dnl AM_PATH_XML([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]]) +AC_DEFUN(AM_PATH_XML,[ +AC_ARG_WITH(xml-prefix, + [ --with-xml-prefix=PFX Prefix where libxml is installed (optional)], + xml_config_prefix="$withval", xml_config_prefix="") +AC_ARG_ENABLE(xmltest, + [ --disable-xmltest Do not try to compile and run a test XML program],, + enable_xmltest=yes) + + if test x$xml_config_prefix != x ; then + xml_config_args="$xml_config_args --prefix=$xml_config_prefix" + if test x${XML_CONFIG+set} != xset ; then + XML_CONFIG=$xml_config_prefix/bin/xml-config + fi + fi + + AC_PATH_PROG(XML_CONFIG, xml-config, no) + min_xml_version=ifelse([$1], ,2.0.0, [$1]) + AC_MSG_CHECKING(for libxml - version >= $min_xml_version) + no_xml="" + if test "$XML_CONFIG" = "no" ; then + no_xml=yes + else + XML_CFLAGS=`$XML_CONFIG $xml_config_args --cflags` + XML_LIBS=`$XML_CONFIG $xml_config_args --libs` + xml_config_major_version=`$XML_CONFIG $xml_config_args --version | \ + sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\1/'` + xml_config_minor_version=`$XML_CONFIG $xml_config_args --version | \ + sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\2/'` + xml_config_micro_version=`$XML_CONFIG $xml_config_args --version | \ + sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\3/'` + if test "x$enable_xmltest" = "xyes" ; then + ac_save_CFLAGS="$CFLAGS" + ac_save_LIBS="$LIBS" + CFLAGS="$CFLAGS $XML_CFLAGS" + LIBS="$XML_LIBS $LIBS" +dnl +dnl Now check if the installed libxml is sufficiently new. 
+dnl + rm -f conf.xmltest + AC_TRY_RUN([ +#include +#include +#include +#include + +int +main() +{ + int xml_major_version, xml_minor_version, xml_micro_version; + int major, minor, micro; + char *tmp_version; + + system("touch conf.xmltest"); + + tmp_version = xmlStrdup("$min_xml_version"); + if(sscanf(tmp_version, "%d.%d.%d", &major, &minor, &micro) != 3) { + printf("%s, bad version string\n", "$min_xml_version"); + exit(1); + } + + tmp_version = xmlStrdup(LIBXML_DOTTED_VERSION); + if(sscanf(tmp_version, "%d.%d.%d", &xml_major_version, &xml_minor_version, &xml_micro_version) != 3) { + printf("%s, bad version string\n", "$min_xml_version"); + exit(1); + } + + if((xml_major_version != $xml_config_major_version) || + (xml_minor_version != $xml_config_minor_version) || + (xml_micro_version != $xml_config_micro_version)) + { + printf("\n*** 'xml-config --version' returned %d.%d.%d, but libxml (%d.%d.%d)\n", + $xml_config_major_version, $xml_config_minor_version, $xml_config_micro_version, + xml_major_version, xml_minor_version, xml_micro_version); + printf("*** was found! If xml-config was correct, then it is best\n"); + printf("*** to remove the old version of libxml. You may also be able to fix the error\n"); + printf("*** by modifying your LD_LIBRARY_PATH enviroment variable, or by editing\n"); + printf("*** /etc/ld.so.conf.
Make sure you have run ldconfig if that is\n"); + printf("*** required on your system.\n"); + printf("*** If xml-config was wrong, set the environment variable XML_CONFIG\n"); + printf("*** to point to the correct copy of xml-config, and remove the file config.cache\n"); + printf("*** before re-running configure\n"); + } + else + { + if ((xml_major_version > major) || + ((xml_major_version == major) && (xml_minor_version > minor)) || + ((xml_major_version == major) && (xml_minor_version == minor) && + (xml_micro_version >= micro))) + { + return 0; + } + else + { + printf("\n*** An old version of libxml (%d.%d.%d) was found.\n", + xml_major_version, xml_minor_version, xml_micro_version); + printf("*** You need a version of libxml newer than %d.%d.%d. The latest version of\n", + major, minor, micro); + printf("*** libxml is always available from ftp://ftp.gnome.org.\n"); + printf("***\n"); + printf("*** If you have already installed a sufficiently new version, this error\n"); + printf("*** probably means that the wrong copy of the xml-config shell script is\n"); + printf("*** being found. The easiest way to fix this is to remove the old version\n"); + printf("*** of libxml, but you can also set the XML_CONFIG environment to point to the\n"); + printf("*** correct copy of xml-config. (In this case, you will have to\n"); + printf("*** modify your LD_LIBRARY_PATH enviroment variable, or edit /etc/ld.so.conf\n"); + printf("*** so that the correct libraries are found at run-time))\n"); + } + } + return 1; +} +],, no_xml=yes,[echo $ac_n "cross compiling; assumed OK... 
$ac_c"]) + + CFLAGS="$ac_save_CFLAGS" + LIBS="$ac_save_LIBS" + fi + fi + + if test "x$no_xml" = x ; then + AC_MSG_RESULT(yes) + ifelse([$2], , :, [$2]) + else + AC_MSG_RESULT(no) + if test "$XML_CONFIG" = "no" ; then + echo "*** The xml-config script installed by libxml could not be found" + echo "*** If libxml was installed in PREFIX, make sure PREFIX/bin is in" + echo "*** your path, or set the XML_CONFIG environment variable to the" + echo "*** full path to xml-config." + else + if test -f conf.xmltest ; then + : + else + echo "*** Could not run libxml test program, checking why..." + CFLAGS="$CFLAGS $XML_CFLAGS" + LIBS="$LIBS $XML_LIBS" + dnl FIXME: AC_TRY_LINK + fi + fi + + XML_CFLAGS="" + XML_LIBS="" + ifelse([$3], , :, [$3]) + fi + AC_SUBST(XML_CFLAGS) + AC_SUBST(XML_LIBS) + rm -f conf.xmltest +]) diff --git a/list.c b/list.c new file mode 100644 index 00000000..bbe6144e --- /dev/null +++ b/list.c @@ -0,0 +1,706 @@ +/* + * list.c: lists handling implementation + * + * Copyright (C) 2000 Gary Pennington and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: Gary.Pennington@uk.sun.com + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include +#include +#include + +/* + * Type definitions are kept internal + */ + +struct _xmlLink +{ + struct _xmlLink *next; + struct _xmlLink *prev; + void *data; +}; + +struct _xmlList +{ + xmlLinkPtr sentinel; + void (*linkDeallocator)(xmlLinkPtr ); + int (*linkCompare)(const void *, const void*); +}; + +/************************************************************************ + * * + * Interfaces * + * * + ************************************************************************/ + +/** + * xmlLinkDeallocator: + * @l: a list + * @lk: a link + * + * Unlink and deallocate @lk from list @l + */ +static void +xmlLinkDeallocator(xmlListPtr l, xmlLinkPtr lk) +{ + (lk->prev)->next = lk->next; + (lk->next)->prev = lk->prev; + if(l->linkDeallocator) + l->linkDeallocator(lk); + xmlFree(lk); +} + +/** + * xmlLinkCompare: + * @data0: first data + * @data1: second data + * + * Compares two arbitrary data pointers by address + * + * Returns -1, 0 or 1 depending on whether data1 is greater, equal or smaller + * than data0 + */ +static int +xmlLinkCompare(const void *data0, const void *data1) +{ + if (data0 < data1) + return (-1); + else if (data0 == data1) + return (0); + return (1); +} + +/** + * xmlListLowerSearch: + * @l: a list + * @data: a data + * + * Search data in the ordered list walking from the beginning + * + * Returns the first link whose data does not compare lower than @data, + * or the list sentinel if there is none + */ +static xmlLinkPtr +xmlListLowerSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + for(lk = l->sentinel->next;lk != l->sentinel && l->linkCompare(lk->data, data) <0 ;lk = lk->next); + return lk; +} + +/** + * xmlListHigherSearch: + * @l: a list + * @data: a data + * + * Search data in the ordered list walking backward from the end + * + * Returns the last link whose data does not compare greater than @data, + * or the list sentinel if there is none + */ +static xmlLinkPtr +xmlListHigherSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + for(lk =
l->sentinel->prev;lk != l->sentinel && l->linkCompare(lk->data, data) >0 ;lk = lk->prev); + return lk; +} + +/** + * xmlListLinkSearch: + * @l: a list + * @data: a data + * + * Search data in the list + * + * Returns the link containing the data or NULL + */ +static xmlLinkPtr +xmlListLinkSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListLowerSearch(l, data); + if (lk == l->sentinel) + return NULL; + else { + if (l->linkCompare(lk->data, data) ==0) + return lk; + return NULL; + } +} + +/** + * xmlListLinkReverseSearch: + * @l: a list + * @data: a data + * + * Search data in the list processing backward + * + * Returns the link containing the data or NULL + */ +xmlLinkPtr +xmlListLinkReverseSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListHigherSearch(l, data); + if (lk == l->sentinel) + return NULL; + else { + if (l->linkCompare(lk->data, data) ==0) + return lk; + return NULL; + } +} + +/** + * xmlListCreate: + * @deallocator: an optional deallocator function + * @compare: an optional comparison function + * + * Create a new list + * + * Returns the new list or NULL in case of error + */ +xmlListPtr +xmlListCreate(xmlListDeallocator deallocator, xmlListDataCompare compare) +{ + xmlListPtr l; + if (NULL == (l = (xmlListPtr )xmlMalloc( sizeof(xmlList)))) { + perror("Cannot initialize memory for list"); + return (NULL); + } + /* Zero-initialize the list structure */ + memset(l, 0, sizeof(xmlList)); + + /* Add the sentinel */ + if (NULL ==(l->sentinel = (xmlLinkPtr )xmlMalloc(sizeof(xmlLink)))) { + perror("Cannot initialize memory for sentinel"); + xmlFree(l); + return (NULL); + } + l->sentinel->next = l->sentinel; + l->sentinel->prev = l->sentinel; + l->sentinel->data = NULL; + + /* If there is a link deallocator, use it */ + if (deallocator != NULL) + l->linkDeallocator = deallocator; + /* If there is a link comparator, use it */ + if (compare != NULL) + l->linkCompare = compare; + else /* Use our own */ + l->linkCompare = xmlLinkCompare; +
return l; +} + +/** + * xmlListSearch: + * @l: a list + * @data: a search value + * + * Search the list for an existing value of @data + * + * Returns the value associated to @data or NULL if it is not found + */ +void * +xmlListSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListLinkSearch(l, data); + if (lk) + return (lk->data); + return NULL; +} + +/** + * xmlListReverseSearch: + * @l: a list + * @data: a search value + * + * Search the list in reverse order for an existing value of @data + * + * Returns the value associated to @data or NULL if it is not found + */ +void * +xmlListReverseSearch(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + lk = xmlListLinkReverseSearch(l, data); + if (lk) + return (lk->data); + return NULL; +} + +/** + * xmlListInsert: + * @l: a list + * @data: the data + * + * Insert data in the ordered list at the beginning for this value + * + * Returns 0 in case of success, 1 in case of failure + */ +int +xmlListInsert(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = xmlListLowerSearch(l, data); + /* Add the new link */ + lkNew = (xmlLinkPtr) xmlMalloc(sizeof(xmlLink)); + if (lkNew == NULL) { + perror("Cannot initialize memory for new link"); + return (1); + } + lkNew->data = data; + lkPlace = lkPlace->prev; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 0; +} + +/** + * xmlListAppend: + * @l: a list + * @data: the data + * + * Insert data in the ordered list at the end for this value + * + * Returns 1 in case of success, 0 in case of failure (note: this is the + * opposite of the xmlListInsert convention) + */ +int xmlListAppend(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = xmlListHigherSearch(l, data); + /* Add the new link */ + lkNew = (xmlLinkPtr) xmlMalloc(sizeof(xmlLink)); + if (lkNew == NULL) { + perror("Cannot initialize memory for new link"); + return (0); + } + lkNew->data = data; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; +
lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 1; +} + +/** + * xmlListDelete: + * @l: a list + * + * Deletes the list and its associated data + */ +void xmlListDelete(xmlListPtr l) +{ + xmlListClear(l); + xmlFree(l->sentinel); + xmlFree(l); +} + +/** + * xmlListRemoveFirst: + * @l: a list + * @data: list data + * + * Remove the first instance associated to data in the list + * + * Returns 1 if a deallocation occured, or 0 if not found + */ +int +xmlListRemoveFirst(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + /*Find the first instance of this data */ + lk = xmlListLinkSearch(l, data); + if (lk != NULL) { + xmlLinkDeallocator(l, lk); + return 1; + } + return 0; +} + +/** + * xmlListRemoveLast: + * @l: a list + * @data: list data + * + * Remove the last instance associated to data in the list + * + * Returns 1 if a deallocation occured, or 0 if not found + */ +int +xmlListRemoveLast(xmlListPtr l, void *data) +{ + xmlLinkPtr lk; + + /*Find the last instance of this data */ + lk = xmlListLinkReverseSearch(l, data); + if (lk != NULL) { + xmlLinkDeallocator(l, lk); + return 1; + } + return 0; +} + +/** + * xmlListRemoveAll: + * @l: a list + * @data: list data + * + * Remove the all instance associated to data in the list + * + * Returns the number of deallocation, or 0 if not found + */ +int +xmlListRemoveAll(xmlListPtr l, void *data) +{ + int count=0; + + + while(xmlListRemoveFirst(l, data)) + count++; + return count; +} + +/** + * xmlListClear: + * @l: a list + * + * Remove the all data in the list + */ +void +xmlListClear(xmlListPtr l) +{ + xmlLinkPtr lk = l->sentinel->next; + + while(lk != l->sentinel) { + xmlLinkPtr next = lk->next; + + xmlLinkDeallocator(l, lk); + lk = next; + } +} + +/** + * xmlListEmpty: + * @l: a list + * + * Returns 1 if the list is empty, 0 otherwise + */ +int +xmlListEmpty(xmlListPtr l) +{ + return (l->sentinel->next == l->sentinel); +} + +/** + * xmlListFront: + * @l: a list + * + * Returns the first element in the list, or 
NULL + */ +xmlLinkPtr +xmlListFront(xmlListPtr l) +{ + return (l->sentinel->next); +} + +/** + * xmlListFront: + * @l: a list + * + * Returns the last element in the list, or NULL + */ +xmlLinkPtr +xmlListEnd(xmlListPtr l) +{ + return (l->sentinel->prev); +} + +/** + * xmlListSize: + * @l: a list + * + * Returns the number of elements in the list + */ +int +xmlListSize(xmlListPtr l) +{ + xmlLinkPtr lk; + int count=0; + + /* TODO: keep a counter in xmlList instead */ + for(lk = l->sentinel->next; lk != l->sentinel; lk = lk->next, count++); + return count; +} + +/** + * xmlListPopFront: + * @l: a list + * + * Removes the first element in the list + */ +void +xmlListPopFront(xmlListPtr l) +{ + if(!xmlListEmpty(l)) + xmlLinkDeallocator(l, l->sentinel->next); +} + +/** + * xmlListPopBack: + * @l: a list + * + * Removes the last element in the list + */ +void +xmlListPopBack(xmlListPtr l) +{ + if(!xmlListEmpty(l)) + xmlLinkDeallocator(l, l->sentinel->prev); +} + +/** + * xmlListPushFront: + * @l: a list + * @data: new data + * + * add the new data at the beginning of the list + * + * Returns 1 if successful, 0 otherwise + */ +int +xmlListPushFront(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = l->sentinel; + /* Add the new link */ + lkNew = (xmlLinkPtr) xmlMalloc(sizeof(xmlLink)); + if (lkNew == NULL) { + perror("Cannot initialize memory for new link"); + return (0); + } + lkNew->data = data; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 1; +} + +/** + * xmlListPushBack: + * @l: a list + * @data: new data + * + * add the new data at the end of the list + * + * Returns 1 if successful, 0 otherwise + */ +int +xmlListPushBack(xmlListPtr l, void *data) +{ + xmlLinkPtr lkPlace, lkNew; + + lkPlace = l->sentinel->prev; + /* Add the new link */ + if (NULL ==(lkNew = (xmlLinkPtr )xmlMalloc(sizeof(xmlLink)))) { + perror("Cannot initialize memory for new link"); + return (0); + } + 
lkNew->data = data; + lkNew->next = lkPlace->next; + (lkPlace->next)->prev = lkNew; + lkPlace->next = lkNew; + lkNew->prev = lkPlace; + return 1; +} + +/** + * xmlLinkGetData: + * @lk: a link + * + * See Returns. + * + * Returns a pointer to the data referenced from this link + */ +void * +xmlLinkGetData(xmlLinkPtr lk) +{ + return lk->data; +} + +/** + * xmlListReverse: + * @l: a list + * + * Reverse the order of the elements in the list + */ +void +xmlListReverse(xmlListPtr l) { + xmlLinkPtr lk; + xmlLinkPtr lkPrev = l->sentinel; + + for(lk = l->sentinel->next; lk != l->sentinel; lk = lk->next) { + lkPrev->next = lkPrev->prev; + lkPrev->prev = lk; + lkPrev = lk; + } + /* Fix up the last node */ + lkPrev->next = lkPrev->prev; + lkPrev->prev = lk; +} + +/** + * xmlListSort: + * @l: a list + * + * Sort all the elements in the list + */ +void +xmlListSort(xmlListPtr l) +{ + xmlListPtr lTemp; + + if(xmlListEmpty(l)) + return; + + /* I think that the real answer is to implement quicksort, the + * alternative is to implement some list copying procedure which + * would be based on a list copy followed by a clear followed by + * an insert. This is slow... 
+ */ + + if (NULL ==(lTemp = xmlListDup(l))) + return; + xmlListClear(l); + xmlListMerge(l, lTemp); + xmlListDelete(lTemp); + return; +} + +/** + * xmlListWalk: + * @l: a list + * @walker: a processing function + * + * Walk all the element of the first from first to last and + * apply the walker function to it + */ +void +xmlListWalk(xmlListPtr l, xmlListWalker walker, const void *user) { + xmlLinkPtr lk; + + for(lk = l->sentinel->next; lk != l->sentinel; lk = lk->next) { + if((walker(lk->data, user)) == 0) + break; + } +} + +/** + * xmlListReverseWalk: + * @l: a list + * @walker: a processing function + * + * Walk all the element of the list in reverse order and + * apply the walker function to it + */ +void +xmlListReverseWalk(xmlListPtr l, xmlListWalker walker, const void *user) { + xmlLinkPtr lk; + + for(lk = l->sentinel->prev; lk != l->sentinel; lk = lk->prev) { + if((walker(lk->data, user)) == 0) + break; + } +} + +/** + * xmlListMerge: + * @l1: the original list + * @l2: the new list + * + * include all the elements of the second list in the first one and + * clear the second list + */ +void +xmlListMerge(xmlListPtr l1, xmlListPtr l2) +{ + xmlListCopy(l1, l2); + xmlListClear(l2); +} + +/** + * xmlListDup: + * @old: the list + * + * Duplicate the list + * + * Returns a new copy of the list or NULL in case of error + */ +xmlListPtr +xmlListDup(const xmlListPtr old) +{ + xmlListPtr cur; + /* Hmmm, how to best deal with allocation issues when copying + * lists. If there is a de-allocator, should responsibility lie with + * the new list or the old list. Surely not both. 
I'll arbitrarily + * set it to be the old list for the time being whilst I work out + * the answer + */ + if (NULL ==(cur = xmlListCreate(NULL, old->linkCompare))) + return (NULL); + if (0 != xmlListCopy(cur, old)) + return NULL; + return cur; +} + +/** + * xmlListCopy: + * @cur: the new list + * @old: the old list + * + * Move all the element from the old list in the new list + * + * Returns 0 in case of success 1 in case of error + */ +int +xmlListCopy(xmlListPtr cur, const xmlListPtr old) +{ + /* Walk the old tree and insert the data into the new one */ + xmlLinkPtr lk; + + for(lk = old->sentinel->next; lk != old->sentinel; lk = lk->next) { + if (0 !=xmlListInsert(cur, lk->data)) { + xmlListDelete(cur); + return (1); + } + } + return (0); +} +/* xmlListUnique() */ +/* xmlListSwap */ diff --git a/list.h b/list.h new file mode 100644 index 00000000..a708ef20 --- /dev/null +++ b/list.h @@ -0,0 +1,81 @@ +/* + * list.h: lists interfaces + * + * Copyright (C) 2000 Gary Pennington and Daniel Veillard. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND + * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
+ * + * Author: Gary.Pennington@uk.sun.com + */ + +typedef struct _xmlLink xmlLink; +typedef xmlLink *xmlLinkPtr; + +typedef struct _xmlList xmlList; +typedef xmlList *xmlListPtr; + +typedef void (*xmlListDeallocator) (xmlLinkPtr lk); +typedef int (*xmlListDataCompare) (const void *data0, const void *data1); +typedef int (*xmlListWalker) (const void *data, const void *user); + +/* Creation/Deletion */ +xmlListPtr xmlListCreate (xmlListDeallocator deallocator, + xmlListDataCompare compare); +void xmlListDelete (xmlListPtr l); + +/* Basic Operators */ +void * xmlListSearch (xmlListPtr l, + void *data); +void * xmlListReverseSearch (xmlListPtr l, + void *data); +int xmlListInsert (xmlListPtr l, + void *data) ; +int xmlListAppend (xmlListPtr l, + void *data) ; +int xmlListRemoveFirst (xmlListPtr l, + void *data); +int xmlListRemoveLast (xmlListPtr l, + void *data); +int xmlListRemoveAll (xmlListPtr l, + void *data); +void xmlListClear (xmlListPtr l); +int xmlListEmpty (xmlListPtr l); +xmlLinkPtr xmlListFront (xmlListPtr l); +xmlLinkPtr xmlListEnd (xmlListPtr l); +int xmlListSize (xmlListPtr l); + +void xmlListPopFront (xmlListPtr l); +void xmlListPopBack (xmlListPtr l); +int xmlListPushFront (xmlListPtr l, + void *data); +int xmlListPushBack (xmlListPtr l, + void *data); + +/* Advanced Operators */ +void xmlListReverse (xmlListPtr l); +void xmlListSort (xmlListPtr l); +void xmlListWalk (xmlListPtr l, + xmlListWalker walker, + const void *user); +void xmlListReverseWalk (xmlListPtr l, + xmlListWalker walker, + const void *user); +void xmlListMerge (xmlListPtr l1, + xmlListPtr l2); +xmlListPtr xmlListDup (const xmlListPtr old); +int xmlListCopy (xmlListPtr cur, + const xmlListPtr old); +/* Link operators */ +void * xmlLinkGetData (xmlLinkPtr lk); + +/* xmlListUnique() */ +/* xmlListSwap */ + + diff --git a/nanoftp.c b/nanoftp.c new file mode 100644 index 00000000..c7ea79af --- /dev/null +++ b/nanoftp.c @@ -0,0 +1,1964 @@ +/* + * nanoftp.c: basic FTP client support + * + 
* Reference: RFC 959 + */ + +#ifdef TESTING +#define STANDALONE +#define HAVE_STDLIB_H +#define HAVE_UNISTD_H +#define HAVE_SYS_SOCKET_H +#define HAVE_NETINET_IN_H +#define HAVE_NETDB_H +#define HAVE_SYS_TIME_H +#else /* STANDALONE */ +#ifdef WIN32 +#define INCLUDE_WINSOCK +#include "win32config.h" +#else +#include "config.h" +#endif +#endif /* STANDALONE */ + +#include + +#ifdef LIBXML_FTP_ENABLED +#include +#include + +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_NETINET_IN_H +#include +#endif +#ifdef HAVE_ARPA_INET_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_ERRNO_H +#include +#endif +#ifdef HAVE_SYS_TIME_H +#include +#endif +#ifdef HAVE_SYS_SELECT_H +#include +#endif +#ifdef HAVE_STRINGS_H +#include +#endif + +#include +#include +#include + +/* #define DEBUG_FTP 1 */ +#ifdef STANDALONE +#ifndef DEBUG_FTP +#define DEBUG_FTP 1 +#endif +#endif + +/** + * A couple portability macros + */ +#ifndef _WINSOCKAPI_ +#define closesocket(s) close(s) +#define SOCKET int +#endif + +static char hostname[100]; + +#define FTP_COMMAND_OK 200 +#define FTP_SYNTAX_ERROR 500 +#define FTP_GET_PASSWD 331 +#define FTP_BUF_SIZE 512 + +typedef struct xmlNanoFTPCtxt { + char *protocol; /* the protocol name */ + char *hostname; /* the host name */ + int port; /* the port */ + char *path; /* the path within the URL */ + char *user; /* user string */ + char *passwd; /* passwd string */ + struct sockaddr_in ftpAddr; /* the socket address struct */ + int passive; /* currently we support only passive !!! 
*/ + SOCKET controlFd; /* the file descriptor for the control socket */ + SOCKET dataFd; /* the file descriptor for the data socket */ + int state; /* WRITE / READ / CLOSED */ + int returnValue; /* the protocol return value */ + /* buffer for data received from the control connection */ + char controlBuf[FTP_BUF_SIZE + 1]; + int controlBufIndex; + int controlBufUsed; + int controlBufAnswer; +} xmlNanoFTPCtxt, *xmlNanoFTPCtxtPtr; + +static int initialized = 0; +static char *proxy = NULL; /* the proxy name if any */ +static int proxyPort = 0; /* the proxy port if any */ +static char *proxyUser = NULL; /* user for proxy authentication */ +static char *proxyPasswd = NULL;/* passwd for proxy authentication */ +static int proxyType = 0; /* uses TYPE or a@b ? */ + +/** + * xmlNanoFTPInit: + * + * Initialize the FTP protocol layer. + * Currently it just checks for proxy informations, + * and get the hostname + */ + +void +xmlNanoFTPInit(void) { + const char *env; +#ifdef _WINSOCKAPI_ + WSADATA wsaData; +#endif + + if (initialized) + return; + +#ifdef _WINSOCKAPI_ + if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0) + return; +#endif + + gethostname(hostname, sizeof(hostname)); + + proxyPort = 21; + env = getenv("no_proxy"); + if (env != NULL) + return; + env = getenv("ftp_proxy"); + if (env != NULL) { + xmlNanoFTPScanProxy(env); + } else { + env = getenv("FTP_PROXY"); + if (env != NULL) { + xmlNanoFTPScanProxy(env); + } + } + env = getenv("ftp_proxy_user"); + if (env != NULL) { + proxyUser = xmlMemStrdup(env); + } + env = getenv("ftp_proxy_password"); + if (env != NULL) { + proxyPasswd = xmlMemStrdup(env); + } + initialized = 1; +} + +/** + * xmlNanoFTPClenup: + * + * Cleanup the FTP protocol layer. This cleanup proxy informations. 
+ */
+
+void
+xmlNanoFTPCleanup(void) {
+    /* Release the proxy settings and forget them. */
+    if (proxy != NULL) {
+        xmlFree(proxy);
+        proxy = NULL;
+    }
+    if (proxyUser != NULL) {
+        xmlFree(proxyUser);
+        proxyUser = NULL;
+    }
+    if (proxyPasswd != NULL) {
+        xmlFree(proxyPasswd);
+        proxyPasswd = NULL;
+    }
+    hostname[0] = 0;
+#ifdef _WINSOCKAPI_
+    /* WSACleanup() is only called when the layer was marked initialized. */
+    if (initialized)
+        WSACleanup();
+#endif
+    initialized = 0;
+    return;
+}
+
+/**
+ * xmlNanoFTPProxy:
+ * @host:  the proxy host name, or NULL for none
+ * @port:  the proxy port
+ * @user:  the proxy user name, or NULL for none
+ * @passwd:  the proxy password, or NULL for none
+ * @type:  the type of proxy: 1 for using SITE, 2 for USER a@b,
+ *         0 to try both in sequence
+ *
+ * Set up the FTP proxy information. The strings are copied.
+ * Any previously registered host, user and password are released first;
+ * a NULL argument leaves the corresponding setting empty.
+ * This can also be done by using the ftp_proxy, ftp_proxy_user and
+ * ftp_proxy_password environment variables.
+ */
+
+void
+xmlNanoFTPProxy(const char *host, int port, const char *user,
+                const char *passwd, int type) {
+    /*
+     * Each pointer is reset to NULL right after being freed: when the
+     * matching argument is NULL no new copy is stored, and a stale
+     * pointer would be freed a second time by xmlNanoFTPCleanup() or by
+     * the next call to this function.
+     */
+    if (proxy != NULL) {
+        xmlFree(proxy);
+        proxy = NULL;
+    }
+    if (proxyUser != NULL) {
+        xmlFree(proxyUser);
+        proxyUser = NULL;
+    }
+    if (proxyPasswd != NULL) {
+        xmlFree(proxyPasswd);
+        proxyPasswd = NULL;
+    }
+    if (host != NULL)
+        proxy = xmlMemStrdup(host);
+    if (user != NULL)
+        proxyUser = xmlMemStrdup(user);
+    if (passwd != NULL)
+        proxyPasswd = xmlMemStrdup(passwd);
+    proxyPort = port;
+    proxyType = type;
+}
+
+/**
+ * xmlNanoFTPScanURL:
+ * @ctx:  an FTP context
+ * @URL:  The URL used to initialize the context
+ *
+ * (Re)Initialize an FTP context by parsing the URL and finding
+ * the protocol, host, port and path it indicates.
+ */
+
+static void
+xmlNanoFTPScanURL(void *ctx, const char *URL) {
+    xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
+    const char *cur = URL;
+    char buf[4096];
+    const int maxLen = (int) sizeof(buf) - 1;  /* keep room for the final 0 */
+    int index = 0;
+    int port = 0;
+
+    /* Forget whatever a previous URL left in the context. */
+    if (ctxt->protocol != NULL) {
+        xmlFree(ctxt->protocol);
+        ctxt->protocol = NULL;
+    }
+    if (ctxt->hostname != NULL) {
+        xmlFree(ctxt->hostname);
+        ctxt->hostname = NULL;
+    }
+    if (ctxt->path != NULL) {
+        xmlFree(ctxt->path);
+        ctxt->path = NULL;
+    }
+    if (URL == NULL) return;
+
+    /*
+     * Protocol: everything before the first "://". Without that
+     * separator protocol, hostname and path all stay unset.
+     */
+    while (*cur != 0) {
+        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
+            buf[index] = 0;
+            ctxt->protocol = xmlMemStrdup(buf);
+            index = 0;
+            cur += 3;
+            break;
+        }
+        /* Too long for buf: give up rather than write past its end. */
+        if (index >= maxLen)
+            return;
+        buf[index++] = *cur++;
+    }
+    if (*cur == 0) return;
+
+    /* Host name, optionally followed by ":port". */
+    while (1) {
+        if (cur[0] == ':') {
+            buf[index] = 0;
+            ctxt->hostname = xmlMemStrdup(buf);
+            cur += 1;
+            while ((*cur >= '0') && (*cur <= '9')) {
+                /*
+                 * Stop accumulating once the value is already larger
+                 * than any TCP port, so that an endless run of digits
+                 * cannot overflow the int.
+                 */
+                if (port <= 65535)
+                    port = port * 10 + (*cur - '0');
+                cur++;
+            }
+            /* An empty or zero port keeps the current one. */
+            if (port != 0) ctxt->port = port;
+            /* Ignore anything left between the port and the path. */
+            while ((cur[0] != '/') && (*cur != 0))
+                cur++;
+            break;
+        }
+        if ((*cur == '/') || (*cur == 0)) {
+            buf[index] = 0;
+            ctxt->hostname = xmlMemStrdup(buf);
+            break;
+        }
+        /*
+         * Host name too long for buf: leave hostname and path unset,
+         * xmlNanoFTPConnect() refuses a context without a host name.
+         */
+        if (index >= maxLen)
+            return;
+        buf[index++] = *cur++;
+    }
+
+    /*
+     * The path is the whole remainder of the URL, so it is duplicated
+     * directly instead of going through the fixed size buffer.
+     */
+    if (*cur == 0)
+        ctxt->path = xmlMemStrdup("/");
+    else
+        ctxt->path = xmlMemStrdup(cur);
+}
+
+/**
+ * xmlNanoFTPUpdateURL:
+ * @ctx:  an FTP context
+ * @URL:  The URL used to update the context
+ *
+ * Update an FTP context by parsing the URL and finding
+ * the new path it indicates. If the protocol, hostname or
+ * port differ from those of the context, an error is
+ * returned: it indicates that a new connection has to
+ * be established.
+ *
+ * Returns 0 if Ok, -1 in case of error (other host).
+ */
+
+int
+xmlNanoFTPUpdateURL(void *ctx, const char *URL) {
+    xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
+    const char *cur = URL;
+    char buf[4096];
+    const int maxLen = (int) sizeof(buf) - 1;  /* keep room for the final 0 */
+    int index = 0;
+    int port = 0;
+
+    if (URL == NULL)
+        return(-1);
+    if (ctxt == NULL)
+        return(-1);
+    if (ctxt->protocol == NULL)
+        return(-1);
+    if (ctxt->hostname == NULL)
+        return(-1);
+
+    /* The protocol, everything before "://", must be the context's one. */
+    while (*cur != 0) {
+        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
+            buf[index] = 0;
+            if (strcmp(ctxt->protocol, buf))
+                return(-1);
+            index = 0;
+            cur += 3;
+            break;
+        }
+        /* Too long for buf: report an error rather than overflow it. */
+        if (index >= maxLen)
+            return(-1);
+        buf[index++] = *cur++;
+    }
+    if (*cur == 0)
+        return(-1);
+
+    /* The host name, and the port when one is given, must match too. */
+    while (1) {
+        if (cur[0] == ':') {
+            buf[index] = 0;
+            if (strcmp(ctxt->hostname, buf))
+                return(-1);
+            cur += 1;
+            while ((*cur >= '0') && (*cur <= '9')) {
+                /*
+                 * Stop accumulating once the value is already larger
+                 * than any TCP port, so that an endless run of digits
+                 * cannot overflow the int.
+                 */
+                if (port <= 65535)
+                    port = port * 10 + (*cur - '0');
+                cur++;
+            }
+            if (port != ctxt->port)
+                return(-1);
+            /* Ignore anything left between the port and the path. */
+            while ((cur[0] != '/') && (*cur != 0))
+                cur++;
+            break;
+        }
+        if ((*cur == '/') || (*cur == 0)) {
+            buf[index] = 0;
+            if (strcmp(ctxt->hostname, buf))
+                return(-1);
+            break;
+        }
+        /* Too long for buf: report an error rather than overflow it. */
+        if (index >= maxLen)
+            return(-1);
+        buf[index++] = *cur++;
+    }
+
+    /* Same server: only the path of the context is replaced. */
+    if (ctxt->path != NULL) {
+        xmlFree(ctxt->path);
+        ctxt->path = NULL;
+    }
+
+    /*
+     * The path is the whole remainder of the URL, so it is duplicated
+     * directly instead of going through the fixed size buffer.
+     */
+    if (*cur == 0)
+        ctxt->path = xmlMemStrdup("/");
+    else
+        ctxt->path = xmlMemStrdup(cur);
+    return(0);
+}
+
+/**
+ * xmlNanoFTPScanProxy:
+ * @URL:  The proxy URL used to initialize the proxy context
+ *
+ * (Re)Initialize the FTP Proxy context by parsing the URL and finding
+ * the protocol, host and port it indicates.
+ * Should be like ftp://myproxy/ or ftp://myproxy:3128/
+ * A NULL URL clears the proxy information.
+ */
+
+void
+xmlNanoFTPScanProxy(const char *URL) {
+    const char *cur = URL;
+    char buf[4096];
+    const int maxLen = (int) sizeof(buf) - 1;  /* keep room for the final 0 */
+    int index = 0;
+    int port = 0;
+
+    /* Drop the current proxy host and port. */
+    if (proxy != NULL) {
+        xmlFree(proxy);
+        proxy = NULL;
+    }
+    proxyPort = 0;
+#ifdef DEBUG_FTP
+    if (URL == NULL)
+        xmlGenericError(xmlGenericErrorContext, "Removing FTP proxy info\n");
+    else
+        xmlGenericError(xmlGenericErrorContext, "Using FTP proxy %s\n", URL);
+#endif
+    if (URL == NULL) return;
+
+    /*
+     * Skip the scheme, up to and including "://". Nothing of it is
+     * kept, so it does not need to be copied anywhere.
+     */
+    while (*cur != 0) {
+        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
+            cur += 3;
+            break;
+        }
+        cur++;
+    }
+    if (*cur == 0) return;
+
+    /* Host name, optionally followed by ":port". */
+    while (1) {
+        if (cur[0] == ':') {
+            buf[index] = 0;
+            proxy = xmlMemStrdup(buf);
+            cur += 1;
+            while ((*cur >= '0') && (*cur <= '9')) {
+                /*
+                 * Stop accumulating once the value is already larger
+                 * than any TCP port, so that an endless run of digits
+                 * cannot overflow the int.
+                 */
+                if (port <= 65535)
+                    port = port * 10 + (*cur - '0');
+                cur++;
+            }
+            if (port != 0) proxyPort = port;
+            break;
+        }
+        if ((*cur == '/') || (*cur == 0)) {
+            buf[index] = 0;
+            proxy = xmlMemStrdup(buf);
+            break;
+        }
+        /*
+         * Host name too long for buf: leave the proxy unset rather
+         * than write past the end of the buffer.
+         */
+        if (index >= maxLen)
+            return;
+        buf[index++] = *cur++;
+    }
+}
+
+/**
+ * xmlNanoFTPNewCtxt:
+ * @URL:  The URL used to initialize the context, or NULL
+ *
+ * Allocate and initialize a new FTP context. The context starts
+ * unconnected, in passive mode and with port 21; when @URL is not
+ * NULL it is parsed with xmlNanoFTPScanURL().
+ *
+ * Returns an FTP context or NULL in case of error.
+ */
+
+void*
+xmlNanoFTPNewCtxt(const char *URL) {
+    xmlNanoFTPCtxtPtr ret;
+
+    ret = (xmlNanoFTPCtxtPtr) xmlMalloc(sizeof(xmlNanoFTPCtxt));
+    if (ret == NULL) return(NULL);
+
+    memset(ret, 0, sizeof(xmlNanoFTPCtxt));
+    ret->port = 21;
+    ret->passive = 1;
+    ret->returnValue = 0;
+    ret->controlBufIndex = 0;
+    ret->controlBufUsed = 0;
+    /*
+     * No control connection yet. xmlNanoFTPFreeCtxt() closes any
+     * controlFd >= 0, so leaving the zero from memset() here would make
+     * it close descriptor 0 for a context that never connected.
+     */
+    ret->controlFd = -1;
+
+    if (URL != NULL)
+        xmlNanoFTPScanURL(ret, URL);
+
+    return(ret);
+}
+
+/**
+ * xmlNanoFTPFreeCtxt:
+ * @ctx:  an FTP context
+ *
+ * Frees the context after closing the connection.
+ */ + +void +xmlNanoFTPFreeCtxt(void * ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + if (ctxt == NULL) return; + if (ctxt->hostname != NULL) xmlFree(ctxt->hostname); + if (ctxt->protocol != NULL) xmlFree(ctxt->protocol); + if (ctxt->path != NULL) xmlFree(ctxt->path); + ctxt->passive = 1; + if (ctxt->controlFd >= 0) closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + ctxt->controlBufIndex = -1; + ctxt->controlBufUsed = -1; + xmlFree(ctxt); +} + +/** + * xmlNanoFTPParseResponse: + * @ctx: the FTP connection context + * @buf: the buffer containing the response + * @len: the buffer length + * + * Parsing of the server answer, we just extract the code. + * + * returns 0 for errors + * +XXX for last line of response + * -XXX for response to be continued + */ +static int +xmlNanoFTPParseResponse(void *ctx, char *buf, int len) { + int val = 0; + + if (len < 3) return(-1); + if ((*buf >= '0') && (*buf <= '9')) + val = val * 10 + (*buf - '0'); + else + return(0); + buf++; + if ((*buf >= '0') && (*buf <= '9')) + val = val * 10 + (*buf - '0'); + else + return(0); + buf++; + if ((*buf >= '0') && (*buf <= '9')) + val = val * 10 + (*buf - '0'); + else + return(0); + buf++; + if (*buf == '-') + return(-val); + return(val); +} + +/** + * xmlNanoFTPGetMore: + * @ctx: an FTP context + * + * Read more information from the FTP control connection + * Returns the number of bytes read, < 0 indicates an error + */ +static int +xmlNanoFTPGetMore(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + int len; + int size; + + if ((ctxt->controlBufIndex < 0) || (ctxt->controlBufIndex > FTP_BUF_SIZE)) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : controlBufIndex = %d\n", + ctxt->controlBufIndex); +#endif + return(-1); + } + + if ((ctxt->controlBufUsed < 0) || (ctxt->controlBufUsed > FTP_BUF_SIZE)) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : controlBufUsed = %d\n", + 
ctxt->controlBufUsed); +#endif + return(-1); + } + if (ctxt->controlBufIndex > ctxt->controlBufUsed) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : controlBufIndex > controlBufUsed %d > %d\n", + ctxt->controlBufIndex, ctxt->controlBufUsed); +#endif + return(-1); + } + + /* + * First pack the control buffer + */ + if (ctxt->controlBufIndex > 0) { + memmove(&ctxt->controlBuf[0], &ctxt->controlBuf[ctxt->controlBufIndex], + ctxt->controlBufUsed - ctxt->controlBufIndex); + ctxt->controlBufUsed -= ctxt->controlBufIndex; + ctxt->controlBufIndex = 0; + } + size = FTP_BUF_SIZE - ctxt->controlBufUsed; + if (size == 0) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : buffer full %d \n", ctxt->controlBufUsed); +#endif + return(0); + } + + /* + * Read the amount left on teh control connection + */ + if ((len = recv(ctxt->controlFd, &ctxt->controlBuf[ctxt->controlBufIndex], + size, 0)) < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetMore : read %d [%d - %d]\n", len, + ctxt->controlBufUsed, ctxt->controlBufUsed + len); +#endif + ctxt->controlBufUsed += len; + ctxt->controlBuf[ctxt->controlBufUsed] = 0; + + return(len); +} + +/** + * xmlNanoFTPReadResponse: + * @ctx: an FTP context + * + * Read the response from the FTP server after a command. + * Returns the code number + */ +static int +xmlNanoFTPReadResponse(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char *ptr, *end; + int len; + int res = -1, cur = -1; + +get_more: + /* + * Assumes everything up to controlBuf[controlBufIndex] has been read + * and analyzed. 
+ */ + len = xmlNanoFTPGetMore(ctx); + if (len < 0) { + return(-1); + } + if ((ctxt->controlBufUsed == 0) && (len == 0)) { + return(-1); + } + ptr = &ctxt->controlBuf[ctxt->controlBufIndex]; + end = &ctxt->controlBuf[ctxt->controlBufUsed]; + +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "\n<<<\n%s\n--\n", ptr); +#endif + while (ptr < end) { + cur = xmlNanoFTPParseResponse(ctxt, ptr, end - ptr); + if (cur > 0) { + /* + * Successfully scanned the control code, scratch + * till the end of the line, but keep the index to be + * able to analyze the result if needed. + */ + res = cur; + ptr += 3; + ctxt->controlBufAnswer = ptr - ctxt->controlBuf; + while ((ptr < end) && (*ptr != '\n')) ptr++; + if (*ptr == '\n') ptr++; + if (*ptr == '\r') ptr++; + break; + } + while ((ptr < end) && (*ptr != '\n')) ptr++; + if (ptr >= end) { + ctxt->controlBufIndex = ctxt->controlBufUsed; + goto get_more; + } + if (*ptr != '\r') ptr++; + } + + if (res < 0) goto get_more; + ctxt->controlBufIndex = ptr - ctxt->controlBuf; +#ifdef DEBUG_FTP + ptr = &ctxt->controlBuf[ctxt->controlBufIndex]; + xmlGenericError(xmlGenericErrorContext, "\n---\n%s\n--\n", ptr); +#endif + +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "Got %d\n", res); +#endif + return(res / 100); +} + +/** + * xmlNanoFTPGetResponse: + * @ctx: an FTP context + * + * Get the response from the FTP server after a command. + * Returns the code number + */ + +int +xmlNanoFTPGetResponse(void *ctx) { + int res; + + res = xmlNanoFTPReadResponse(ctx); + + return(res); +} + +/** + * xmlNanoFTPCheckResponse: + * @ctx: an FTP context + * + * Check if there is a response from the FTP server after a command. 
+ * Returns the code number, or 0 + */ + +int +xmlNanoFTPCheckResponse(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + fd_set rfd; + struct timeval tv; + + tv.tv_sec = 0; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->controlFd, &rfd); + switch(select(ctxt->controlFd + 1, &rfd, NULL, NULL, &tv)) { + case 0: + return(0); + case -1: +#ifdef DEBUG_FTP + perror("select"); +#endif + return(-1); + + } + + return(xmlNanoFTPReadResponse(ctx)); +} + +/** + * Send the user authentification + */ + +static int +xmlNanoFTPSendUser(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200]; + int len; + int res; + + if (ctxt->user == NULL) + sprintf(buf, "USER anonymous\r\n"); + else +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "USER %s\r\n", ctxt->user); +#else + sprintf(buf, "USER %s\r\n", ctxt->user); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) return(res); + return(0); +} + +/** + * Send the password authentification + */ + +static int +xmlNanoFTPSendPasswd(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200]; + int len; + int res; + + if (ctxt->passwd == NULL) +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n", hostname); +#else + sprintf(buf, "PASS libxml@%s\r\n", hostname); +#endif + else +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PASS %s\r\n", ctxt->passwd); +#else + sprintf(buf, "PASS %s\r\n", ctxt->passwd); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) return(res); + return(0); +} + +/** + * xmlNanoFTPQuit: + * @ctx: an FTP context + * + * Send a QUIT command to the server + * + * Returns -1 in case of error, 0 otherwise + */ + + +int +xmlNanoFTPQuit(void *ctx) { + 
xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200]; + int len; + int res; + + sprintf(buf, "QUIT\r\n"); + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + return(0); +} + +/** + * xmlNanoFTPConnect: + * @ctx: an FTP context + * + * Tries to open a control connection + * + * Returns -1 in case of error, 0 otherwise + */ + +int +xmlNanoFTPConnect(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + struct hostent *hp; + int port; + int res; + + if (ctxt == NULL) + return(-1); + if (ctxt->hostname == NULL) + return(-1); + + /* + * do the blocking DNS query. + */ + if (proxy) + hp = gethostbyname(proxy); + else + hp = gethostbyname(ctxt->hostname); + if (hp == NULL) + return(-1); + + /* + * Prepare the socket + */ + memset(&ctxt->ftpAddr, 0, sizeof(ctxt->ftpAddr)); + ctxt->ftpAddr.sin_family = AF_INET; + memcpy(&ctxt->ftpAddr.sin_addr, hp->h_addr_list[0], hp->h_length); + if (proxy) { + port = proxyPort; + } else { + port = ctxt->port; + } + if (port == 0) + port = 21; + ctxt->ftpAddr.sin_port = htons(port); + ctxt->controlFd = socket(AF_INET, SOCK_STREAM, 0); + if (ctxt->controlFd < 0) + return(-1); + + /* + * Do the connect. + */ + if (connect(ctxt->controlFd, (struct sockaddr *) &ctxt->ftpAddr, + sizeof(struct sockaddr_in)) < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + + /* + * Wait for the HELLO from the server. 
+ */ + res = xmlNanoFTPGetResponse(ctxt); + if (res != 2) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + + /* + * State diagram for the login operation on the FTP server + * + * Reference: RFC 959 + * + * 1 + * +---+ USER +---+------------->+---+ + * | B |---------->| W | 2 ---->| E | + * +---+ +---+------ | -->+---+ + * | | | | | + * 3 | | 4,5 | | | + * -------------- ----- | | | + * | | | | | + * | | | | | + * | --------- | + * | 1| | | | + * V | | | | + * +---+ PASS +---+ 2 | ------>+---+ + * | |---------->| W |------------->| S | + * +---+ +---+ ---------->+---+ + * | | | | | + * 3 | |4,5| | | + * -------------- -------- | + * | | | | | + * | | | | | + * | ----------- + * | 1,3| | | | + * V | 2| | | + * +---+ ACCT +---+-- | ----->+---+ + * | |---------->| W | 4,5 -------->| F | + * +---+ +---+------------->+---+ + * + * Of course in case of using a proxy this get really nasty and is not + * standardized at all :-( + */ + if (proxy) { + int len; + char buf[400]; + + if (proxyUser != NULL) { + /* + * We need proxy auth + */ +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "USER %s\r\n", proxyUser); +#else + sprintf(buf, "USER %s\r\n", proxyUser); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + switch (res) { + case 2: + if (proxyPasswd == NULL) + break; + case 3: + if (proxyPasswd != NULL) +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PASS %s\r\n", proxyPasswd); +#else + sprintf(buf, "PASS %s\r\n", proxyPasswd); +#endif + else +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n", + hostname); +#else + sprintf(buf, "PASS libxml@%s\r\n", hostname); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + 
xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if (res > 3) { + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(-1); + } + break; + case 1: + break; + case 4: + case 5: + case -1: + default: + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + return(-1); + } + } + + /* + * We assume we don't need more authentication to the proxy + * and that it succeeded :-\ + */ + switch (proxyType) { + case 0: + /* we will try in seqence */ + case 1: + /* Using SITE command */ +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "SITE %s\r\n", ctxt->hostname); +#else + sprintf(buf, "SITE %s\r\n", ctxt->hostname); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if (res == 2) { + /* we assume it worked :-\ 1 is error for SITE command */ + proxyType = 1; + break; + } + if (proxyType == 1) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + case 2: + /* USER user@host command */ + if (ctxt->user == NULL) +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "USER anonymous@%s\r\n", + ctxt->hostname); +#else + sprintf(buf, "USER anonymous@%s\r\n", ctxt->hostname); +#endif + else +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "USER %s@%s\r\n", + ctxt->user, ctxt->hostname); +#else + sprintf(buf, "USER %s@%s\r\n", + ctxt->user, ctxt->hostname); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); 
ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if ((res == 1) || (res == 2)) { + /* we assume it worked :-\ */ + proxyType = 2; + return(0); + } + if (ctxt->passwd == NULL) +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n", hostname); +#else + sprintf(buf, "PASS libxml@%s\r\n", hostname); +#endif + else +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PASS %s\r\n", ctxt->passwd); +#else + sprintf(buf, "PASS %s\r\n", ctxt->passwd); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if ((res == 1) || (res == 2)) { + /* we assume it worked :-\ */ + proxyType = 2; + return(0); + } + if (proxyType == 2) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + case 3: + /* + * If you need support for other Proxy authentication scheme + * send the code or at least the sequence in use. + */ + default: + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + } + /* + * Non-proxy handling. 
+ */ + res = xmlNanoFTPSendUser(ctxt); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + res = xmlNanoFTPGetResponse(ctxt); + switch (res) { + case 2: + return(0); + case 3: + break; + case 1: + case 4: + case 5: + case -1: + default: + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + res = xmlNanoFTPSendPasswd(ctxt); + if (res < 0) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + res = xmlNanoFTPGetResponse(ctxt); + switch (res) { + case 2: + break; + case 3: + xmlGenericError(xmlGenericErrorContext, + "FTP server asking for ACCNT on anonymous\n"); + case 1: + case 4: + case 5: + case -1: + default: + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + ctxt->controlFd = -1; + return(-1); + } + + return(0); +} + +/** + * xmlNanoFTPConnectTo: + * @server: an FTP server name + * @port: the port (use 21 if 0) + * + * Tries to open a control connection to the given server/port + * + * Returns an fTP context or NULL if it failed + */ + +void* +xmlNanoFTPConnectTo(const char *server, int port) { + xmlNanoFTPCtxtPtr ctxt; + int res; + + xmlNanoFTPInit(); + if (server == NULL) + return(NULL); + ctxt = (xmlNanoFTPCtxtPtr) xmlNanoFTPNewCtxt(NULL); + ctxt->hostname = xmlMemStrdup(server); + if (port != 0) + ctxt->port = port; + res = xmlNanoFTPConnect(ctxt); + if (res < 0) { + xmlNanoFTPFreeCtxt(ctxt); + return(NULL); + } + return(ctxt); +} + +/** + * xmlNanoFTPCwd: + * @ctx: an FTP context + * @directory: a directory on the server + * + * Tries to change the remote directory + * + * Returns -1 incase of error, 1 if CWD worked, 0 if it failed + */ + +int +xmlNanoFTPCwd(void *ctx, char *directory) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[400]; + int len; + int res; + + /* + * Expected response code for CWD: + * + * CWD + * 250 + * 500, 501, 502, 421, 530, 550 + */ +#ifdef HAVE_SNPRINTF + 
snprintf(buf, sizeof(buf), "CWD %s\r\n", directory); +#else + sprintf(buf, "CWD %s\r\n", directory); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) return(res); + res = xmlNanoFTPGetResponse(ctxt); + if (res == 4) { + return(-1); + } + if (res == 2) return(1); + if (res == 5) { + return(0); + } + return(0); +} + +/** + * xmlNanoFTPGetConnection: + * @ctx: an FTP context + * + * Try to open a data connection to the server. Currently only + * passive mode is supported. + * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPGetConnection(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + char buf[200], *cur; + int len, i; + int res; + unsigned char ad[6], *adp, *portp; + unsigned int temp[6]; + struct sockaddr_in dataAddr; + SOCKLEN_T dataAddrLen; + + ctxt->dataFd = socket (AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (ctxt->dataFd < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPGetConnection: failed to create socket\n"); + return(-1); + } + dataAddrLen = sizeof(dataAddr); + memset(&dataAddr, 0, dataAddrLen); + dataAddr.sin_family = AF_INET; + + if (ctxt->passive) { + sprintf(buf, "PASV\r\n"); + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPReadResponse(ctx); + if (res != 2) { + if (res == 5) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } else { + /* + * retry with an active connection + */ + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + ctxt->passive = 0; + } + } + cur = &ctxt->controlBuf[ctxt->controlBufAnswer]; + while (((*cur < '0') || (*cur > '9')) && *cur != '\0') cur++; + if (sscanf(cur, "%u,%u,%u,%u,%u,%u", &temp[0], &temp[1], &temp[2], + &temp[3], &temp[4], 
&temp[5]) != 6) { + xmlGenericError(xmlGenericErrorContext, + "Invalid answer to PASV\n"); + if (ctxt->dataFd != -1) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + } + return(-1); + } + for (i=0; i<6; i++) ad[i] = (unsigned char) (temp[i] & 0xff); + memcpy(&dataAddr.sin_addr, &ad[0], 4); + memcpy(&dataAddr.sin_port, &ad[4], 2); + if (connect(ctxt->dataFd, (struct sockaddr *) &dataAddr, dataAddrLen) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to create a data connection\n"); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return (-1); + } + } else { + getsockname(ctxt->dataFd, (struct sockaddr *) &dataAddr, &dataAddrLen); + dataAddr.sin_port = 0; + if (bind(ctxt->dataFd, (struct sockaddr *) &dataAddr, dataAddrLen) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Failed to bind a port\n"); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return (-1); + } + getsockname(ctxt->dataFd, (struct sockaddr *) &dataAddr, &dataAddrLen); + + if (listen(ctxt->dataFd, 1) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Could not listen on port %d\n", + ntohs(dataAddr.sin_port)); + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return (-1); + } + adp = (unsigned char *) &dataAddr.sin_addr; + portp = (unsigned char *) &dataAddr.sin_port; +#ifdef HAVE_SNPRINTF + snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n", + adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff, + portp[0] & 0xff, portp[1] & 0xff); +#else + sprintf(buf, "PORT %d,%d,%d,%d,%d,%d\r\n", + adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff, + portp[0] & 0xff, portp[1] & 0xff); +#endif + buf[sizeof(buf) - 1] = 0; + len = strlen(buf); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, buf); +#endif + + res = send(ctxt->controlFd, buf, len, 0); + if (res < 0) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(res); + } + res = xmlNanoFTPGetResponse(ctxt); + if (res != 2) { + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + return(-1); + } + } + 
return(ctxt->dataFd); + +} + +/** + * xmlNanoFTPCloseConnection: + * @ctx: an FTP context + * + * Close the data connection from the server + * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPCloseConnection(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + int res; + fd_set rfd, efd; + struct timeval tv; + + closesocket(ctxt->dataFd); ctxt->dataFd = -1; + tv.tv_sec = 15; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->controlFd, &rfd); + FD_ZERO(&efd); + FD_SET(ctxt->controlFd, &efd); + res = select(ctxt->controlFd + 1, &rfd, NULL, &efd, &tv); + if (res < 0) { +#ifdef DEBUG_FTP + perror("select"); +#endif + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + return(-1); + } + if (res == 0) { +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, + "xmlNanoFTPCloseConnection: timeout\n"); +#endif + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + } else { + res = xmlNanoFTPGetResponse(ctxt); + if (res != 2) { + closesocket(ctxt->controlFd); ctxt->controlFd = -1; + return(-1); + } + } + return(0); +} + +/** + * xmlNanoFTPParseList: + * @list: some data listing received from the server + * @callback: the user callback + * @userData: the user callback data + * + * Parse at most one entry from the listing. 
+ *
+ * Returns the number of bytes of @list that were consumed, or 0 when
+ * @list does not hold a complete entry (or starts with '+').
+ */
+
+static int
+xmlNanoFTPParseList(const char *list, ftpListCallback callback, void *userData) {
+    const char *cur = list;
+    char filename[151];
+    char attrib[11];
+    char owner[11];
+    char group[11];
+    char month[4];
+    int year = 0;
+    int minute = 0;
+    int hour = 0;
+    int day = 0;
+    unsigned long size = 0;
+    int links = 0;
+    int i;
+
+    if (!strncmp(cur, "total", 5)) {
+        /* Summary line ("total NNN"): consume it, nothing is reported. */
+        cur += 5;
+        while (*cur == ' ') cur++;
+        while ((*cur >= '0') && (*cur <= '9'))
+            links = (links * 10) + (*cur++ - '0');
+        while ((*cur == ' ') || (*cur == '\n') || (*cur == '\r'))
+            cur++;
+        return(cur - list);
+    }
+    if (*list == '+')
+        return(0);
+
+    /*
+     * Regular entry. Every field scan returns 0 as soon as the end of the
+     * string is reached, so that the caller can retry with more data.
+     */
+    while ((*cur == ' ') || (*cur == '\n') || (*cur == '\r'))
+        cur++;
+    if (*cur == 0) return(0);
+
+    /* attribute field, at most 10 characters are kept */
+    i = 0;
+    while (*cur != ' ') {
+        if (i < 10)
+            attrib[i++] = *cur;
+        cur++;
+        if (*cur == 0) return(0);
+    }
+    /*
+     * Terminate right after the copied characters: terminating at the
+     * fixed index 10 left uninitialized bytes inside the string whenever
+     * the field was shorter than 10 characters.
+     */
+    attrib[i] = 0;
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* link count */
+    while ((*cur >= '0') && (*cur <= '9'))
+        links = (links * 10) + (*cur++ - '0');
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* owner, at most 10 characters are kept */
+    i = 0;
+    while (*cur != ' ') {
+        if (i < 10)
+            owner[i++] = *cur;
+        cur++;
+        if (*cur == 0) return(0);
+    }
+    owner[i] = 0;
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* group, at most 10 characters are kept */
+    i = 0;
+    while (*cur != ' ') {
+        if (i < 10)
+            group[i++] = *cur;
+        cur++;
+        if (*cur == 0) return(0);
+    }
+    group[i] = 0;
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* size */
+    while ((*cur >= '0') && (*cur <= '9'))
+        size = (size * 10) + (*cur++ - '0');
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* month, at most 3 characters are kept */
+    i = 0;
+    while (*cur != ' ') {
+        if (i < 3)
+            month[i++] = *cur;
+        cur++;
+        if (*cur == 0) return(0);
+    }
+    month[i] = 0;
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* day of the month */
+    while ((*cur >= '0') && (*cur <= '9'))
+        day = (day * 10) + (*cur++ - '0');
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /*
+     * Either "hour:minute" or a year follows; a ':' in second or third
+     * position selects the time form.
+     */
+    if ((cur[1] == 0) || (cur[2] == 0))
+        return(0);
+    if ((cur[1] == ':') || (cur[2] == ':')) {
+        while ((*cur >= '0') && (*cur <= '9'))
+            hour = (hour * 10) + (*cur++ - '0');
+        if (*cur == ':') cur++;
+        while ((*cur >= '0') && (*cur <= '9'))
+            minute = (minute * 10) + (*cur++ - '0');
+    } else {
+        while ((*cur >= '0') && (*cur <= '9'))
+            year = (year * 10) + (*cur++ - '0');
+    }
+    while (*cur == ' ') cur++;
+    if (*cur == 0) return(0);
+
+    /* file name: the rest of the line, at most 150 characters are kept */
+    i = 0;
+    while ((*cur != '\n') && (*cur != '\r')) {
+        if (i < 150)
+            filename[i++] = *cur;
+        cur++;
+        if (*cur == 0) return(0);
+    }
+    filename[i] = 0;
+    while ((*cur == '\n') || (*cur == '\r'))
+        cur++;
+
+    if (callback != NULL) {
+        callback(userData, filename, attrib, owner, group, size, links,
+                 year, month, day, hour, minute);
+    }
+    return(cur - list);
+}
+
+/**
+ * xmlNanoFTPList:
+ * @ctx: an FTP context
+ * @callback: the user callback
+ * @userData: the user callback data
+ * @filename: optional files to list
+ *
+ * Do a listing on the server. All files info are passed back
+ * in the callbacks.
+ *
+ * Returns 0 on success, a negative value in case of error
+ */
+
+int
+xmlNanoFTPList(void *ctx, ftpListCallback callback, void *userData,
+               char *filename) {
+    xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
+    char buf[4096 + 1];
+    int len, res;
+    int index = 0, base;
+    fd_set rfd, efd;
+    struct timeval tv;
+
+    /* Anything but an absolute @filename is listed from the context path. */
+    if ((filename == NULL) || (filename[0] != '/')) {
+        if (xmlNanoFTPCwd(ctxt, ctxt->path) < 1)
+            return(-1);
+    }
+    ctxt->dataFd = xmlNanoFTPGetConnection(ctxt);
+    if (ctxt->dataFd == -1)
+        return(-1);
+    if (filename == NULL) {
+        sprintf(buf, "LIST -L\r\n");
+    } else {
+#ifdef HAVE_SNPRINTF
+        snprintf(buf, sizeof(buf), "LIST -L %s\r\n", filename);
+#else
+        sprintf(buf, "LIST -L %s\r\n", filename);
+#endif
+    }
+    buf[sizeof(buf) - 1] = 0;
+    len = strlen(buf);
+#ifdef DEBUG_FTP
+    /* the command embeds @filename: never use it as the format string */
+    xmlGenericError(xmlGenericErrorContext, "%s", buf);
+#endif
+    res = send(ctxt->controlFd, buf, len, 0);
+    if (res < 0) {
+        closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+        return(res);
+    }
+    res = xmlNanoFTPReadResponse(ctxt);
+    if (res != 1) {
+        closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+        /*
+         * Negating a zero or negative response would report a failure as
+         * a non-negative value: only positive codes are negated.
+         */
+        return((res > 0) ? -res : -1);
+    }
+
+    for (;;) {
+        tv.tv_sec = 1;
+        tv.tv_usec = 0;
+        FD_ZERO(&rfd);
+        FD_SET(ctxt->dataFd, &rfd);
+        FD_ZERO(&efd);
+        FD_SET(ctxt->dataFd, &efd);
+        res = select(ctxt->dataFd + 1, &rfd, NULL, &efd, &tv);
+        if (res < 0) {
+#ifdef DEBUG_FTP
+            perror("select");
+#endif
+            closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+            return(-1);
+        }
+        if (res == 0) {
+            /* nothing on the data socket: poll the control connection */
+            res = xmlNanoFTPCheckResponse(ctxt);
+            if (res < 0) {
+                closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+                return(-1);
+            }
+            if (res == 2) {
+                closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+                return(0);
+            }
+            continue;
+        }
+
+        /* append after the unparsed remainder, keeping room for the 0 */
+        len = recv(ctxt->dataFd, &buf[index], sizeof(buf) - (index + 1), 0);
+        if (len < 0) {
+#ifdef DEBUG_FTP
+            perror("recv");
+#endif
+            closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+            return(-1);
+        }
+#ifdef DEBUG_FTP
+        write(1, &buf[index], len);
+#endif
+        index += len;
+        buf[index] = 0;
+
+        /* hand over every complete entry, then keep the partial tail */
+        base = 0;
+        do {
+            res = xmlNanoFTPParseList(&buf[base], callback, userData);
+            base += res;
+        } while (res > 0);
+
+        memmove(&buf[0], &buf[base], index - base);
+        index -= base;
+
+        if (len == 0)
+            break;
+    }
+    xmlNanoFTPCloseConnection(ctxt);
+    return(0);
+}
+
+/**
+ * xmlNanoFTPGetSocket:
+ * @ctx: an FTP context
+ * @filename: the file to retrieve (or NULL if path is in context).
+ *
+ * Initiate fetch of the given file from the server: opens the data
+ * connection, switches to binary mode (TYPE I) and sends RETR.
+ *
+ * Returns the socket for the data connection, or <0 in case of error
+ */
+
+int
+xmlNanoFTPGetSocket(void *ctx, const char *filename) {
+    xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
+    const char *target;
+    char buf[300];
+    int res, len;
+
+    if ((filename == NULL) && (ctxt->path == NULL))
+        return(-1);
+    ctxt->dataFd = xmlNanoFTPGetConnection(ctxt);
+    if (ctxt->dataFd == -1)
+        return(-1);
+
+    sprintf(buf, "TYPE I\r\n");
+    len = strlen(buf);
+#ifdef DEBUG_FTP
+    xmlGenericError(xmlGenericErrorContext, "%s", buf);
+#endif
+    res = send(ctxt->controlFd, buf, len, 0);
+    if (res < 0) {
+        closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+        return(res);
+    }
+    res = xmlNanoFTPReadResponse(ctxt);
+    if (res != 2) {
+        closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+        /* callers test for "< 0": never hand back 0 or a positive value */
+        return((res > 0) ? -res : -1);
+    }
+
+    target = (filename != NULL) ? filename : ctxt->path;
+#ifdef HAVE_SNPRINTF
+    snprintf(buf, sizeof(buf), "RETR %s\r\n", target);
+#else
+    sprintf(buf, "RETR %s\r\n", target);
+#endif
+    buf[sizeof(buf) - 1] = 0;
+    len = strlen(buf);
+#ifdef DEBUG_FTP
+    xmlGenericError(xmlGenericErrorContext, "%s", buf);
+#endif
+    res = send(ctxt->controlFd, buf, len, 0);
+    if (res < 0) {
+        closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+        return(res);
+    }
+    res = xmlNanoFTPReadResponse(ctxt);
+    if (res != 1) {
+        closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+        return((res > 0) ? -res : -1);
+    }
+    return(ctxt->dataFd);
+}
+
+/**
+ * xmlNanoFTPGet:
+ * @ctx: an FTP context
+ * @callback: the user callback
+ * @userData: the user callback data
+ * @filename: the file to retrieve
+ *
+ * Fetch the given file from the server. All data are passed back
+ * in the callbacks. The last callback has a size of 0 block; a failed
+ * recv() is reported to the callback with its negative length.
+ *
+ * Returns -1 in case of error, 0 otherwise
+ */
+
+int
+xmlNanoFTPGet(void *ctx, ftpDataCallback callback, void *userData,
+              const char *filename) {
+    xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
+    char buf[4096];
+    int len, res;
+    fd_set rfd;
+    struct timeval tv;
+
+    if ((filename == NULL) && (ctxt->path == NULL))
+        return(-1);
+    if (callback == NULL)
+        return(-1);
+    if (xmlNanoFTPGetSocket(ctxt, filename) < 0)
+        return(-1);
+
+    /*
+     * The loop ends only on end of data, on an error or on a final
+     * answer from the server. A select() timeout before the first byte
+     * must keep waiting: with the former "do ... while (len != 0)" form
+     * the "continue" below ended the transfer while len was still 0.
+     */
+    for (;;) {
+        tv.tv_sec = 1;
+        tv.tv_usec = 0;
+        FD_ZERO(&rfd);
+        FD_SET(ctxt->dataFd, &rfd);
+        res = select(ctxt->dataFd + 1, &rfd, NULL, NULL, &tv);
+        if (res < 0) {
+#ifdef DEBUG_FTP
+            perror("select");
+#endif
+            closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+            return(-1);
+        }
+        if (res == 0) {
+            res = xmlNanoFTPCheckResponse(ctxt);
+            if (res < 0) {
+                closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+                return(-1);
+            }
+            if (res == 2) {
+                closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+                return(0);
+            }
+            continue;
+        }
+        len = recv(ctxt->dataFd, buf, sizeof(buf), 0);
+        if (len < 0) {
+            callback(userData, buf, len);
+            closesocket(ctxt->dataFd); ctxt->dataFd = -1;
+            return(-1);
+        }
+        callback(userData, buf, len);
+        if (len == 0)
+            break;
+    }
+
+    return(xmlNanoFTPCloseConnection(ctxt));
+}
+
+/**
+ * xmlNanoFTPRead:
+ * @ctx: the FTP context
+ * @dest: a buffer
+ * @len: the buffer length
+ *
+ * This function tries to read @len bytes from the existing FTP connection
+ * and saves them in @dest. This is a blocking call.
+ *
+ * Returns the number of byte read. 0 is an indication of an end of connection.
+ * -1 indicates a parameter error.
+ */ +int +xmlNanoFTPRead(void *ctx, void *dest, int len) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + + if (ctx == NULL) return(-1); + if (ctxt->dataFd < 0) return(0); + if (dest == NULL) return(-1); + if (len <= 0) return(0); + + len = recv(ctxt->dataFd, dest, len, 0); +#ifdef DEBUG_FTP + xmlGenericError(xmlGenericErrorContext, "Recvd %d bytes\n", len); +#endif + if (len <= 0) { + xmlNanoFTPCloseConnection(ctxt); + } + return(len); +} + +/** + * xmlNanoFTPOpen: + * @URL: the URL to the resource + * + * Start to fetch the given ftp:// resource + * + * Returns an FTP context, or NULL + */ + +void* +xmlNanoFTPOpen(const char *URL) { + xmlNanoFTPCtxtPtr ctxt; + int sock; + + xmlNanoFTPInit(); + if (URL == NULL) return(NULL); + if (strncmp("ftp://", URL, 6)) return(NULL); + + ctxt = (xmlNanoFTPCtxtPtr) xmlNanoFTPNewCtxt(URL); + if (ctxt == NULL) return(NULL); + if (xmlNanoFTPConnect(ctxt) < 0) { + xmlNanoFTPFreeCtxt(ctxt); + return(NULL); + } + sock = xmlNanoFTPGetSocket(ctxt, ctxt->path); + if (sock < 0) { + xmlNanoFTPFreeCtxt(ctxt); + return(NULL); + } + return(ctxt); +} + +/** + * xmlNanoFTPClose: + * @ctx: an FTP context + * + * Close the connection and both control and transport + * + * Returns -1 incase of error, 0 otherwise + */ + +int +xmlNanoFTPClose(void *ctx) { + xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx; + + if (ctxt == NULL) + return(-1); + + if (ctxt->dataFd >= 0) { + closesocket(ctxt->dataFd); + ctxt->dataFd = -1; + } + if (ctxt->controlFd >= 0) { + xmlNanoFTPQuit(ctxt); + closesocket(ctxt->controlFd); + ctxt->controlFd = -1; + } + xmlNanoFTPFreeCtxt(ctxt); + return(0); +} + +#ifdef STANDALONE +/************************************************************************ + * * + * Basic test in Standalone mode * + * * + ************************************************************************/ +void ftpList(void *userData, const char *filename, const char* attrib, + const char *owner, const char *group, unsigned long size, int links, + int 
year, const char *month, int day, int hour, int minute) { + xmlGenericError(xmlGenericErrorContext, + "%s %s %s %ld %s\n", attrib, owner, group, size, filename); +} +void ftpData(void *userData, const char *data, int len) { + if (userData == NULL) return; + if (len <= 0) { + fclose(userData); + return; + } + fwrite(data, len, 1, userData); +} + +int main(int argc, char **argv) { + void *ctxt; + FILE *output; + char *tstfile = NULL; + + xmlNanoFTPInit(); + if (argc > 1) { + ctxt = xmlNanoFTPNewCtxt(argv[1]); + if (xmlNanoFTPConnect(ctxt) < 0) { + xmlGenericError(xmlGenericErrorContext, + "Couldn't connect to %s\n", argv[1]); + exit(1); + } + if (argc > 2) + tstfile = argv[2]; + } else + ctxt = xmlNanoFTPConnectTo("localhost", 0); + if (ctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Couldn't connect to localhost\n"); + exit(1); + } + xmlNanoFTPList(ctxt, ftpList, NULL, tstfile); + output = fopen("/tmp/tstdata", "w"); + if (output != NULL) { + if (xmlNanoFTPGet(ctxt, ftpData, (void *) output, tstfile) < 0) + xmlGenericError(xmlGenericErrorContext, + "Failed to get file\n"); + + } + xmlNanoFTPClose(ctxt); + xmlMemoryDump(); + exit(0); +} +#endif /* STANDALONE */ +#else /* !LIBXML_FTP_ENABLED */ +#ifdef STANDALONE +#include +int main(int argc, char **argv) { + xmlGenericError(xmlGenericErrorContext, + "%s : FTP support not compiled in\n", argv[0]); + return(0); +} +#endif /* STANDALONE */ +#endif /* LIBXML_FTP_ENABLED */ diff --git a/nanoftp.h b/nanoftp.h new file mode 100644 index 00000000..53465280 --- /dev/null +++ b/nanoftp.h @@ -0,0 +1,110 @@ +/* + * nanohttp.c: minimalist FTP implementation to fetch external subsets. + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __NANO_FTP_H__ +#define __NANO_FTP_H__ + +#include +#ifdef LIBXML_FTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ftpListCallback: + * @userData: user provided data for the callback + * @filename: the file name (including "->" when links are shown) + * @attrib: the attribute string + * @owner: the owner string + * @group: the group string + * @size: the file size + * @links: the link count + * @year: the year + * @month: the month + * @day: the day + * @hour: the hour + * @minute: the minute + * + * A callback for the xmlNanoFTPList command + * Note that only one of year and day:minute are specified + */ +typedef void (*ftpListCallback) (void *userData, + const char *filename, const char* attrib, + const char *owner, const char *group, + unsigned long size, int links, int year, + const char *month, int day, int hour, + int minute); +/** + * ftpDataCallback: + * A callback for the xmlNanoFTPGet command + */ +typedef void (*ftpDataCallback) (void *userData, const char *data, int len); + +/* + * Init + */ +void xmlNanoFTPInit (void); +void xmlNanoFTPCleanup (void); + +/* + * Creating/freeing contexts + */ +void * xmlNanoFTPNewCtxt (const char *URL); +void xmlNanoFTPFreeCtxt (void * ctx); +void * xmlNanoFTPConnectTo (const char *server, + int port); +/* + * Opening/closing session connections + */ +void * xmlNanoFTPOpen (const char *URL); +int xmlNanoFTPConnect (void *ctx); +int xmlNanoFTPClose (void *ctx); +int xmlNanoFTPQuit (void *ctx); +void xmlNanoFTPScanProxy (const char *URL); +void xmlNanoFTPProxy (const char *host, + int port, + const char *user, + const char *passwd, + int type); +int xmlNanoFTPUpdateURL (void *ctx, + const char *URL); + +/* + * Rathern internal commands + */ +int xmlNanoFTPGetResponse (void *ctx); +int xmlNanoFTPCheckResponse (void *ctx); + +/* + * CD/DIR/GET handlers + */ +int xmlNanoFTPCwd (void *ctx, + char *directory); + +int xmlNanoFTPGetConnection (void *ctx); +int 
xmlNanoFTPCloseConnection(void *ctx); +int xmlNanoFTPList (void *ctx, + ftpListCallback callback, + void *userData, + char *filename); +int xmlNanoFTPGetSocket (void *ctx, + const char *filename); +int xmlNanoFTPGet (void *ctx, + ftpDataCallback callback, + void *userData, + const char *filename); +int xmlNanoFTPRead (void *ctx, + void *dest, + int len); + +#ifdef __cplusplus +} +#endif /* LIBXML_FTP_ENABLED */ +#endif +#endif /* __NANO_FTP_H__ */ diff --git a/nanohttp.c b/nanohttp.c new file mode 100644 index 00000000..cf30c530 --- /dev/null +++ b/nanohttp.c @@ -0,0 +1,1202 @@ +/* + * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets. + * focuses on size, streamability, reentrancy and portability + * + * This is clearly not a general purpose HTTP implementation + * If you look for one, check: + * http://www.w3.org/Library/ + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +/* TODO add compression support, Send the Accept- , and decompress on the + fly with ZLIB if found at compile-time */ + +#ifdef WIN32 +#define INCLUDE_WINSOCK +#include "win32config.h" +#else +#include "config.h" +#endif + +#include + +#ifdef LIBXML_HTTP_ENABLED +#include +#include + +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_NETINET_IN_H +#include +#endif +#ifdef HAVE_ARPA_INET_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_ERRNO_H +#include +#endif +#ifdef HAVE_SYS_TIME_H +#include +#endif +#ifdef HAVE_SYS_SELECT_H +#include +#endif +#ifdef HAVE_STRINGS_H +#include +#endif +#ifdef SUPPORT_IP6 +#include +#endif + +#ifdef VMS +#include +#define SOCKLEN_T unsigned int +#define SOCKET int +#endif + +#include +#include /* for xmlStr(n)casecmp() */ +#include + +/** + * A couple portability macros + */ +#ifndef _WINSOCKAPI_ +#define closesocket(s) close(s) +#define SOCKET int 
+#endif + +#ifdef STANDALONE +#define DEBUG_HTTP +#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n) +#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b) +#endif + +#define XML_NANO_HTTP_MAX_REDIR 10 + +#define XML_NANO_HTTP_CHUNK 4096 + +#define XML_NANO_HTTP_CLOSED 0 +#define XML_NANO_HTTP_WRITE 1 +#define XML_NANO_HTTP_READ 2 +#define XML_NANO_HTTP_NONE 4 + +typedef struct xmlNanoHTTPCtxt { + char *protocol; /* the protocol name */ + char *hostname; /* the host name */ + int port; /* the port */ + char *path; /* the path within the URL */ + SOCKET fd; /* the file descriptor for the socket */ + int state; /* WRITE / READ / CLOSED */ + char *out; /* buffer sent (zero terminated) */ + char *outptr; /* index within the buffer sent */ + char *in; /* the receiving buffer */ + char *content; /* the start of the content */ + char *inptr; /* the next byte to read from network */ + char *inrptr; /* the next byte to give back to the client */ + int inlen; /* len of the input buffer */ + int last; /* return code for last operation */ + int returnValue; /* the protocol return value */ + char *contentType; /* the MIME type for the input */ + char *location; /* the new URL in case of redirect */ + char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */ +} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr; + +static int initialized = 0; +static char *proxy = NULL; /* the proxy name if any */ +static int proxyPort; /* the proxy port if any */ +static unsigned int timeout = 60;/* the select() timeout in seconds */ + +/** + * A portability function + */ +int socket_errno(void) { +#ifdef _WINSOCKAPI_ + return(WSAGetLastError()); +#else + return(errno); +#endif +} + +/** + * xmlNanoHTTPInit: + * + * Initialize the HTTP protocol layer. 
+ * Currently it just checks for proxy information in the environment
+ * (no_proxy, then http_proxy, then HTTP_PROXY).
+ */
+
+void
+xmlNanoHTTPInit(void) {
+    const char *env;
+#ifdef _WINSOCKAPI_
+    WSADATA wsaData;
+#endif
+
+    if (initialized)
+        return;
+
+#ifdef _WINSOCKAPI_
+    if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
+        return;
+#endif
+
+    if (proxy == NULL) {
+        proxyPort = 80;
+        /* a defined no_proxy, whatever its value, disables the lookup */
+        if (getenv("no_proxy") == NULL) {
+            env = getenv("http_proxy");
+            if (env == NULL)
+                env = getenv("HTTP_PROXY");
+            if (env != NULL)
+                xmlNanoHTTPScanProxy(env);
+        }
+    }
+    initialized = 1;
+}
+
+/**
+ * xmlNanoHTTPCleanup:
+ *
+ * Cleanup the HTTP protocol layer.
+ */
+
+void
+xmlNanoHTTPCleanup(void) {
+    if (proxy != NULL) {
+        xmlFree(proxy);
+        /*
+         * Reset the pointer: both xmlNanoHTTPInit() and
+         * xmlNanoHTTPScanProxy() test it against NULL, and the latter
+         * would free a stale value a second time.
+         */
+        proxy = NULL;
+    }
+#ifdef _WINSOCKAPI_
+    if (initialized)
+        WSACleanup();
+#endif
+    initialized = 0;
+}
+
+/**
+ * xmlNanoHTTPTimeout:
+ * @delay: the delay in seconds
+ *
+ * Set the HTTP timeout, (default is 60secs). 0 means immediate
+ * return, while -1 infinite.
+ */
+
+void
+xmlNanoHTTPTimeout(int delay) {
+    /* stored unsigned: a negative @delay wraps to a very large value */
+    timeout = (unsigned int) delay;
+}
+
+/**
+ * xmlNanoHTTPScanURL:
+ * @ctxt: an HTTP context
+ * @URL: The URL used to initialize the context
+ *
+ * (Re)Initialize an HTTP context by parsing the URL and finding
+ * the protocol host port and path it indicates.
+ */
+
+static void
+xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
+    const char *cur = URL;
+    char buf[4096];
+    int index = 0;
+    int port = 0;
+
+    /* drop whatever a previous URL left in the context */
+    if (ctxt->protocol != NULL) {
+        xmlFree(ctxt->protocol);
+        ctxt->protocol = NULL;
+    }
+    if (ctxt->hostname != NULL) {
+        xmlFree(ctxt->hostname);
+        ctxt->hostname = NULL;
+    }
+    if (ctxt->path != NULL) {
+        xmlFree(ctxt->path);
+        ctxt->path = NULL;
+    }
+    if (URL == NULL) return;
+
+    /*
+     * Protocol: everything before "://". Without that separator nothing
+     * is set. A component which does not fit in buf stops the parsing
+     * (the remaining fields stay NULL) instead of being written past
+     * the end of the buffer.
+     */
+    while (*cur != 0) {
+        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
+            buf[index] = 0;
+            ctxt->protocol = xmlMemStrdup(buf);
+            index = 0;
+            cur += 3;
+            break;
+        }
+        if (index >= (int) sizeof(buf) - 1)
+            return;
+        buf[index++] = *cur++;
+    }
+    if (*cur == 0) return;
+
+    /* Host name, optionally followed by ":port" */
+    for (;;) {
+        if (cur[0] == ':') {
+            buf[index] = 0;
+            ctxt->hostname = xmlMemStrdup(buf);
+            cur += 1;
+            while ((*cur >= '0') && (*cur <= '9')) {
+                port *= 10;
+                port += *cur - '0';
+                cur++;
+            }
+            /* an empty or zero port keeps the value already in ctxt */
+            if (port != 0) ctxt->port = port;
+            while ((cur[0] != '/') && (*cur != 0))
+                cur++;
+            break;
+        }
+        if ((*cur == '/') || (*cur == 0)) {
+            buf[index] = 0;
+            ctxt->hostname = xmlMemStrdup(buf);
+            break;
+        }
+        if (index >= (int) sizeof(buf) - 1)
+            return;
+        buf[index++] = *cur++;
+    }
+
+    /* Path: the remainder of the URL, "/" when there is none */
+    if (*cur == 0)
+        ctxt->path = xmlMemStrdup("/");
+    else
+        ctxt->path = xmlMemStrdup(cur);
+}
+
+/**
+ * xmlNanoHTTPScanProxy:
+ * @URL: The proxy URL used to initialize the proxy context
+ *
+ * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
+ * the protocol host port it indicates.
+ * Should be like http://myproxy/ or http://myproxy:3128/
+ * A NULL URL cleans up proxy informations.
+ */
+
+void
+xmlNanoHTTPScanProxy(const char *URL) {
+    const char *cur = URL;
+    char buf[4096];
+    int index = 0;
+    int port = 0;
+
+    /* forget the previous proxy settings */
+    if (proxy != NULL) {
+        xmlFree(proxy);
+        proxy = NULL;
+    }
+    proxyPort = 0;
+#ifdef DEBUG_HTTP
+    if (URL == NULL)
+        xmlGenericError(xmlGenericErrorContext,
+                        "Removing HTTP proxy info\n");
+    else
+        xmlGenericError(xmlGenericErrorContext,
+                        "Using HTTP proxy %s\n", URL);
+#endif
+    if (URL == NULL) return;
+
+    /* The scheme is not recorded: just step over it and its "://" */
+    while (*cur != 0) {
+        if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
+            cur += 3;
+            break;
+        }
+        cur++;
+    }
+    if (*cur == 0) return;
+
+    /*
+     * Host name, optionally followed by ":port". A name which does not
+     * fit in buf is rejected (no proxy is set) instead of being written
+     * past the end of the buffer.
+     */
+    for (;;) {
+        if (cur[0] == ':') {
+            buf[index] = 0;
+            proxy = xmlMemStrdup(buf);
+            cur += 1;
+            while ((*cur >= '0') && (*cur <= '9')) {
+                port *= 10;
+                port += *cur - '0';
+                cur++;
+            }
+            if (port != 0) proxyPort = port;
+            while ((cur[0] != '/') && (*cur != 0))
+                cur++;
+            break;
+        }
+        if ((*cur == '/') || (*cur == 0)) {
+            buf[index] = 0;
+            proxy = xmlMemStrdup(buf);
+            break;
+        }
+        if (index >= (int) sizeof(buf) - 1)
+            return;
+        buf[index++] = *cur++;
+    }
+}
+
+/**
+ * xmlNanoHTTPNewCtxt:
+ * @URL: The URL used to initialize the context
+ *
+ * Allocate and initialize a new HTTP context: zeroed, default port 80,
+ * no socket yet, then filled from @URL by xmlNanoHTTPScanURL().
+ *
+ * Returns an HTTP context or NULL in case of error.
+ */
+
+static xmlNanoHTTPCtxtPtr
+xmlNanoHTTPNewCtxt(const char *URL) {
+    xmlNanoHTTPCtxtPtr ret;
+
+    ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
+    if (ret == NULL) return(NULL);
+
+    memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
+    ret->port = 80;
+    ret->returnValue = 0;
+    ret->fd = -1;
+
+    xmlNanoHTTPScanURL(ret, URL);
+
+    return(ret);
+}
+
+/**
+ * xmlNanoHTTPFreeCtxt:
+ * @ctxt: an HTTP context
+ *
+ * Frees the context after closing the connection.
+ */
+
+static void
+xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
+    if (ctxt == NULL) return;
+
+    /* strings and buffers owned by the context */
+    if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
+    if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
+    if (ctxt->path != NULL) xmlFree(ctxt->path);
+    if (ctxt->out != NULL) xmlFree(ctxt->out);
+    if (ctxt->in != NULL) xmlFree(ctxt->in);
+    if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
+    if (ctxt->location != NULL) xmlFree(ctxt->location);
+    if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
+
+    ctxt->state = XML_NANO_HTTP_NONE;
+    if (ctxt->fd >= 0) closesocket(ctxt->fd);
+    ctxt->fd = -1;
+    xmlFree(ctxt);
+}
+
+/**
+ * xmlNanoHTTPSend:
+ * @ctxt: an HTTP context
+ *
+ * Send the input needed to initiate the processing on the server side.
+ * Does nothing unless the context is in the WRITE state; otherwise the
+ * zero terminated string at ctxt->outptr is sent and the number of bytes
+ * sent is stored in ctxt->last.
+ */
+
+static void
+xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
+    if (ctxt->state & XML_NANO_HTTP_WRITE) {
+        size_t length = strlen(ctxt->outptr);
+        int total_sent = 0;
+
+        /*
+         * send() may accept only part of the data: loop until the whole
+         * request went out. Calls which send nothing are simply retried.
+         * NOTE(review): a send() that keeps failing makes this loop spin
+         * forever - confirm how the socket is set up by the caller.
+         */
+        while ((size_t) total_sent < length) {
+            int nsent = send(ctxt->fd, ctxt->outptr + total_sent,
+                             length - total_sent, 0);
+            if (nsent > 0)
+                total_sent += nsent;
+        }
+
+        ctxt->last = total_sent;
+    }
+}
+
+/**
+ * xmlNanoHTTPRecv:
+ * @ctxt: an HTTP context
+ *
+ * Read information coming from the HTTP connection.
+ * This is a blocking call (but it blocks in select(), not read()).
+ *
+ * Returns the number of byte read or -1 in case of error.
+ */ + +static int +xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) { + fd_set rfd; + struct timeval tv; + + + while (ctxt->state & XML_NANO_HTTP_READ) { + if (ctxt->in == NULL) { + ctxt->in = (char *) xmlMalloc(65000 * sizeof(char)); + if (ctxt->in == NULL) { + ctxt->last = -1; + return(-1); + } + ctxt->inlen = 65000; + ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in; + } + if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) { + int delta = ctxt->inrptr - ctxt->in; + int len = ctxt->inptr - ctxt->inrptr; + + memmove(ctxt->in, ctxt->inrptr, len); + ctxt->inrptr -= delta; + ctxt->content -= delta; + ctxt->inptr -= delta; + } + if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) { + int d_inptr = ctxt->inptr - ctxt->in; + int d_content = ctxt->content - ctxt->in; + int d_inrptr = ctxt->inrptr - ctxt->in; + + ctxt->inlen *= 2; + ctxt->in = (char *) xmlRealloc(ctxt->in, ctxt->inlen); + if (ctxt->in == NULL) { + ctxt->last = -1; + return(-1); + } + ctxt->inptr = ctxt->in + d_inptr; + ctxt->content = ctxt->in + d_content; + ctxt->inrptr = ctxt->in + d_inrptr; + } + ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0); + if (ctxt->last > 0) { + ctxt->inptr += ctxt->last; + return(ctxt->last); + } + if (ctxt->last == 0) { + return(0); + } + if (ctxt->last == -1) { + switch (socket_errno()) { + case EINPROGRESS: + case EWOULDBLOCK: +#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK + case EAGAIN: +#endif + break; + default: + return(0); + } + } + + tv.tv_sec = timeout; + tv.tv_usec = 0; + FD_ZERO(&rfd); + FD_SET(ctxt->fd, &rfd); + + if (select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1) + return(0); + } + return(0); +} + +/** + * xmlNanoHTTPReadLine: + * @ctxt: an HTTP context + * + * Read one line in the HTTP server output, usually for extracting + * the HTTP protocol informations from the answer header. + * + * Returns a newly allocated string with a copy of the line, or NULL + * which indicate the end of the input. 
+ */ + +static char * +xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) { + char buf[4096]; + char *bp = buf; + + while (bp - buf < 4095) { + if (ctxt->inrptr == ctxt->inptr) { + if (xmlNanoHTTPRecv(ctxt) == 0) { + if (bp == buf) + return(NULL); + else + *bp = 0; + return(xmlMemStrdup(buf)); + } + } + *bp = *ctxt->inrptr++; + if (*bp == '\n') { + *bp = 0; + return(xmlMemStrdup(buf)); + } + if (*bp != '\r') + bp++; + } + buf[4095] = 0; + return(xmlMemStrdup(buf)); +} + + +/** + * xmlNanoHTTPScanAnswer: + * @ctxt: an HTTP context + * @line: an HTTP header line + * + * Try to extract useful informations from the server answer. + * We currently parse and process: + * - The HTTP revision/ return code + * - The Content-Type + * - The Location for redirrect processing. + * + * Returns -1 in case of failure, the file descriptor number otherwise + */ + +static void +xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) { + const char *cur = line; + + if (line == NULL) return; + + if (!strncmp(line, "HTTP/", 5)) { + int version = 0; + int ret = 0; + + cur += 5; + while ((*cur >= '0') && (*cur <= '9')) { + version *= 10; + version += *cur - '0'; + cur++; + } + if (*cur == '.') { + cur++; + if ((*cur >= '0') && (*cur <= '9')) { + version *= 10; + version += *cur - '0'; + cur++; + } + while ((*cur >= '0') && (*cur <= '9')) + cur++; + } else + version *= 10; + if ((*cur != ' ') && (*cur != '\t')) return; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if ((*cur < '0') || (*cur > '9')) return; + while ((*cur >= '0') && (*cur <= '9')) { + ret *= 10; + ret += *cur - '0'; + cur++; + } + if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return; + ctxt->returnValue = ret; + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) { + cur += 13; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->contentType != NULL) + xmlFree(ctxt->contentType); + ctxt->contentType = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, 
BAD_CAST"ContentType:", 12)) { + cur += 12; + if (ctxt->contentType != NULL) return; + while ((*cur == ' ') || (*cur == '\t')) cur++; + ctxt->contentType = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) { + cur += 9; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->location != NULL) + xmlFree(ctxt->location); + ctxt->location = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) { + cur += 17; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->authHeader != NULL) + xmlFree(ctxt->authHeader); + ctxt->authHeader = xmlMemStrdup(cur); + } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) { + cur += 19; + while ((*cur == ' ') || (*cur == '\t')) cur++; + if (ctxt->authHeader != NULL) + xmlFree(ctxt->authHeader); + ctxt->authHeader = xmlMemStrdup(cur); + } +} + +/** + * xmlNanoHTTPConnectAttempt: + * @ia: an internet adress structure + * @port: the port number + * + * Attempt a connection to the given IP:port endpoint. It forces + * non-blocking semantic on the socket, and allow 60 seconds for + * the host to answer. + * + * Returns -1 in case of failure, the file descriptor number otherwise + */ + +static int +xmlNanoHTTPConnectAttempt(struct sockaddr *addr, int port) +{ + SOCKET s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + fd_set wfd; + struct timeval tv; + int status; + + if (s==-1) { +#ifdef DEBUG_HTTP + perror("socket"); +#endif + return(-1); + } + +#ifdef _WINSOCKAPI_ + { + u_long one = 1; + + status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? 
-1 : 0; + } +#else /* _WINSOCKAPI_ */ +#if defined(VMS) + { + int enable = 1; + status = ioctl(s, FIONBIO, &enable); + } +#else /* VMS */ + if ((status = fcntl(s, F_GETFL, 0)) != -1) { +#ifdef O_NONBLOCK + status |= O_NONBLOCK; +#else /* O_NONBLOCK */ +#ifdef F_NDELAY + status |= F_NDELAY; +#endif /* F_NDELAY */ +#endif /* !O_NONBLOCK */ + status = fcntl(s, F_SETFL, status); + } + if (status < 0) { +#ifdef DEBUG_HTTP + perror("nonblocking"); +#endif + closesocket(s); + return(-1); + } +#endif /* !VMS */ +#endif /* !_WINSOCKAPI_ */ + + + if ((connect(s, addr, sizeof(*addr))==-1)) { + switch (socket_errno()) { + case EINPROGRESS: + case EWOULDBLOCK: + break; + default: + perror("connect"); + closesocket(s); + return(-1); + } + } + + tv.tv_sec = timeout; + tv.tv_usec = 0; + + FD_ZERO(&wfd); + FD_SET(s, &wfd); + + switch(select(s+1, NULL, &wfd, NULL, &tv)) + { + case 0: + /* Time out */ + closesocket(s); + return(-1); + case -1: + /* Ermm.. ?? */ +#ifdef DEBUG_HTTP + perror("select"); +#endif + closesocket(s); + return(-1); + } + + if ( FD_ISSET(s, &wfd) ) { + SOCKLEN_T len; + len = sizeof(status); + if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&status, &len) < 0 ) { + /* Solaris error code */ + return (-1); + } + if ( status ) { + closesocket(s); + errno = status; + return (-1); + } + } else { + /* pbm */ + return (-1); + } + + return(s); +} + +/** + * xmlNanoHTTPConnectHost: + * @host: the host name + * @port: the port number + * + * Attempt a connection to the given host:port endpoint. It tries + * the multiple IP provided by the DNS if available. 
+ * + * Returns -1 in case of failure, the file descriptor number otherwise + */ + +static int +xmlNanoHTTPConnectHost(const char *host, int port) +{ + struct hostent *h; + struct sockaddr *addr; + struct in_addr ia; + struct sockaddr_in sin; +#ifdef SUPPORT_IP6 + struct in6_addr ia6; + struct sockaddr_in6 sin6; +#endif + int i; + int s; + +#if defined(SUPPORT_IP6) && defined(RES_USE_INET6) + if (!(_res.options & RES_INIT)) + res_init(); + _res.options |= RES_USE_INET6; +#endif + h=gethostbyname(host); + if (h==NULL) + { +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext,"unable to resolve '%s'.\n", host); +#endif + return(-1); + } + + for(i=0; h->h_addr_list[i]; i++) + { + if (h->h_addrtype == AF_INET) { + /* A records (IPv4) */ + memcpy(&ia, h->h_addr_list[i], h->h_length); + sin.sin_family = h->h_addrtype; + sin.sin_addr = ia; + sin.sin_port = htons(port); + addr = (struct sockaddr *)&sin; +#ifdef SUPPORT_IP6 + } else if (h->h_addrtype == AF_INET6) { + /* AAAA records (IPv6) */ + memcpy(&ia6, h->h_addr_list[i], h->h_length); + sin6.sin_family = h->h_addrtype; + sin6.sin_addr = ia6; + sin6.sin_port = htons(port); + addr = (struct sockaddr *)&sin6; +#endif + } else + break; /* for */ + + s = xmlNanoHTTPConnectAttempt(addr, port); + if (s != -1) + return(s); + } + +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "unable to connect to '%s'.\n", host); +#endif + return(-1); +} + + +/** + * xmlNanoHTTPOpen: + * @URL: The URL to load + * @contentType: if available the Content-Type information will be + * returned at that location + * + * This function try to open a connection to the indicated resource + * via HTTP GET. + * + * Returns NULL in case of failure, otherwise a request handler. 
+ * The contentType, if provided must be freed by the caller + */ + +void* +xmlNanoHTTPOpen(const char *URL, char **contentType) { + if (contentType != NULL) *contentType = NULL; + return xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL); +} + +/** + * xmlNanoHTTPRead: + * @ctx: the HTTP context + * @dest: a buffer + * @len: the buffer length + * + * This function tries to read @len bytes from the existing HTTP connection + * and saves them in @dest. This is a blocking call. + * + * Returns the number of byte read. 0 is an indication of an end of connection. + * -1 indicates a parameter error. + */ +int +xmlNanoHTTPRead(void *ctx, void *dest, int len) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctx == NULL) return(-1); + if (dest == NULL) return(-1); + if (len <= 0) return(0); + + while (ctxt->inptr - ctxt->inrptr < len) { + if (xmlNanoHTTPRecv(ctxt) == 0) break; + } + if (ctxt->inptr - ctxt->inrptr < len) + len = ctxt->inptr - ctxt->inrptr; + memcpy(dest, ctxt->inrptr, len); + ctxt->inrptr += len; + return(len); +} + +/** + * xmlNanoHTTPClose: + * @ctx: the HTTP context + * + * This function closes an HTTP context, it ends up the connection and + * free all data related to it. + */ +void +xmlNanoHTTPClose(void *ctx) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctx == NULL) return; + + xmlNanoHTTPFreeCtxt(ctxt); +} + +/** + * xmlNanoHTTPMethod: + * @URL: The URL to load + * @method: the HTTP method to use + * @input: the input string if any + * @contentType: the Content-Type information IN and OUT + * @headers: the extra headers + * + * This function try to open a connection to the indicated resource + * via HTTP using the given @method, adding the given extra headers + * and the input buffer for the request content. + * + * Returns NULL in case of failure, otherwise a request handler. 
+ * The contentType, if provided must be freed by the caller + */ + +void* +xmlNanoHTTPMethod(const char *URL, const char *method, const char *input, + char **contentType, const char *headers) { + xmlNanoHTTPCtxtPtr ctxt; + char *bp, *p; + int blen, ilen, ret; + int head; + int nbRedirects = 0; + char *redirURL = NULL; + + if (URL == NULL) return(NULL); + if (method == NULL) method = "GET"; + xmlNanoHTTPInit(); + +retry: + if (redirURL == NULL) + ctxt = xmlNanoHTTPNewCtxt(URL); + else { + ctxt = xmlNanoHTTPNewCtxt(redirURL); + xmlFree(redirURL); + redirURL = NULL; + } + + if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) { + xmlNanoHTTPFreeCtxt(ctxt); + if (redirURL != NULL) xmlFree(redirURL); + return(NULL); + } + if (ctxt->hostname == NULL) { + xmlNanoHTTPFreeCtxt(ctxt); + return(NULL); + } + if (proxy) { + blen = strlen(ctxt->hostname) * 2 + 16; + ret = xmlNanoHTTPConnectHost(proxy, proxyPort); + } + else { + blen = strlen(ctxt->hostname); + ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port); + } + if (ret < 0) { + xmlNanoHTTPFreeCtxt(ctxt); + return(NULL); + } + ctxt->fd = ret; + + if (input != NULL) { + ilen = strlen(input); + blen += ilen + 32; + } + else + ilen = 0; + if (headers != NULL) + blen += strlen(headers); + if (contentType && *contentType) + blen += strlen(*contentType) + 16; + blen += strlen(method) + strlen(ctxt->path) + 23; + bp = xmlMalloc(blen); + if (proxy) { + if (ctxt->port != 80) { + sprintf(bp, "%s http://%s:%d%s", method, ctxt->hostname, + ctxt->port, ctxt->path); + } + else + sprintf(bp, "%s http://%s%s", method, ctxt->hostname, ctxt->path); + } + else + sprintf(bp, "%s %s", method, ctxt->path); + p = bp + strlen(bp); + sprintf(p, " HTTP/1.0\r\nHost: %s\r\n", ctxt->hostname); + p += strlen(p); + if (contentType != NULL && *contentType) { + sprintf(p, "Content-Type: %s\r\n", *contentType); + p += strlen(p); + } + if (headers != NULL) { + strcpy(p, headers); + p += strlen(p); + } + if (input != NULL) + sprintf(p, 
"Content-Length: %d\r\n\r\n%s", ilen, input); + else + strcpy(p, "\r\n"); +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "-> %s%s", proxy? "(Proxy) " : "", bp); + if ((blen -= strlen(bp)+1) < 0) + xmlGenericError(xmlGenericErrorContext, + "ERROR: overflowed buffer by %d bytes\n", -blen); +#endif + ctxt->outptr = ctxt->out = bp; + ctxt->state = XML_NANO_HTTP_WRITE; + xmlNanoHTTPSend(ctxt); + ctxt->state = XML_NANO_HTTP_READ; + head = 1; + + while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) { + if (head && (*p == 0)) { + head = 0; + ctxt->content = ctxt->inrptr; + xmlFree(p); + break; + } + xmlNanoHTTPScanAnswer(ctxt, p); + +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, "<- %s\n", p); +#endif + xmlFree(p); + } + + if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) && + (ctxt->returnValue < 400)) { +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "\nRedirect to: %s\n", ctxt->location); +#endif + while (xmlNanoHTTPRecv(ctxt)) ; + if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) { + nbRedirects++; + redirURL = xmlMemStrdup(ctxt->location); + xmlNanoHTTPFreeCtxt(ctxt); + goto retry; + } + xmlNanoHTTPFreeCtxt(ctxt); +#ifdef DEBUG_HTTP + xmlGenericError(xmlGenericErrorContext, + "Too many redirects, aborting ...\n"); +#endif + return(NULL); + + } + + if (contentType != NULL) { + if (ctxt->contentType != NULL) + *contentType = xmlMemStrdup(ctxt->contentType); + else + *contentType = NULL; + } + +#ifdef DEBUG_HTTP + if (ctxt->contentType != NULL) + xmlGenericError(xmlGenericErrorContext, + "\nCode %d, content-type '%s'\n\n", + ctxt->returnValue, ctxt->contentType); + else + xmlGenericError(xmlGenericErrorContext, + "\nCode %d, no content-type\n\n", + ctxt->returnValue); +#endif + + return((void *) ctxt); +} + +/** + * xmlNanoHTTPFetch: + * @URL: The URL to load + * @filename: the filename where the content should be saved + * @contentType: if available the Content-Type information will be + * returned at that location + * + * This 
function try to fetch the indicated resource via HTTP GET + * and save it's content in the file. + * + * Returns -1 in case of failure, 0 incase of success. The contentType, + * if provided must be freed by the caller + */ +int +xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) { + void *ctxt; + char buf[4096]; + int fd; + int len; + + ctxt = xmlNanoHTTPOpen(URL, contentType); + if (ctxt == NULL) return(-1); + + if (!strcmp(filename, "-")) + fd = 0; + else { + fd = open(filename, O_CREAT | O_WRONLY, 00644); + if (fd < 0) { + xmlNanoHTTPClose(ctxt); + if ((contentType != NULL) && (*contentType != NULL)) { + xmlFree(*contentType); + *contentType = NULL; + } + return(-1); + } + } + + while ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0) { + write(fd, buf, len); + } + + xmlNanoHTTPClose(ctxt); + close(fd); + return(0); +} + +/** + * xmlNanoHTTPSave: + * @ctxt: the HTTP context + * @filename: the filename where the content should be saved + * + * This function saves the output of the HTTP transaction to a file + * It closes and free the context at the end + * + * Returns -1 in case of failure, 0 incase of success. + */ +int +xmlNanoHTTPSave(void *ctxt, const char *filename) { + char buf[4096]; + int fd; + int len; + + if (ctxt == NULL) return(-1); + + if (!strcmp(filename, "-")) + fd = 0; + else { + fd = open(filename, O_CREAT | O_WRONLY); + if (fd < 0) { + xmlNanoHTTPClose(ctxt); + return(-1); + } + } + + while ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0) { + write(fd, buf, len); + } + + xmlNanoHTTPClose(ctxt); + return(0); +} + +/** + * xmlNanoHTTPReturnCode: + * @ctx: the HTTP context + * + * Returns the HTTP return code for the request. 
+ */ +int +xmlNanoHTTPReturnCode(void *ctx) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctxt == NULL) return(-1); + + return(ctxt->returnValue); +} + +/** + * xmlNanoHTTPAuthHeader: + * @ctx: the HTTP context + * + * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate + * header. + */ +const char * +xmlNanoHTTPAuthHeader(void *ctx) { + xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; + + if (ctxt == NULL) return(NULL); + + return(ctxt->authHeader); +} + +#ifdef STANDALONE +int main(int argc, char **argv) { + char *contentType = NULL; + + if (argv[1] != NULL) { + if (argv[2] != NULL) + xmlNanoHTTPFetch(argv[1], argv[2], &contentType); + else + xmlNanoHTTPFetch(argv[1], "-", &contentType); + if (contentType != NULL) xmlFree(contentType); + } else { + xmlGenericError(xmlGenericErrorContext, + "%s: minimal HTTP GET implementation\n", argv[0]); + xmlGenericError(xmlGenericErrorContext, + "\tusage %s [ URL [ filename ] ]\n", argv[0]); + } + xmlNanoHTTPCleanup(); + xmlMemoryDump(); + return(0); +} +#endif /* STANDALONE */ +#else /* !LIBXML_HTTP_ENABLED */ +#ifdef STANDALONE +#include +int main(int argc, char **argv) { + xmlGenericError(xmlGenericErrorContext, + "%s : HTTP support not compiled in\n", argv[0]); + return(0); +} +#endif /* STANDALONE */ +#endif /* LIBXML_HTTP_ENABLED */ diff --git a/nanohttp.h b/nanohttp.h new file mode 100644 index 00000000..78d1c44d --- /dev/null +++ b/nanohttp.h @@ -0,0 +1,44 @@ +/* + * nanohttp.c: minimalist HTTP implementation to fetch external subsets. + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __NANO_HTTP_H__ +#define __NANO_HTTP_H__ + +#include +#ifdef LIBXML_HTTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif +void xmlNanoHTTPInit (void); +void xmlNanoHTTPCleanup (void); +void xmlNanoHTTPScanProxy (const char *URL); +int xmlNanoHTTPFetch (const char *URL, + const char *filename, + char **contentType); +void * xmlNanoHTTPMethod (const char *URL, + const char *method, + const char *input, + char **contentType, + const char *headers); +void * xmlNanoHTTPOpen (const char *URL, + char **contentType); +int xmlNanoHTTPReturnCode (void *ctx); +const char * xmlNanoHTTPAuthHeader(void *ctx); +int xmlNanoHTTPRead (void *ctx, + void *dest, + int len); +int xmlNanoHTTPSave (void *ctxt, + const char *filename); +void xmlNanoHTTPClose (void *ctx); +#ifdef __cplusplus +} + +#endif /* LIBXML_HTTP_ENABLED */ +#endif +#endif /* __NANO_HTTP_H__ */ diff --git a/parser.c b/parser.c new file mode 100644 index 00000000..55416288 --- /dev/null +++ b/parser.c @@ -0,0 +1,9867 @@ +/* + * parser.c : an XML 1.0 parser, namespaces and validity support are mostly + * implemented on top of the SAX interfaces + * + * References: + * The XML specification: + * http://www.w3.org/TR/REC-xml + * Original 1.0 version: + * http://www.w3.org/TR/1998/REC-xml-19980210 + * XML second edition working draft + * http://www.w3.org/TR/2000/WD-xml-2e-20000814 + * + * Okay this is a big file, the parser core is around 7000 lines, then it + * is followed by the progressive parser top routines, then the various + * high level APIs to call the parser and a few miscelaneous functions. + * A number of helper functions and deprecated ones have been moved to + * parserInternals.c to reduce this file size. + * As much as possible the functions are associated with their relative + * production in the XML specification. 
A few productions defining the + * different ranges of character are actually implanted either in + * parserInternals.h or parserInternals.c + * The DOM tree build is realized from the default SAX callbacks in + * the module SAX.c. + * The routines doing the validation checks are in valid.c and called either + * from the SAx callbacks or as standalones functions using a preparsed + * document. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + * + * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue + * and xmlDoValidityCheckingDefaultValue for VMS + */ + +#ifdef WIN32 +#include "win32config.h" +#define XML_DIR_SEP '\\' +#else +#include "config.h" +#define XML_DIR_SEP '/' +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_CTYPE_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_ZLIB_H +#include +#endif + + +#define XML_PARSER_BIG_BUFFER_SIZE 1000 +#define XML_PARSER_BUFFER_SIZE 100 + +/* + * Various global defaults for parsing + */ +int xmlGetWarningsDefaultValue = 1; +int xmlParserDebugEntities = 0; +#ifdef VMS +int xmlSubstituteEntitiesDefaultVal = 0; +#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal +int xmlDoValidityCheckingDefaultVal = 0; +#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal +#else +int xmlSubstituteEntitiesDefaultValue = 0; +int xmlDoValidityCheckingDefaultValue = 0; +#endif +int xmlLoadExtDtdDefaultValue = 0; +int xmlPedanticParserDefaultValue = 0; +int xmlKeepBlanksDefaultValue = 1; + +/* + * List of XML prefixed PI allowed by W3C specs + */ + +const char *xmlW3CPIs[] = { + "xml-stylesheet", + NULL +}; + +/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ +void 
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); +xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, + const xmlChar **str); + + +/************************************************************************ + * * + * Parser stacks related functions and macros * + * * + ************************************************************************/ + +xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, + const xmlChar ** str); + +/* + * Generic function for accessing stacks in the Parser Context + */ + +#define PUSH_AND_POP(scope, type, name) \ +scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ + if (ctxt->name##Nr >= ctxt->name##Max) { \ + ctxt->name##Max *= 2; \ + ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ + ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ + if (ctxt->name##Tab == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "realloc failed !\n"); \ + return(0); \ + } \ + } \ + ctxt->name##Tab[ctxt->name##Nr] = value; \ + ctxt->name = value; \ + return(ctxt->name##Nr++); \ +} \ +scope type name##Pop(xmlParserCtxtPtr ctxt) { \ + type ret; \ + if (ctxt->name##Nr <= 0) return(0); \ + ctxt->name##Nr--; \ + if (ctxt->name##Nr > 0) \ + ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ + else \ + ctxt->name = NULL; \ + ret = ctxt->name##Tab[ctxt->name##Nr]; \ + ctxt->name##Tab[ctxt->name##Nr] = 0; \ + return(ret); \ +} \ + +/* + * Those macros actually generate the functions + */ +PUSH_AND_POP(extern, xmlParserInputPtr, input) +PUSH_AND_POP(extern, xmlNodePtr, node) +PUSH_AND_POP(extern, xmlChar*, name) + +int spacePush(xmlParserCtxtPtr ctxt, int val) { + if (ctxt->spaceNr >= ctxt->spaceMax) { + ctxt->spaceMax *= 2; + ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, + ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); + if (ctxt->spaceTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return(0); + } + } + ctxt->spaceTab[ctxt->spaceNr] = val; + ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; + 
return(ctxt->spaceNr++); +} + +int spacePop(xmlParserCtxtPtr ctxt) { + int ret; + if (ctxt->spaceNr <= 0) return(0); + ctxt->spaceNr--; + if (ctxt->spaceNr > 0) + ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; + else + ctxt->space = NULL; + ret = ctxt->spaceTab[ctxt->spaceNr]; + ctxt->spaceTab[ctxt->spaceNr] = -1; + return(ret); +} + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one often need to make assumption on the context to + * use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * To be used with extreme caution since operations consuming + * characters may move the input buffer to a different location ! + * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * RAW same as CUR but in the input buffer, bypass any token + * extraction that may have been done + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * + * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding + * + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. + * NEXTL(l) Skip l xmlChars in the input buffer + * CUR_CHAR(l) returns the current unicode character (int), set l + * to the number of xmlChars used for the encoding [0-5]. + * CUR_SCHAR same but operate on a string instead of the context + * COPY_BUF copy the current unicode char to the target buffer, increment + * the index + * GROW, SHRINK handling of input buffers + */ + +#define RAW (ctxt->token ? -1 : (*ctxt->input->cur)) +#define CUR (ctxt->token ? 
ctxt->token : (*ctxt->input->cur)) +#define NXT(val) ctxt->input->cur[(val)] +#define CUR_PTR ctxt->input->cur + +#define SKIP(val) do { \ + ctxt->nbChars += (val),ctxt->input->cur += (val); \ + if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ + /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\ + if ((*ctxt->input->cur == 0) && \ + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ + xmlPopInput(ctxt); \ + } while (0) + +#define SHRINK do { \ + xmlParserInputShrink(ctxt->input); \ + if ((*ctxt->input->cur == 0) && \ + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ + xmlPopInput(ctxt); \ + } while (0) + +#define GROW do { \ + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ + if ((*ctxt->input->cur == 0) && \ + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ + xmlPopInput(ctxt); \ + } while (0) + +#define SKIP_BLANKS xmlSkipBlankChars(ctxt) + +#define NEXT xmlNextChar(ctxt) + +#define NEXTL(l) do { \ + if (*(ctxt->input->cur) == '\n') { \ + ctxt->input->line++; ctxt->input->col = 1; \ + } else ctxt->input->col++; \ + ctxt->token = 0; ctxt->input->cur += l; \ + if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ + /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\ + } while (0) + +#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) +#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) + +#define COPY_BUF(l,b,i,v) \ + if (l == 1) b[i++] = (xmlChar) v; \ + else i += xmlCopyChar(l,&b[i],v) + +/** + * xmlSkipBlankChars: + * @ctxt: the XML parser context + * + * skip all blanks character found at that point in the input streams. + * It pops up finished entities in the process if allowable at that point. + * + * Returns the number of space chars skipped + */ + +int +xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { + int cur, res = 0; + + /* + * It's Okay to use CUR/NEXT here since all the blanks are on + * the ASCII range. 
+ */ + do { + cur = CUR; + while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ + NEXT; + cur = CUR; + res++; + } + while ((cur == 0) && (ctxt->inputNr > 1) && + (ctxt->instate != XML_PARSER_COMMENT)) { + xmlPopInput(ctxt); + cur = CUR; + } + /* + * Need to handle support of entities branching here + */ + if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); + /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */ + } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ + return(res); +} + +/************************************************************************ + * * + * Commodity functions to handle entities * + * * + ************************************************************************/ + +/** + * xmlPopInput: + * @ctxt: an XML parser context + * + * xmlPopInput: the current input pointed by ctxt->input came to an end + * pop it and return the next char. + * + * Returns the current xmlChar in the parser context + */ +xmlChar +xmlPopInput(xmlParserCtxtPtr ctxt) { + if (ctxt->inputNr == 1) return(0); /* End of main Input */ + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "Popping input %d\n", ctxt->inputNr); + xmlFreeInputStream(inputPop(ctxt)); + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) + return(xmlPopInput(ctxt)); + return(CUR); +} + +/** + * xmlPushInput: + * @ctxt: an XML parser context + * @input: an XML parser input fragment (entity, XML fragment ...). + * + * xmlPushInput: switch to a new input stream which is stacked on top + * of the previous one(s). 
+ */ +void +xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { + if (input == NULL) return; + + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + xmlGenericError(xmlGenericErrorContext, + "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + xmlGenericError(xmlGenericErrorContext, + "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); + } + inputPush(ctxt, input); + GROW; +} + +/** + * xmlParseCharRef: + * @ctxt: an XML parser context + * + * parse Reference declarations + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + * + * Returns the value parsed (as an int), 0 in case of error + */ +int +xmlParseCharRef(xmlParserCtxtPtr ctxt) { + int val = 0; + int count = 0; + + if (ctxt->token != 0) { + val = ctxt->token; + ctxt->token = 0; + return(val); + } + /* + * Using RAW/CUR/NEXT is okay since we are working on ASCII range here + */ + if ((RAW == '&') && (NXT(1) == '#') && + (NXT(2) == 'x')) { + SKIP(3); + GROW; + while (RAW != ';') { /* loop blocked by count */ + if ((RAW >= '0') && (RAW <= '9') && (count < 20)) + val = val * 16 + (CUR - '0'); + else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) + val = val * 16 + (CUR - 'a') + 10; + else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) + val = val * 16 + (CUR - 'A') + 10; + else { + ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid hexadecimal value\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + val = 0; + break; + } + NEXT; + count++; + } + if (RAW == ';') { + /* on purpose to avoid reentrancy problems with NEXT and SKIP */ + ctxt->nbChars ++; + ctxt->input->cur++; + } + } else if ((RAW == '&') && (NXT(1) == '#')) { + SKIP(2); + GROW; + while (RAW != ';') { /* loop blocked by count */ 
+ if ((RAW >= '0') && (RAW <= '9') && (count < 20)) + val = val * 10 + (CUR - '0'); + else { + ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid decimal value\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + val = 0; + break; + } + NEXT; + count++; + } + if (RAW == ';') { + /* on purpose to avoid reentrancy problems with NEXT and SKIP */ + ctxt->nbChars ++; + ctxt->input->cur++; + } + } else { + ctxt->errNo = XML_ERR_INVALID_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid value\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + + /* + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. + */ + if (IS_CHAR(val)) { + return(val); + } else { + ctxt->errNo = XML_ERR_INVALID_CHAR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", + val); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + return(0); +} + +/** + * xmlParseStringCharRef: + * @ctxt: an XML parser context + * @str: a pointer to an index in the string + * + * parse Reference declarations, variant parsing from a string rather + * than an an input flow. + * + * [66] CharRef ::= '&#' [0-9]+ ';' | + * '&#x' [0-9a-fA-F]+ ';' + * + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. 
+ * + * Returns the value parsed (as an int), 0 in case of error, str will be + * updated to the current value of the index + */ +int +xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { + const xmlChar *ptr; + xmlChar cur; + int val = 0; + + if ((str == NULL) || (*str == NULL)) return(0); + ptr = *str; + cur = *ptr; + if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { + ptr += 3; + cur = *ptr; + while (cur != ';') { /* Non input consuming loop */ + if ((cur >= '0') && (cur <= '9')) + val = val * 16 + (cur - '0'); + else if ((cur >= 'a') && (cur <= 'f')) + val = val * 16 + (cur - 'a') + 10; + else if ((cur >= 'A') && (cur <= 'F')) + val = val * 16 + (cur - 'A') + 10; + else { + ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringCharRef: invalid hexadecimal value\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + val = 0; + break; + } + ptr++; + cur = *ptr; + } + if (cur == ';') + ptr++; + } else if ((cur == '&') && (ptr[1] == '#')){ + ptr += 2; + cur = *ptr; + while (cur != ';') { /* Non input consuming loops */ + if ((cur >= '0') && (cur <= '9')) + val = val * 10 + (cur - '0'); + else { + ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringCharRef: invalid decimal value\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + val = 0; + break; + } + ptr++; + cur = *ptr; + } + if (cur == ';') + ptr++; + } else { + ctxt->errNo = XML_ERR_INVALID_CHARREF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseCharRef: invalid value\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return(0); + } + *str = ptr; + + /* + * [ WFC: Legal Character ] + * Characters referred to using character references must match the + * production for Char. 
+ */ + if (IS_CHAR(val)) { + return(val); + } else { + ctxt->errNo = XML_ERR_INVALID_CHAR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "CharRef: invalid xmlChar value %d\n", val); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + return(0); +} + +/** + * xmlParserHandlePEReference: + * @ctxt: the parser context + * + * [69] PEReference ::= '%' Name ';' + * + * [ WFC: No Recursion ] + * A parsed entity must not contain a recursive + * reference to itself, either directly or indirectly. + * + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an internal DTD + * subset which contains no parameter entity references, or a document + * with "standalone='yes'", ... ... The declaration of a parameter + * entity must precede any reference to it... + * + * [ VC: Entity Declared ] + * In a document with an external subset or external parameter entities + * with "standalone='no'", ... ... The declaration of a parameter entity + * must precede any reference to it... + * + * [ WFC: In DTD ] + * Parameter-entity references may only appear in the DTD. + * NOTE: misleading but this is handled. + * + * A PEReference may have been detected in the current input stream + * the handling is done accordingly to + * http://www.w3.org/TR/REC-xml#entproc + * i.e. 
+ * - Included in literal in entity values + * - Included as Paraemeter Entity reference within DTDs + */ +void +xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { + xmlChar *name; + xmlEntityPtr entity = NULL; + xmlParserInputPtr input; + + if (ctxt->token != 0) { + return; + } + if (RAW != '%') return; + switch(ctxt->instate) { + case XML_PARSER_CDATA_SECTION: + return; + case XML_PARSER_COMMENT: + return; + case XML_PARSER_START_TAG: + return; + case XML_PARSER_END_TAG: + return; + case XML_PARSER_EOF: + ctxt->errNo = XML_ERR_PEREF_AT_EOF; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return; + case XML_PARSER_PROLOG: + case XML_PARSER_START: + case XML_PARSER_MISC: + ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return; + case XML_PARSER_ENTITY_DECL: + case XML_PARSER_CONTENT: + case XML_PARSER_ATTRIBUTE_VALUE: + case XML_PARSER_PI: + case XML_PARSER_SYSTEM_LITERAL: + /* we just ignore it there */ + return; + case XML_PARSER_EPILOG: + ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return; + case XML_PARSER_ENTITY_VALUE: + /* + * NOTE: in the case of entity values, we don't do the + * substitution here since we need the literal + * entity value to be able to save the internal + * subset of the document. + * This will be handled by xmlStringDecodeEntities + */ + return; + case XML_PARSER_DTD: + /* + * [WFC: Well-Formedness Constraint: PEs in Internal Subset] + * In the internal DTD subset, parameter-entity references + * can occur only where markup declarations can occur, not + * within markup declarations. 
+ * In that case this is handled in xmlParseMarkupDecl + */ + if ((ctxt->external == 0) && (ctxt->inputNr == 1)) + return; + break; + case XML_PARSER_IGNORE: + return; + } + + NEXT; + name = xmlParseName(ctxt); + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "PE Reference: %s\n", name); + if (name == NULL) { + ctxt->errNo = XML_ERR_PEREF_NO_NAME; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } else { + if (RAW == ';') { + NEXT; + if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) + entity = ctxt->sax->getParameterEntity(ctxt->userData, name); + if (entity == NULL) { + + /* + * [ WFC: Entity Declared ] + * In a document without any DTD, a document with only an + * internal DTD subset which contains no parameter entity + * references, or a document with "standalone='yes'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... + */ + if ((ctxt->standalone == 1) || + ((ctxt->hasExternalSubset == 0) && + (ctxt->hasPErefs == 0))) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "PEReference: %%%s; not found\n", name); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } else { + /* + * [ VC: Entity Declared ] + * In a document with an external subset or external + * parameter entities with "standalone='no'", ... + * ... The declaration of a parameter entity must precede + * any reference to it... 
+ */ + if ((!ctxt->disableSAX) && + (ctxt->validate) && (ctxt->vctxt.error != NULL)) { + ctxt->vctxt.error(ctxt->vctxt.userData, + "PEReference: %%%s; not found\n", name); + } else if ((!ctxt->disableSAX) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "PEReference: %%%s; not found\n", name); + ctxt->valid = 0; + } + } else { + if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || + (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { + /* + * handle the extra spaces added before and after + * c.f. http://www.w3.org/TR/REC-xml#as-PE + * this is done independantly. + */ + input = xmlNewEntityInputStream(ctxt, entity); + xmlPushInput(ctxt, input); + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && + (RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + } + if (ctxt->token == 0) + ctxt->token = ' '; + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlHandlePEReference: %s is not a parameter entity\n", + name); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + } + } else { + ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlHandlePEReference: expecting ';'\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + xmlFree(name); + } +} + +/* + * Macro used to grow the current buffer. 
+ */ +#define growBuffer(buffer) { \ + buffer##_size *= 2; \ + buffer = (xmlChar *) \ + xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ + if (buffer == NULL) { \ + perror("realloc failed"); \ + return(NULL); \ + } \ +} + +/** + * xmlStringDecodeEntities: + * @ctxt: the parser context + * @str: the input string + * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF + * @end: an end marker xmlChar, 0 if none + * @end2: an end marker xmlChar, 0 if none + * @end3: an end marker xmlChar, 0 if none + * + * Takes a entity string content and process to do the adequate subtitutions. + * + * [67] Reference ::= EntityRef | CharRef + * + * [69] PEReference ::= '%' Name ';' + * + * Returns A newly allocated string with the substitution done. The caller + * must deallocate it ! + */ +xmlChar * +xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, + xmlChar end, xmlChar end2, xmlChar end3) { + xmlChar *buffer = NULL; + int buffer_size = 0; + + xmlChar *current = NULL; + xmlEntityPtr ent; + int c,l; + int nbchars = 0; + + if (str == NULL) + return(NULL); + + if (ctxt->depth > 40) { + ctxt->errNo = XML_ERR_ENTITY_LOOP; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Detected entity reference loop\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return(NULL); + } + + /* + * allocate a translation buffer. + */ + buffer_size = XML_PARSER_BIG_BUFFER_SIZE; + buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); + if (buffer == NULL) { + perror("xmlDecodeEntities: malloc failed"); + return(NULL); + } + + /* + * Ok loop until we reach one of the ending char or a size limit. + * we are operating on already parsed values. 
+ */ + c = CUR_SCHAR(str, l); + while ((c != 0) && (c != end) && /* non input consuming loop */ + (c != end2) && (c != end3)) { + + if (c == 0) break; + if ((c == '&') && (str[1] == '#')) { + int val = xmlParseStringCharRef(ctxt, &str); + if (val != 0) { + COPY_BUF(0,buffer,nbchars,val); + } + } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "String decoding Entity Reference: %.30s\n", + str); + ent = xmlParseStringEntityRef(ctxt, &str); + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (ent->content != NULL) { + COPY_BUF(0,buffer,nbchars,ent->content[0]); + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "internal error entity has no content\n"); + } + } else if ((ent != NULL) && (ent->content != NULL)) { + xmlChar *rep; + + ctxt->depth++; + rep = xmlStringDecodeEntities(ctxt, ent->content, what, + 0, 0, 0); + ctxt->depth--; + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming loop */ + buffer[nbchars++] = *current++; + if (nbchars > + buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + xmlFree(rep); + } + } else if (ent != NULL) { + int i = xmlStrlen(ent->name); + const xmlChar *cur = ent->name; + + buffer[nbchars++] = '&'; + if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + for (;i > 0;i--) + buffer[nbchars++] = *cur++; + buffer[nbchars++] = ';'; + } + } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { + if (xmlParserDebugEntities) + xmlGenericError(xmlGenericErrorContext, + "String decoding PE Reference: %.30s\n", str); + ent = xmlParseStringPEReference(ctxt, &str); + if (ent != NULL) { + xmlChar *rep; + + ctxt->depth++; + rep = xmlStringDecodeEntities(ctxt, ent->content, what, + 0, 0, 0); + ctxt->depth--; + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming loop */ + 
buffer[nbchars++] = *current++; + if (nbchars > + buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + xmlFree(rep); + } + } + } else { + COPY_BUF(l,buffer,nbchars,c); + str += l; + if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { + growBuffer(buffer); + } + } + c = CUR_SCHAR(str, l); + } + buffer[nbchars++] = 0; + return(buffer); +} + + +/************************************************************************ + * * + * Commodity functions to handle xmlChars * + * * + ************************************************************************/ + +/** + * xmlStrndup: + * @cur: the input xmlChar * + * @len: the len of @cur + * + * a strndup for array of xmlChar's + * + * Returns a new xmlChar * or NULL + */ +xmlChar * +xmlStrndup(const xmlChar *cur, int len) { + xmlChar *ret; + + if ((cur == NULL) || (len < 0)) return(NULL); + ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %ld byte failed\n", + (len + 1) * (long)sizeof(xmlChar)); + return(NULL); + } + memcpy(ret, cur, len * sizeof(xmlChar)); + ret[len] = 0; + return(ret); +} + +/** + * xmlStrdup: + * @cur: the input xmlChar * + * + * a strdup for array of xmlChar's. Since they are supposed to be + * encoded in UTF-8 or an encoding with 8bit based chars, we assume + * a termination mark of '0'. 
+ * + * Returns a new xmlChar * or NULL + */ +xmlChar * +xmlStrdup(const xmlChar *cur) { + const xmlChar *p = cur; + + if (cur == NULL) return(NULL); + while (*p != 0) p++; /* non input consuming */ + return(xmlStrndup(cur, p - cur)); +} + +/** + * xmlCharStrndup: + * @cur: the input char * + * @len: the len of @cur + * + * a strndup for char's to xmlChar's + * + * Returns a new xmlChar * or NULL + */ + +xmlChar * +xmlCharStrndup(const char *cur, int len) { + int i; + xmlChar *ret; + + if ((cur == NULL) || (len < 0)) return(NULL); + ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", + (len + 1) * (long)sizeof(xmlChar)); + return(NULL); + } + for (i = 0;i < len;i++) + ret[i] = (xmlChar) cur[i]; + ret[len] = 0; + return(ret); +} + +/** + * xmlCharStrdup: + * @cur: the input char * + * @len: the len of @cur + * + * a strdup for char's to xmlChar's + * + * Returns a new xmlChar * or NULL + */ + +xmlChar * +xmlCharStrdup(const char *cur) { + const char *p = cur; + + if (cur == NULL) return(NULL); + while (*p != '\0') p++; /* non input consuming */ + return(xmlCharStrndup(cur, p - cur)); +} + +/** + * xmlStrcmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * + * a strcmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { + register int tmp; + + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = *str1++ - *str2; + if (tmp != 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +/** + * xmlStrEqual: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * + * Check if both string are equal of have same content + * Should be a bit more readable and faster than xmlStrEqual() + * + * Returns 1 if they are equal, 0 if they are different + */ + +int +xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { + 
if (str1 == str2) return(1); + if (str1 == NULL) return(0); + if (str2 == NULL) return(0); + do { + if (*str1++ != *str2) return(0); + } while (*str2++); + return(1); +} + +/** + * xmlStrncmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * @len: the max comparison length + * + * a strncmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { + register int tmp; + + if (len <= 0) return(0); + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = *str1++ - *str2; + if (tmp != 0 || --len == 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +static xmlChar casemap[256] = { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, + 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, + 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, + 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, + 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, + 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, + 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, + 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, + 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, + 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, + 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, + 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, + 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, + 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, + 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, + 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, + 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, + 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, + 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, + 
0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, + 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, + 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +}; + +/** + * xmlStrcasecmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * + * a strcasecmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { + register int tmp; + + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = casemap[*str1++] - casemap[*str2]; + if (tmp != 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +/** + * xmlStrncasecmp: + * @str1: the first xmlChar * + * @str2: the second xmlChar * + * @len: the max comparison length + * + * a strncasecmp for xmlChar's + * + * Returns the integer result of the comparison + */ + +int +xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { + register int tmp; + + if (len <= 0) return(0); + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + do { + tmp = casemap[*str1++] - casemap[*str2]; + if (tmp != 0 || --len == 0) return(tmp); + } while (*str2++ != 0); + return 0; +} + +/** + * xmlStrchr: + * @str: the xmlChar * array + * @val: the xmlChar to search + * + * a strchr for xmlChar's + * + * Returns the xmlChar * for the first occurence or NULL. + */ + +const xmlChar * +xmlStrchr(const xmlChar *str, xmlChar val) { + if (str == NULL) return(NULL); + while (*str != 0) { /* non input consuming */ + if (*str == val) return((xmlChar *) str); + str++; + } + return(NULL); +} + +/** + * xmlStrstr: + * @str: the xmlChar * array (haystack) + * @val: the xmlChar to search (needle) + * + * a strstr for xmlChar's + * + * Returns the xmlChar * for the first occurence or NULL. 
+ */ + +const xmlChar * +xmlStrstr(const xmlChar *str, xmlChar *val) { + int n; + + if (str == NULL) return(NULL); + if (val == NULL) return(NULL); + n = xmlStrlen(val); + + if (n == 0) return(str); + while (*str != 0) { /* non input consuming */ + if (*str == *val) { + if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); + } + str++; + } + return(NULL); +} + +/** + * xmlStrcasestr: + * @str: the xmlChar * array (haystack) + * @val: the xmlChar to search (needle) + * + * a case-ignoring strstr for xmlChar's + * + * Returns the xmlChar * for the first occurence or NULL. + */ + +const xmlChar * +xmlStrcasestr(const xmlChar *str, xmlChar *val) { + int n; + + if (str == NULL) return(NULL); + if (val == NULL) return(NULL); + n = xmlStrlen(val); + + if (n == 0) return(str); + while (*str != 0) { /* non input consuming */ + if (casemap[*str] == casemap[*val]) + if (!xmlStrncasecmp(str, val, n)) return(str); + str++; + } + return(NULL); +} + +/** + * xmlStrsub: + * @str: the xmlChar * array (haystack) + * @start: the index of the first char (zero based) + * @len: the length of the substring + * + * Extract a substring of a given string + * + * Returns the xmlChar * for the first occurence or NULL. + */ + +xmlChar * +xmlStrsub(const xmlChar *str, int start, int len) { + int i; + + if (str == NULL) return(NULL); + if (start < 0) return(NULL); + if (len < 0) return(NULL); + + for (i = 0;i < start;i++) { + if (*str == 0) return(NULL); + str++; + } + if (*str == 0) return(NULL); + return(xmlStrndup(str, len)); +} + +/** + * xmlStrlen: + * @str: the xmlChar * array + * + * length of a xmlChar's string + * + * Returns the number of xmlChar contained in the ARRAY. 
+ */ + +int +xmlStrlen(const xmlChar *str) { + int len = 0; + + if (str == NULL) return(0); + while (*str != 0) { /* non input consuming */ + str++; + len++; + } + return(len); +} + +/** + * xmlStrncat: + * @cur: the original xmlChar * array + * @add: the xmlChar * array added + * @len: the length of @add + * + * a strncat for array of xmlChar's, it will extend cur with the len + * first bytes of @add. + * + * Returns a new xmlChar *, the original @cur is reallocated if needed + * and should not be freed + */ + +xmlChar * +xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { + int size; + xmlChar *ret; + + if ((add == NULL) || (len == 0)) + return(cur); + if (cur == NULL) + return(xmlStrndup(add, len)); + + size = xmlStrlen(cur); + ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlStrncat: realloc of %ld byte failed\n", + (size + len + 1) * (long)sizeof(xmlChar)); + return(cur); + } + memcpy(&ret[size], add, len * sizeof(xmlChar)); + ret[size + len] = 0; + return(ret); +} + +/** + * xmlStrcat: + * @cur: the original xmlChar * array + * @add: the xmlChar * array added + * + * a strcat for array of xmlChar's. Since they are supposed to be + * encoded in UTF-8 or an encoding with 8bit based chars, we assume + * a termination mark of '0'. + * + * Returns a new xmlChar * containing the concatenated string. + */ +xmlChar * +xmlStrcat(xmlChar *cur, const xmlChar *add) { + const xmlChar *p = add; + + if (add == NULL) return(cur); + if (cur == NULL) + return(xmlStrdup(add)); + + while (*p != 0) p++; /* non input consuming */ + return(xmlStrncat(cur, add, p - add)); +} + +/************************************************************************ + * * + * Commodity functions, cleanup needed ? 
* + * * + ************************************************************************/ + +/** + * areBlanks: + * @ctxt: an XML parser context + * @str: a xmlChar * + * @len: the size of @str + * + * Is this a sequence of blank chars that one can ignore ? + * + * Returns 1 if ignorable 0 otherwise. + */ + +static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { + int i, ret; + xmlNodePtr lastChild; + + /* + * Check for xml:space value. + */ + if (*(ctxt->space) == 1) + return(0); + + /* + * Check that the string is made of blanks + */ + for (i = 0;i < len;i++) + if (!(IS_BLANK(str[i]))) return(0); + + /* + * Look if the element is mixed content in the Dtd if available + */ + if (ctxt->myDoc != NULL) { + ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); + if (ret == 0) return(1); + if (ret == 1) return(0); + } + + /* + * Otherwise, heuristic :-\ + */ + if (ctxt->keepBlanks) + return(0); + if (RAW != '<') return(0); + if (ctxt->node == NULL) return(0); + if ((ctxt->node->children == NULL) && + (RAW == '<') && (NXT(1) == '/')) return(0); + + lastChild = xmlGetLastChild(ctxt->node); + if (lastChild == NULL) { + if (ctxt->node->content != NULL) return(0); + } else if (xmlNodeIsText(lastChild)) + return(0); + else if ((ctxt->node->children != NULL) && + (xmlNodeIsText(ctxt->node->children))) + return(0); + return(1); +} + +/* + * Forward definition for recusive behaviour. + */ +void xmlParsePEReference(xmlParserCtxtPtr ctxt); +void xmlParseReference(xmlParserCtxtPtr ctxt); + +/************************************************************************ + * * + * Extra stuff for namespace support * + * Relates to http://www.w3.org/TR/WD-xml-names * + * * + ************************************************************************/ + +/** + * xmlSplitQName: + * @ctxt: an XML parser context + * @name: an XML parser context + * @prefix: a xmlChar ** + * + * parse an UTF8 encoded XML qualified name string + * + * [NS 5] QName ::= (Prefix ':')? 
LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns the local part, and prefix is updated + * to get the Prefix if any. + */ + +xmlChar * +xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + xmlChar *buffer = NULL; + int len = 0; + int max = XML_MAX_NAMELEN; + xmlChar *ret = NULL; + const xmlChar *cur = name; + int c; + + *prefix = NULL; + +#ifndef XML_XML_NAMESPACE + /* xml: prefix is not really a namespace */ + if ((cur[0] == 'x') && (cur[1] == 'm') && + (cur[2] == 'l') && (cur[3] == ':')) + return(xmlStrdup(name)); +#endif + + /* nasty but valid */ + if (cur[0] == ':') + return(xmlStrdup(name)); + + c = *cur++; + while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((c != 0) && (c != ':')) { /* tested bigname.xml */ + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + buffer = NULL; + max = XML_MAX_NAMELEN; + } + + + if (c == ':') { + c = *cur++; + if (c == 0) return(ret); + *prefix = ret; + len = 0; + + while ((c != 0) && (len < max)) { /* tested bigname2.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay 
someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while (c != 0) { /* tested bigname2.xml */ + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + } + } + + return(ret); +} + +/************************************************************************ + * * + * The parser itself * + * Relates to http://www.w3.org/TR/REC-xml * + * * + ************************************************************************/ + +/** + * xmlParseName: + * @ctxt: an XML parser context + * + * parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL + */ + +xmlChar * +xmlParseName(xmlParserCtxtPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + int count = 0; + + GROW; + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!IS_LETTER(c) && (c != '_') && + (c != ':'))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + if (count++ > 100) { + count = 0; + GROW; + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + if (len >= XML_MAX_NAMELEN) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + buffer[len] = 0; + return(buffer); + } + } + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseStringName: + * @ctxt: an XML parser context + * @str: a pointer to the string pointer 
(IN/OUT) + * + * parse an XML name. + * + * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL. The str pointer + * is updated to the current location in the string. + */ + +xmlChar * +xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + const xmlChar *cur = *str; + int len = 0, l; + int c; + + c = CUR_SCHAR(cur, l); + if (!IS_LETTER(c) && (c != '_') && + (c != ':')) { + return(NULL); + } + + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. 
+ */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + } + buffer[len] = 0; + *str = cur; + return(buffer); + } + } + *str = cur; + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseNmtoken: + * @ctxt: an XML parser context + * + * parse an XML Nmtoken. + * + * [7] Nmtoken ::= (NameChar)+ + * + * [8] Nmtokens ::= Nmtoken (S Nmtoken)* + * + * Returns the Nmtoken parsed or NULL + */ + +xmlChar * +xmlParseNmtoken(xmlParserCtxtPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + int count = 0; + + GROW; + c = CUR_CHAR(l); + + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + c = CUR_CHAR(l); + if (len >= XML_MAX_NAMELEN) { + /* + * Okay someone managed to make a huge token, so he's ready to pay + * for the processing speed. 
+ */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNmtoken: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); + NEXTL(l); + c = CUR_CHAR(l); + } + buffer[len] = 0; + return(buffer); + } + } + if (len == 0) + return(NULL); + return(xmlStrndup(buf, len)); +} + +/** + * xmlParseEntityValue: + * @ctxt: an XML parser context + * @orig: if non-NULL store a copy of the original entity value + * + * parse a value for ENTITY declarations + * + * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | + * "'" ([^%&'] | PEReference | Reference)* "'" + * + * Returns the EntityValue parsed with reference substitued or NULL + */ + +xmlChar * +xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int c, l; + xmlChar stop; + xmlChar *ret = NULL; + const xmlChar *cur = NULL; + xmlParserInputPtr input; + + if (RAW == '"') stop = '"'; + else if (RAW == '\'') stop = '\''; + else { + ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return(NULL); + } + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf 
== NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + + /* + * The content of the entity definition is copied in a buffer. + */ + + ctxt->instate = XML_PARSER_ENTITY_VALUE; + input = ctxt->input; + GROW; + NEXT; + c = CUR_CHAR(l); + /* + * NOTE: 4.4.5 Included in Literal + * When a parameter entity reference appears in a literal entity + * value, ... a single or double quote character in the replacement + * text is always treated as a normal data character and will not + * terminate the literal. + * In practice it means we stop the loop only when back at parsing + * the initial entity and the quote is found + */ + while ((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) { + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + return(NULL); + } + } + COPY_BUF(l,buf,len,c); + NEXTL(l); + /* + * Pop-up of finished entities. + */ + while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ + xmlPopInput(ctxt); + + GROW; + c = CUR_CHAR(l); + if (c == 0) { + GROW; + c = CUR_CHAR(l); + } + } + buf[len] = 0; + + /* + * Raise problem w.r.t. '&' and '%' being used in non-entities + * reference constructs. 
Note Charref will be handled in + * xmlStringDecodeEntities() + */ + cur = buf; + while (*cur != 0) { /* non input consuming */ + if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { + xmlChar *name; + xmlChar tmp = *cur; + + cur++; + name = xmlParseStringName(ctxt, &cur); + if ((name == NULL) || (*cur != ';')) { + ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "EntityValue: '%c' forbidden except for entities references\n", + tmp); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + if ((ctxt->inSubset == 1) && (tmp == '%')) { + ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "EntityValue: PEReferences forbidden in internal subset\n", + tmp); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + if (name != NULL) + xmlFree(name); + } + cur++; + } + + /* + * Then PEReference entities are substituted. + */ + if (c != stop) { + ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + xmlFree(buf); + } else { + NEXT; + /* + * NOTE: 4.4.7 Bypassed + * When a general entity reference appears in the EntityValue in + * an entity declaration, it is bypassed and left as is. + * so XML_SUBSTITUTE_REF is not set here. 
+ */ + ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, + 0, 0, 0); + if (orig != NULL) + *orig = buf; + else + xmlFree(buf); + } + + return(ret); +} + +/** + * xmlParseAttValue: + * @ctxt: an XML parser context + * + * parse a value for an attribute + * Note: the parser won't do substitution of entities here, this + * will be handled later in xmlStringGetNodeList + * + * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | + * "'" ([^<&'] | Reference)* "'" + * + * 3.3.3 Attribute-Value Normalization: + * Before the value of an attribute is passed to the application or + * checked for validity, the XML processor must normalize it as follows: + * - a character reference is processed by appending the referenced + * character to the attribute value + * - an entity reference is processed by recursively processing the + * replacement text of the entity + * - a whitespace character (#x20, #xD, #xA, #x9) is processed by + * appending #x20 to the normalized value, except that only a single + * #x20 is appended for a "#xD#xA" sequence that is part of an external + * parsed entity or the literal entity value of an internal parsed entity + * - other characters are processed by appending them to the normalized value + * If the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by a single space (#x20) character. + * All attributes for which no declaration has been read should be treated + * by a non-validating parser as if declared CDATA. + * + * Returns the AttValue parsed or NULL. The value has to be freed by the caller. 
+ */ + +xmlChar * +xmlParseAttValue(xmlParserCtxtPtr ctxt) { + xmlChar limit = 0; + xmlChar *buf = NULL; + int len = 0; + int buf_size = 0; + int c, l; + xmlChar *current = NULL; + xmlEntityPtr ent; + + + SHRINK; + if (NXT(0) == '"') { + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + limit = '"'; + NEXT; + } else if (NXT(0) == '\'') { + limit = '\''; + ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; + NEXT; + } else { + ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return(NULL); + } + + /* + * allocate a translation buffer. + */ + buf_size = XML_PARSER_BUFFER_SIZE; + buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); + if (buf == NULL) { + perror("xmlParseAttValue: malloc failed"); + return(NULL); + } + + /* + * Ok loop until we reach one of the ending char or a size limit. + */ + c = CUR_CHAR(l); + while (((NXT(0) != limit) && /* checked */ + (c != '<')) || (ctxt->token != 0)) { + if (c == 0) break; + if (ctxt->token == '&') { + /* + * The reparsing will be done in xmlStringGetNodeList() + * called by the attribute() function in SAX.c + */ + static xmlChar buffer[6] = "&"; + + if (len > buf_size - 10) { + growBuffer(buf); + } + current = &buffer[0]; + while (*current != 0) { /* non input consuming */ + buf[len++] = *current++; + } + ctxt->token = 0; + } else if (c == '&') { + if (NXT(1) == '#') { + int val = xmlParseCharRef(ctxt); + if (val == '&') { + /* + * The reparsing will be done in xmlStringGetNodeList() + * called by the attribute() function in SAX.c + */ + static xmlChar buffer[6] = "&"; + + if (len > buf_size - 10) { + growBuffer(buf); + } + current = &buffer[0]; + while (*current != 0) { /* non input consuming */ + buf[len++] = *current++; + } + } else { + len += xmlCopyChar(0, &buf[len], val); + } + } else { + ent = xmlParseEntityRef(ctxt); + if ((ent != NULL) && + 
(ctxt->replaceEntities != 0)) { + xmlChar *rep; + + if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + if (rep != NULL) { + current = rep; + while (*current != 0) { /* non input consuming */ + buf[len++] = *current++; + if (len > buf_size - 10) { + growBuffer(buf); + } + } + xmlFree(rep); + } + } else { + if (ent->content != NULL) + buf[len++] = ent->content[0]; + } + } else if (ent != NULL) { + int i = xmlStrlen(ent->name); + const xmlChar *cur = ent->name; + + /* + * This may look absurd but is needed to detect + * entities problems + */ + if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && + (ent->content != NULL)) { + xmlChar *rep; + rep = xmlStringDecodeEntities(ctxt, ent->content, + XML_SUBSTITUTE_REF, 0, 0, 0); + if (rep != NULL) + xmlFree(rep); + } + + /* + * Just output the reference + */ + buf[len++] = '&'; + if (len > buf_size - i - 10) { + growBuffer(buf); + } + for (;i > 0;i--) + buf[len++] = *cur++; + buf[len++] = ';'; + } + } + } else { + if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { + COPY_BUF(l,buf,len,0x20); + if (len > buf_size - 10) { + growBuffer(buf); + } + } else { + COPY_BUF(l,buf,len,c); + if (len > buf_size - 10) { + growBuffer(buf); + } + } + NEXTL(l); + } + GROW; + c = CUR_CHAR(l); + } + buf[len++] = 0; + if (RAW == '<') { + ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Unescaped '<' not allowed in attributes values\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } else if (RAW != limit) { + ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } else + NEXT; + return(buf); +} + +/** + * xmlParseSystemLiteral: + * @ctxt: an XML parser context + * + * parse an XML Literal + * + * [11] 
SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") + * + * Returns the SystemLiteral parsed or NULL + */ + +xmlChar * +xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len = 0; + int size = XML_PARSER_BUFFER_SIZE; + int cur, l; + xmlChar stop; + int state = ctxt->instate; + int count = 0; + + SHRINK; + if (RAW == '"') { + NEXT; + stop = '"'; + } else if (RAW == '\'') { + NEXT; + stop = '\''; + } else { + ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "SystemLiteral \" or ' expected\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + return(NULL); + } + + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + return(NULL); + } + ctxt->instate = XML_PARSER_SYSTEM_LITERAL; + cur = CUR_CHAR(l); + while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = (xmlParserInputState) state; + return(NULL); + } + } + count++; + if (count > 50) { + GROW; + count = 0; + } + COPY_BUF(l,buf,len,cur); + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + GROW; + SHRINK; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; + ctxt->instate = (xmlParserInputState) state; + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } else { + NEXT; + } + return(buf); +} + +/** + * xmlParsePubidLiteral: + * @ctxt: an XML parser context + * + * parse an XML public literal + * + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + * + * Returns the PubidLiteral parsed 
or NULL.
+ * NULL means no opening quote was found or memory ran out. An
+ * unterminated literal is reported as an error but the text collected
+ * so far is still returned. The result is allocated and has to be freed
+ * by the caller.
+ */
+
+xmlChar *
+xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
+    xmlChar *buf = NULL;
+    int len = 0;
+    int size = XML_PARSER_BUFFER_SIZE;
+    xmlChar cur;
+    xmlChar stop;
+    int count = 0;
+
+    SHRINK;
+    if (RAW == '"') {
+        NEXT;
+        stop = '"';
+    } else if (RAW == '\'') {
+        NEXT;
+        stop = '\'';
+    } else {
+        ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData,
+                             "PubidLiteral \" or ' expected\n");
+        ctxt->wellFormed = 0;
+        ctxt->disableSAX = 1;
+        return(NULL);
+    }
+    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
+    if (buf == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "malloc of %d byte failed\n", size);
+        return(NULL);
+    }
+    cur = CUR;
+    while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
+        if (len + 1 >= size) {
+            xmlChar *bigger;
+
+            size *= 2;
+            bigger = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+            if (bigger == NULL) {
+                /* a failed realloc leaves the old block allocated */
+                xmlFree(buf);
+                xmlGenericError(xmlGenericErrorContext,
+                        "realloc of %d byte failed\n", size);
+                return(NULL);
+            }
+            buf = bigger;
+        }
+        buf[len++] = cur;
+        count++;
+        if (count > 50) {
+            GROW;
+            count = 0;
+        }
+        NEXT;
+        cur = CUR;
+        if (cur == 0) {
+            GROW;
+            SHRINK;
+            cur = CUR;
+        }
+    }
+    buf[len] = 0;
+    if (cur != stop) {
+        ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+            ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
+        ctxt->wellFormed = 0;
+        ctxt->disableSAX = 1;
+    } else {
+        NEXT;
+    }
+    return(buf);
+}
+
+/**
+ * xmlParseCharData:
+ * @ctxt:  an XML parser context
+ * @cdata:  int indicating whether we are within a CDATA section
+ *
+ * parse a CharData section.
+ * if we are within a CDATA section ']]>' marks an end of section.
+ *
+ * The right angle bracket (>) may be represented using the string "&gt;",
+ * and must, for compatibility, be escaped using "&gt;" or a character
+ * reference when it appears in the string "]]>" in content, when that
+ * string is not marking the end of a CDATA section.
+ *
+ * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
+ *
+ * Characters are collected in a fixed size chunk. Each time the chunk
+ * fills up, and once more at the end, it is handed to the SAX
+ * ignorableWhitespace() or characters() callback depending on the
+ * answer of areBlanks().
+ */
+
+void
+xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
+    xmlChar chunk[XML_PARSER_BIG_BUFFER_SIZE + 5];
+    int used = 0;       /* bytes currently held in chunk */
+    int sinceGrow = 0;  /* characters consumed since the last GROW */
+    int ch, width;
+
+    SHRINK;
+    GROW;
+    for (ch = CUR_CHAR(width); ; ch = CUR_CHAR(width)) {
+        /* markup starts here unless the char comes from a pending token */
+        if ((ch == '<') && (ctxt->token != '<'))
+            break;
+        if ((ch == '&') && (ctxt->token != '&'))
+            break;
+        if (!IS_CHAR(ch)) /* test also done in xmlCurrentChar() */
+            break;
+
+        if ((ch == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
+            if (cdata)
+                break;
+            ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
+            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                ctxt->sax->error(ctxt->userData,
+                   "Sequence ']]>' not allowed in content\n");
+            /* Should this be relaxed ??? I see a "must here */
+            ctxt->wellFormed = 0;
+            ctxt->disableSAX = 1;
+        }
+
+        COPY_BUF(width,chunk,used,ch);
+        if (used >= XML_PARSER_BIG_BUFFER_SIZE) {
+            /*
+             * The chunk is full: deliver it and start a new one.
+             */
+            if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+                if (!areBlanks(ctxt, chunk, used)) {
+                    if (ctxt->sax->characters != NULL)
+                        ctxt->sax->characters(ctxt->userData, chunk, used);
+                } else if (ctxt->sax->ignorableWhitespace != NULL) {
+                    ctxt->sax->ignorableWhitespace(ctxt->userData,
+                                                   chunk, used);
+                }
+            }
+            used = 0;
+        }
+
+        if (++sinceGrow > 50) {
+            GROW;
+            sinceGrow = 0;
+        }
+        NEXTL(width);
+    }
+
+    if (used == 0)
+        return;
+
+    /*
+     * Deliver what is left in the chunk.
+     */
+    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+        if (!areBlanks(ctxt, chunk, used)) {
+            if (ctxt->sax->characters != NULL)
+                ctxt->sax->characters(ctxt->userData, chunk, used);
+        } else if (ctxt->sax->ignorableWhitespace != NULL) {
+            ctxt->sax->ignorableWhitespace(ctxt->userData, chunk, used);
+        }
+    }
+}
+
+/**
+ * xmlParseExternalID:
+ * @ctxt:  an XML parser context
+ * @publicID:  a xmlChar** receiving PubidLiteral
+ * @strict: indicate whether we should restrict parsing to only
+ *          production [75], see NOTE below
+ *
+ * Parse an External ID or a Public ID
+ *
+ * NOTE: Productions [75] and [83] interract badly since [75] can generate
+ *       'PUBLIC' S PubidLiteral S SystemLiteral
+ *
+ * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
+ *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
+ *
+ * [83] PublicID ::= 'PUBLIC' S PubidLiteral
+ *
+ * Returns the function returns SystemLiteral and in the second
+ *                case publicID receives PubidLiteral, is strict is off
+ *                it is possible to return NULL and have publicID set.
+ */ + +xmlChar * +xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { + xmlChar *URI = NULL; /* SystemLiteral to return; stays NULL when none is parsed */ + + SHRINK; + if ((RAW == 'S') && (NXT(1) == 'Y') && + (NXT(2) == 'S') && (NXT(3) == 'T') && + (NXT(4) == 'E') && (NXT(5) == 'M')) { /* [75] first alternative: SYSTEM S SystemLiteral */ + SKIP(6); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'SYSTEM'\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + SKIP_BLANKS; + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) { + ctxt->errNo = XML_ERR_URI_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseExternalID: SYSTEM, no URI\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + } else if ((RAW == 'P') && (NXT(1) == 'U') && + (NXT(2) == 'B') && (NXT(3) == 'L') && + (NXT(4) == 'I') && (NXT(5) == 'C')) { /* PUBLIC S PubidLiteral, then S SystemLiteral (mandatory only when strict) */ + SKIP(6); + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after 'PUBLIC'\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + SKIP_BLANKS; + *publicID = xmlParsePubidLiteral(ctxt); + if (*publicID == NULL) { + ctxt->errNo = XML_ERR_PUBID_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseExternalID: PUBLIC, no Public Identifier\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + if (strict) { + /* + * We don't handle [83] so "S SystemLiteral" is required. + */ + if (!IS_BLANK(CUR)) { + ctxt->errNo = XML_ERR_SPACE_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Space required after the Public Identifier\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + } else { + /* + * We handle [83] so we return immediately, if + * "S SystemLiteral" is not detected. 
From a purely parsing + * point of view that's a nice mess. + */ + const xmlChar *ptr; + GROW; + + ptr = CUR_PTR; + if (!IS_BLANK(*ptr)) return(NULL); /* bare PublicID: *publicID was assigned above, no system literal follows */ + + while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ + if ((*ptr != '\'') && (*ptr != '"')) return(NULL); /* blanks not followed by a quote: same outcome, nothing is consumed here */ + } + SKIP_BLANKS; + URI = xmlParseSystemLiteral(ctxt); + if (URI == NULL) { + ctxt->errNo = XML_ERR_URI_REQUIRED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseExternalID: PUBLIC, no URI\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + } + return(URI); +} + +/** + * xmlParseComment: + * @ctxt: an XML parser context + * + * Skip an XML (SGML) comment + * The spec says that "For compatibility, the string "--" (double-hyphen) + * must not occur within comments. " + * + * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' + */ +void +xmlParseComment(xmlParserCtxtPtr ctxt) { + xmlChar *buf = NULL; + int len; + int size = XML_PARSER_BUFFER_SIZE; + int q, ql; + int r, rl; + int cur, l; + xmlParserInputState state; + xmlParserInputPtr input = ctxt->input; + int count = 0; /* characters consumed since the last GROW */ + + /* + * Check that there is a comment right here. 
+ */ + if ((RAW != '<') || (NXT(1) != '!') || + (NXT(2) != '-') || (NXT(3) != '-')) return; + + state = ctxt->instate; + ctxt->instate = XML_PARSER_COMMENT; + SHRINK; + SKIP(4); + buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + q = CUR_CHAR(ql); + NEXTL(ql); + r = CUR_CHAR(rl); + NEXTL(rl); + cur = CUR_CHAR(l); + len = 0; /* q and r are the two characters read before cur; q is the next one copied to buf */ + while (IS_CHAR(cur) && /* checked */ + ((cur != '>') || + (r != '-') || (q != '-'))) { + if ((r == '-') && (q == '-') && (len > 1)) { + ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment must not contain '--' (double-hyphen)`\n"); + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } + if (len + 5 >= size) { + size *= 2; + buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc of %d byte failed\n", size); + ctxt->instate = state; + return; + } + } + COPY_BUF(ql,buf,len,q); + q = r; + ql = rl; + r = cur; + rl = l; + + count++; + if (count > 50) { + GROW; + count = 0; + } + NEXTL(l); + cur = CUR_CHAR(l); + if (cur == 0) { + SHRINK; + GROW; + cur = CUR_CHAR(l); + } + } + buf[len] = 0; /* NOTE(review): in this copy the text after the not-terminated message below jumps into what looks like a node dumping routine; the end of xmlParseComment and the start of that routine appear to be missing - compare with the upstream sources */ + if (!IS_CHAR(cur)) { + ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Comment not terminated \n"); + } + return; + } + if (cur->type == XML_ENTITY_REF_NODE) { + xmlBufferWriteChar(buf, "&"); + xmlBufferWriteCHAR(buf, cur->name); + xmlBufferWriteChar(buf, 
";"); + return; + } + if (cur->type == XML_CDATA_SECTION_NODE) { + xmlBufferWriteChar(buf, "content != NULL) +#ifndef XML_USE_BUFFER_CONTENT + xmlBufferWriteCHAR(buf, cur->content); +#else + xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content)); +#endif + xmlBufferWriteChar(buf, "]]>"); + return; + } + + if 
(format == 1) { + tmp = cur->children; + while (tmp != NULL) { + if ((tmp->type == XML_TEXT_NODE) || + (tmp->type == XML_ENTITY_REF_NODE)) { + format = 0; + break; + } + tmp = tmp->next; + } + } + xmlBufferWriteChar(buf, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlBufferWriteCHAR(buf, cur->ns->prefix); + xmlBufferWriteChar(buf, ":"); + } + + xmlBufferWriteCHAR(buf, cur->name); + if (cur->nsDef) + xmlNsListDump(buf, cur->nsDef); + if (cur->properties != NULL) + xmlAttrListDump(buf, doc, cur->properties); + + if ((cur->content == NULL) && (cur->children == NULL) && + (!xmlSaveNoEmptyTags)) { + xmlBufferWriteChar(buf, "/>"); + return; + } + xmlBufferWriteChar(buf, ">"); + if (cur->content != NULL) { + xmlChar *buffer; + +#ifndef XML_USE_BUFFER_CONTENT + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); +#else + buffer = xmlEncodeEntitiesReentrant(doc, + xmlBufferContent(cur->content)); +#endif + if (buffer != NULL) { + xmlBufferWriteCHAR(buf, buffer); + xmlFree(buffer); + } + } + if (cur->children != NULL) { + if (format) xmlBufferWriteChar(buf, "\n"); + xmlNodeListDump(buf, doc, cur->children, + (level >= 0?level+1:-1), format); + if ((xmlIndentTreeOutput) && (format)) + for (i = 0;i < level;i++) + xmlBufferWriteChar(buf, " "); + } + xmlBufferWriteChar(buf, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlBufferWriteCHAR(buf, cur->ns->prefix); + xmlBufferWriteChar(buf, ":"); + } + + xmlBufferWriteCHAR(buf, cur->name); + xmlBufferWriteChar(buf, ">"); +} + +/** + * xmlElemDump: + * @f: the FILE * for the output + * @doc: the document + * @cur: the current node + * + * Dump an XML/HTML node, recursive behaviour,children are printed too. 
+ */ +void +xmlElemDump(FILE *f, xmlDocPtr doc, xmlNodePtr cur) { + xmlBufferPtr buf; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlElemDump : cur == NULL\n"); +#endif + return; + } + if (doc == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlElemDump : doc == NULL\n"); +#endif + } + buf = xmlBufferCreate(); + if (buf == NULL) return; + if ((doc != NULL) && + (doc->type == XML_HTML_DOCUMENT_NODE)) { +#ifdef LIBXML_HTML_ENABLED + htmlNodeDump(buf, doc, cur); +#else + xmlGenericError(xmlGenericErrorContext, + "HTML support not compiled in\n"); +#endif /* LIBXML_HTML_ENABLED */ + } else + xmlNodeDump(buf, doc, cur, 0, 1); + xmlBufferDump(f, buf); + xmlBufferFree(buf); +} + +/************************************************************************ + * * + * Dumping XML tree content to an I/O output buffer * + * * + ************************************************************************/ + +void +xmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding); +static void +xmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding); +/** + * xmlNsDumpOutput: + * @buf: the XML buffer output + * @cur: a namespace + * + * Dump a local Namespace definition. + * Should be called in the context of attributes dumps. 
+ */ +static void +xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNsDump : Ns == NULL\n"); +#endif + return; + } + if ((cur->type == XML_LOCAL_NAMESPACE) && (cur->href != NULL)) { + /* Within the context of an element attributes */ + if (cur->prefix != NULL) { + xmlOutputBufferWriteString(buf, " xmlns:"); + xmlOutputBufferWriteString(buf, (const char *)cur->prefix); + } else + xmlOutputBufferWriteString(buf, " xmlns"); + xmlOutputBufferWriteString(buf, "="); + xmlBufferWriteQuotedString(buf->buffer, cur->href); + } +} + +/** + * xmlNsListDumpOutput: + * @buf: the XML buffer output + * @cur: the first namespace + * + * Dump a list of local Namespace definitions. + * Should be called in the context of attributes dumps. + */ +static void +xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { + while (cur != NULL) { + xmlNsDumpOutput(buf, cur); + cur = cur->next; + } +} + +/** + * xmlDtdDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @encoding: an optional encoding string + * + * Dump the XML document DTD, if any. 
+ */ +static void +xmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDtdPtr dtd, const char *encoding) { /* writes the PUBLIC or SYSTEM identifiers, then either a closing bracket when the DTD has no entities, elements, attributes or notations, or the bracketed list of dtd->children */ + if (dtd == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlDtdDump : no internal subset\n"); +#endif + return; + } /* NOTE(review): the statement that follows looks truncated in this copy (its string literal and the code up to the dtd name are missing) - restore it from the upstream sources */ + xmlOutputBufferWriteString(buf, "name); + if (dtd->ExternalID != NULL) { + xmlOutputBufferWriteString(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf->buffer, dtd->ExternalID); + xmlOutputBufferWriteString(buf, " "); + xmlBufferWriteQuotedString(buf->buffer, dtd->SystemID); + } else if (dtd->SystemID != NULL) { + xmlOutputBufferWriteString(buf, " SYSTEM "); + xmlBufferWriteQuotedString(buf->buffer, dtd->SystemID); + } + if ((dtd->entities == NULL) && (dtd->elements == NULL) && + (dtd->attributes == NULL) && (dtd->notations == NULL)) { + xmlOutputBufferWriteString(buf, ">"); + return; + } + xmlOutputBufferWriteString(buf, " [\n"); + xmlNodeListDumpOutput(buf, dtd->doc, dtd->children, -1, 0, encoding); + xmlOutputBufferWriteString(buf, "]>"); +} + +/** + * xmlAttrDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the attribute pointer + * @encoding: an optional encoding string + * + * Dump an XML attribute + */ +static void +xmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, + const char *encoding) { + xmlChar *value; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAttrDump : property == NULL\n"); +#endif + return; + } + xmlOutputBufferWriteString(buf, " "); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + value = xmlNodeListGetString(doc, cur->children, 0); + if (value) { + xmlOutputBufferWriteString(buf, "="); + xmlBufferWriteQuotedString(buf->buffer, value); + xmlFree(value); + } else { + xmlOutputBufferWriteString(buf, "=\"\""); + } +} + +/** + * 
xmlAttrListDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the first attribute pointer + * @encoding: an optional encoding string + * + * Dump a list of XML attributes + */ +static void +xmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlAttrPtr cur, const char *encoding) { + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlAttrListDump : property == NULL\n"); +#endif + return; + } + while (cur != NULL) { + xmlAttrDumpOutput(buf, doc, cur, encoding); + cur = cur->next; + } +} + + + +/** + * xmlNodeListDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the first node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XML node list, recursive behaviour,children are printed too. + */ +static void +xmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, int level, int format, const char *encoding) { + int i; + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeListDump : node == NULL\n"); +#endif + return; + } + while (cur != NULL) { + if ((format) && (xmlIndentTreeOutput) && + (cur->type == XML_ELEMENT_NODE)) + for (i = 0;i < level;i++) + xmlOutputBufferWriteString(buf, " "); + xmlNodeDumpOutput(buf, doc, cur, level, format, encoding); + if (format) { + xmlOutputBufferWriteString(buf, "\n"); + } + cur = cur->next; + } +} + +/** + * xmlNodeDumpOutput: + * @buf: the XML buffer output + * @doc: the document + * @cur: the current node + * @level: the imbrication level for indenting + * @format: is formatting allowed + * @encoding: an optional encoding string + * + * Dump an XML node, recursive behaviour,children are printed too. 
+ */ +void +xmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, + int level, int format, const char *encoding) { + int i; /* indentation counter used before the closing tag */ + xmlNodePtr tmp; /* child cursor used to switch formatting off for mixed content */ + + if (cur == NULL) { +#ifdef DEBUG_TREE + xmlGenericError(xmlGenericErrorContext, + "xmlNodeDump : node == NULL\n"); +#endif + return; + } + if (cur->type == XML_XINCLUDE_START) + return; + if (cur->type == XML_XINCLUDE_END) + return; /* XInclude start and end markers produce no output */ + if (cur->type == XML_DTD_NODE) { + xmlDtdDumpOutput(buf, (xmlDtdPtr) cur, encoding); + return; + } + if (cur->type == XML_ELEMENT_DECL) { /* the three declaration kinds are written straight into buf->buffer by their own dump routines */ + xmlDumpElementDecl(buf->buffer, (xmlElementPtr) cur); + return; + } + if (cur->type == XML_ATTRIBUTE_DECL) { + xmlDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); + return; + } + if (cur->type == XML_ENTITY_DECL) { + xmlDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); + return; + } + if (cur->type == XML_TEXT_NODE) { + if (cur->content != NULL) { + if ((cur->name == xmlStringText) || + (cur->name != xmlStringTextNoenc)) { /* NOTE(review): the first comparison is subsumed by the second unless xmlStringText and xmlStringTextNoenc are the same pointer - confirm the intended test */ + xmlChar *buffer; + +#ifndef XML_USE_BUFFER_CONTENT + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + else + buffer = xmlEncodeSpecialChars(doc, cur->content); +#else + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, + xmlBufferContent(cur->content)); + else + buffer = xmlEncodeSpecialChars(doc, + xmlBufferContent(cur->content)); +#endif + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + /* + * Disable escaping, needed for XSLT + */ +#ifndef XML_USE_BUFFER_CONTENT + xmlOutputBufferWriteString(buf, (const char *) cur->content); +#else + xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content)); +#endif + } + } + + return; + } + if (cur->type == XML_PI_NODE) { /* NOTE(review): several string literals from here on (processing instruction, comment, CDATA and closing tag writes) look truncated in this copy - restore them from the upstream sources before compiling */ + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, "name); + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, " "); +#ifndef XML_USE_BUFFER_CONTENT + xmlOutputBufferWriteString(buf, (const char *)cur->content); 
+#else + xmlOutputBufferWriteString(buf, (const char *)xmlBufferContent(cur->content)); +#endif + } + xmlOutputBufferWriteString(buf, "?>"); + } else { + xmlOutputBufferWriteString(buf, "name); + xmlOutputBufferWriteString(buf, "?>"); + } + return; + } + if (cur->type == XML_COMMENT_NODE) { + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, ""); + } + return; + } + if (cur->type == XML_ENTITY_REF_NODE) { + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + return; + } + if (cur->type == XML_CDATA_SECTION_NODE) { + xmlOutputBufferWriteString(buf, "content != NULL) +#ifndef XML_USE_BUFFER_CONTENT + xmlOutputBufferWriteString(buf, (const char *)cur->content); +#else + xmlOutputBufferWriteString(buf, (const char *)xmlBufferContent(cur->content)); +#endif + xmlOutputBufferWriteString(buf, "]]>"); + return; + } + + if (format == 1) { + tmp = cur->children; + while (tmp != NULL) { + if ((tmp->type == XML_TEXT_NODE) || + (tmp->type == XML_ENTITY_REF_NODE)) { + format = 0; + break; + } + tmp = tmp->next; + } + } + xmlOutputBufferWriteString(buf, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutput(buf, cur->nsDef); + if (cur->properties != NULL) + xmlAttrListDumpOutput(buf, doc, cur->properties, encoding); + + if ((cur->content == NULL) && (cur->children == NULL) && + (!xmlSaveNoEmptyTags)) { + xmlOutputBufferWriteString(buf, "/>"); + return; + } + xmlOutputBufferWriteString(buf, ">"); + if (cur->content != NULL) { + xmlChar *buffer; + +#ifndef XML_USE_BUFFER_CONTENT + if (encoding == NULL) + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + else + buffer = xmlEncodeSpecialChars(doc, cur->content); +#else + if (encoding == NULL) + buffer = 
xmlEncodeEntitiesReentrant(doc, + xmlBufferContent(cur->content)); + else + buffer = xmlEncodeSpecialChars(doc, + xmlBufferContent(cur->content)); +#endif + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } + if (cur->children != NULL) { + if (format) xmlOutputBufferWriteString(buf, "\n"); + xmlNodeListDumpOutput(buf, doc, cur->children, + (level >= 0?level+1:-1), format, encoding); + if ((xmlIndentTreeOutput) && (format)) + for (i = 0;i < level;i++) + xmlOutputBufferWriteString(buf, " "); + } + xmlOutputBufferWriteString(buf, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); +} + +/** + * xmlDocContentDumpOutput: + * @buf: the XML buffer output + * @cur: the document + * @encoding: an optional encoding string + * @format: should formatting spaces been added + * + * Dump an XML document. 
+ */
+static void
+xmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
+                        const char *encoding, int format) {
+    /* XML declaration: version, then optional encoding and standalone. */
+    xmlOutputBufferWriteString(buf, "<?xml version=");
+    if (cur->version != NULL)
+        xmlBufferWriteQuotedString(buf->buffer, cur->version);
+    else
+        xmlOutputBufferWriteString(buf, "\"1.0\"");
+    /*
+     * Without an explicit encoding, fall back to the one recorded in the
+     * document, then to the name of its in-memory charset when that is
+     * not UTF-8.
+     */
+    if (encoding == NULL) {
+        if (cur->encoding != NULL)
+            encoding = (const char *) cur->encoding;
+        else if (cur->charset != XML_CHAR_ENCODING_UTF8)
+            encoding = xmlGetCharEncodingName((xmlCharEncoding) cur->charset);
+    }
+    if (encoding != NULL) {
+        xmlOutputBufferWriteString(buf, " encoding=");
+        xmlBufferWriteQuotedString(buf->buffer, (xmlChar *) encoding);
+    }
+    /* Any value other than 0 or 1 emits no standalone pseudo-attribute. */
+    switch (cur->standalone) {
+        case 0:
+            xmlOutputBufferWriteString(buf, " standalone=\"no\"");
+            break;
+        case 1:
+            xmlOutputBufferWriteString(buf, " standalone=\"yes\"");
+            break;
+    }
+    xmlOutputBufferWriteString(buf, "?>\n");
+    /* Dump each top-level node on its own line. */
+    if (cur->children != NULL) {
+        xmlNodePtr child = cur->children;
+
+        while (child != NULL) {
+            xmlNodeDumpOutput(buf, cur, child, 0, format, encoding);
+            xmlOutputBufferWriteString(buf, "\n");
+            child = child->next;
+        }
+    }
+}
+
+/************************************************************************
+ *                                                                      *
+ *              Saving functions front-ends                             *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * xmlDocDumpFormatMemoryEnc:
+ * @out_doc:  Document to generate XML text from
+ * @doc_txt_ptr:  Memory pointer for allocated XML text
+ * @doc_txt_len:  Length of the generated XML text
+ * @txt_encoding:  Character encoding to use when generating XML text
+ * @format:  should formatting spaces be added
+ *
+ * Dump the current DOM tree into memory using the character encoding specified
+ * by the caller.  Note it is up to the caller of this function to free the
+ * allocated memory.
+ */
+
+void
+xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr,
+                int * doc_txt_len, const char * txt_encoding, int format) {
+    int                         dummy = 0;
+
+    xmlCharEncoding             doc_charset;
+    xmlOutputBufferPtr          out_buff = NULL;
+    xmlCharEncodingHandlerPtr   conv_hdlr = NULL;
+
+    if (doc_txt_len == NULL) {
+        doc_txt_len = &dummy;   /*  Continue, caller just won't get length */
+    }
+
+    if (doc_txt_ptr == NULL) {
+        *doc_txt_len = 0;
+        xmlGenericError(xmlGenericErrorContext,
+                    "xmlDocDumpFormatMemoryEnc:  Null return buffer pointer.\n");
+        return;
+    }
+
+    /* Both outputs are reset so that every failure path reports "nothing". */
+    *doc_txt_ptr = NULL;
+    *doc_txt_len = 0;
+
+    if (out_doc == NULL) {
+        /*  No document, no output  */
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlDocDumpFormatMemoryEnc:  Null DOM tree document pointer.\n");
+        return;
+    }
+
+    /*
+     *  Validate the encoding value, if provided.
+     *  This logic is copied from xmlSaveFileEnc.
+     */
+
+    if (txt_encoding == NULL)
+        txt_encoding = (const char *) out_doc->encoding;
+    if (txt_encoding != NULL) {
+        doc_charset = xmlParseCharEncoding(txt_encoding);
+
+        if (out_doc->charset != XML_CHAR_ENCODING_UTF8) {
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlDocDumpFormatMemoryEnc: Source document not in UTF8\n");
+            return;
+
+        } else if (doc_charset != XML_CHAR_ENCODING_UTF8) {
+            /* A conversion handler is only needed for non UTF-8 output. */
+            conv_hdlr = xmlFindCharEncodingHandler(txt_encoding);
+            if ( conv_hdlr == NULL ) {
+                xmlGenericError(xmlGenericErrorContext,
+                                "%s:  %s %s '%s'\n",
+                                "xmlDocDumpFormatMemoryEnc",
+                                "Failed to identify encoding handler for",
+                                "character set",
+                                txt_encoding);
+                return;
+            }
+        }
+    }
+
+    if ((out_buff = xmlAllocOutputBuffer(conv_hdlr)) == NULL ) {
+        xmlGenericError(xmlGenericErrorContext,
+            "xmlDocDumpFormatMemoryEnc: Failed to allocate output buffer.\n");
+        return;
+    }
+
+    /* Honour the caller's @format instead of always formatting. */
+    xmlDocContentDumpOutput(out_buff, out_doc, txt_encoding, format);
+    xmlOutputBufferFlush(out_buff);
+    /* With a converter the encoded bytes are in ->conv, else in ->buffer. */
+    if (out_buff->conv != NULL) {
+        *doc_txt_len = out_buff->conv->use;
+        *doc_txt_ptr = xmlStrndup(out_buff->conv->content, *doc_txt_len);
+    } else {
+        *doc_txt_len = out_buff->buffer->use;
+        *doc_txt_ptr = xmlStrndup(out_buff->buffer->content, *doc_txt_len);
+    }
+    (void)xmlOutputBufferClose(out_buff);
+
+    if ((*doc_txt_ptr == NULL) && (*doc_txt_len > 0)) {
+        *doc_txt_len = 0;
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlDocDumpFormatMemoryEnc:  %s\n",
+                "Failed to allocate memory for document text representation.");
+    }
+
+    return;
+}
+
+/**
+ * xmlDocDumpMemory:
+ * @cur:  the document
+ * @mem:  OUT: the memory pointer
+ * @size:  OUT: the memory length
+ *
+ * Dump an XML document in memory, without formatting, and return the
+ * xmlChar * and its size.
+ * It's up to the caller to free the memory.
+ */
+void
+xmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
+    xmlDocDumpFormatMemoryEnc(cur, mem, size, NULL, 0);
+}
+
+/**
+ * xmlDocDumpFormatMemory:
+ * @cur:  the document
+ * @mem:  OUT: the memory pointer
+ * @size:  OUT: the memory length
+ * @format:  should formatting spaces be added
+ *
+ *
+ * Dump an XML document in memory and return the xmlChar * and its size.
+ * It's up to the caller to free the memory.
+ */
+void
+xmlDocDumpFormatMemory(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
+    xmlDocDumpFormatMemoryEnc(cur, mem, size, NULL, format);
+}
+
+/**
+ * xmlDocDumpMemoryEnc:
+ * @out_doc:  Document to generate XML text from
+ * @doc_txt_ptr:  Memory pointer for allocated XML text
+ * @doc_txt_len:  Length of the generated XML text
+ * @txt_encoding:  Character encoding to use when generating XML text
+ *
+ * Dump the current DOM tree into memory using the character encoding specified
+ * by the caller.  Note it is up to the caller of this function to free the
+ * allocated memory.
+ */
+
+void
+xmlDocDumpMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr,
+                    int * doc_txt_len, const char * txt_encoding) {
+    /* Thin wrapper: same call with the format flag fixed to 1. */
+    xmlDocDumpFormatMemoryEnc(out_doc, doc_txt_ptr, doc_txt_len,
+                              txt_encoding, 1);
+}
+
+/**
+ * xmlGetDocCompressMode:
+ * @doc:  the document
+ *
+ * Read the ZLIB compression level attached to a document.
+ * Returns 0 (uncompressed) to 9 (max compression), or -1 if @doc is NULL
+ */
+int
+xmlGetDocCompressMode (xmlDocPtr doc) {
+    return((doc != NULL) ? doc->compression : -1);
+}
+
+/**
+ * xmlSetDocCompressMode:
+ * @doc:  the document
+ * @mode:  the compression ratio
+ *
+ * Set the ZLIB compression level attached to a document.
+ * Values outside 0 (uncompressed) .. 9 (max compression) are clamped to
+ * the nearest bound; a NULL @doc is ignored.
+ */
+void
+xmlSetDocCompressMode (xmlDocPtr doc, int mode) {
+    int level = mode;
+
+    if (doc == NULL)
+        return;
+    if (level < 0)
+        level = 0;
+    else if (level > 9)
+        level = 9;
+    doc->compression = level;
+}
+
+/**
+ * xmlGetCompressMode:
+ *
+ * Read the default ZLIB compression level (the xmlCompressMode global).
+ * Returns 0 (uncompressed) to 9 (max compression)
+ */
+int
+ xmlGetCompressMode(void) {
+    return(xmlCompressMode);
+}
+
+/**
+ * xmlSetCompressMode:
+ * @mode:  the compression ratio
+ *
+ * Set the default ZLIB compression level (the xmlCompressMode global).
+ * Values outside 0 (uncompressed) .. 9 (max compression) are clamped to
+ * the nearest bound.
+ */
+void
+xmlSetCompressMode(int mode) {
+    int level = mode;
+
+    if (level < 0)
+        level = 0;
+    else if (level > 9)
+        level = 9;
+    xmlCompressMode = level;
+}
+
+/**
+ * xmlDocDump:
+ * @f:  the FILE*
+ * @cur:  the document
+ *
+ * Dump an XML document to an open FILE.
+ *
+ * returns: the number of byte written or -1 in case of failure.
+ */
+int
+xmlDocDump(FILE *f, xmlDocPtr cur) {
+    xmlOutputBufferPtr buf;
+    const char * encoding;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    int ret;
+
+    if (cur == NULL) {
+#ifdef DEBUG_TREE
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlDocDump : document == NULL\n");
+#endif
+        return(-1);
+    }
+    encoding = (const char *) cur->encoding;
+
+    if (encoding != NULL) {
+        xmlCharEncoding enc;
+
+        enc = xmlParseCharEncoding(encoding);
+
+        if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlDocDump: document not in UTF8\n");
+            return(-1);
+        }
+        if (enc != XML_CHAR_ENCODING_UTF8) {
+            handler = xmlFindCharEncodingHandler(encoding);
+            if (handler == NULL) {
+                /*
+                 * No converter for the declared encoding: forget the
+                 * declaration (this modifies the document) and dump
+                 * without conversion.
+                 */
+                xmlFree((char *) cur->encoding);
+                cur->encoding = NULL;
+            }
+        }
+    }
+    buf = xmlOutputBufferCreateFile(f, handler);
+    if (buf == NULL) return(-1);
+    /* NULL encoding: the dumper re-reads it from the document itself. */
+    xmlDocContentDumpOutput(buf, cur, NULL, 1);
+
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
+
+/**
+ * xmlSaveFileTo:
+ * @buf:  an output I/O buffer
+ * @cur:  the document
+ * @encoding:  the encoding if any assuming the i/O layer handles the trancoding
+ *
+ * Dump an XML document to an I/O buffer. The buffer is closed before
+ * returning whenever @buf is not NULL.
+ *
+ * returns: the number of byte written or -1 in case of failure.
+ */
+int
+xmlSaveFileTo(xmlOutputBuffer *buf, xmlDocPtr cur, const char *encoding) {
+    int ret;
+
+    /* A missing buffer is a failure, reported as -1 like everywhere else. */
+    if (buf == NULL) return(-1);
+    if (cur == NULL) {
+        /* Nothing to dump: still release the buffer handed over to us. */
+        xmlOutputBufferClose(buf);
+        return(-1);
+    }
+    xmlDocContentDumpOutput(buf, cur, encoding, 1);
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
+
+/**
+ * xmlSaveFileEnc:
+ * @filename:  the filename (or URL)
+ * @cur:  the document
+ * @encoding:  the name of an encoding (or NULL)
+ *
+ * Dump an XML document, converting it to the given encoding
+ *
+ * returns: the number of byte written or -1 in case of failure.
+ */
+int
+xmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
+    xmlOutputBufferPtr buf;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    int ret;
+
+    /* cur is dereferenced below and by the dumper: reject NULL up front. */
+    if (cur == NULL)
+        return(-1);
+
+    if (encoding != NULL) {
+        xmlCharEncoding enc;
+
+        enc = xmlParseCharEncoding(encoding);
+        if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlSaveFileEnc: document not in UTF8\n");
+            return(-1);
+        }
+        if (enc != XML_CHAR_ENCODING_UTF8) {
+            /* An explicitly requested encoding must have a converter. */
+            handler = xmlFindCharEncodingHandler(encoding);
+            if (handler == NULL) {
+                return(-1);
+            }
+        }
+    }
+
+    /*
+     * save the content to a temp buffer.
+     */
+    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
+    if (buf == NULL) return(-1);
+
+    xmlDocContentDumpOutput(buf, cur, encoding, 1);
+
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
+
+/**
+ * xmlSaveFile:
+ * @filename:  the filename (or URL)
+ * @cur:  the document
+ *
+ * Dump an XML document to a file. Will use compression if
+ * compiled in and enabled. If @filename is "-" the stdout file is
+ * used.
+ * returns: the number of byte written or -1 in case of failure.
+ */
+int
+xmlSaveFile(const char *filename, xmlDocPtr cur) {
+    xmlOutputBufferPtr buf;
+    const char *encoding;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    int ret;
+
+    if (cur == NULL)
+        return(-1);
+    encoding = (const char *) cur->encoding;
+
+    /*
+     * save the content to a temp buffer.
+     */
+#ifdef HAVE_ZLIB_H
+    /* A negative per-document level means "use the global default". */
+    if (cur->compression < 0) cur->compression = xmlCompressMode;
+#endif
+    if (encoding != NULL) {
+        xmlCharEncoding enc;
+
+        enc = xmlParseCharEncoding(encoding);
+
+        if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+            xmlGenericError(xmlGenericErrorContext,
+                    "xmlSaveFile: document not in UTF8\n");
+            return(-1);
+        }
+        if (enc != XML_CHAR_ENCODING_UTF8) {
+            handler = xmlFindCharEncodingHandler(encoding);
+            if (handler == NULL) {
+                /*
+                 * No converter for the declared encoding: forget the
+                 * declaration (this modifies the document) and save
+                 * without conversion.
+                 */
+                xmlFree((char *) cur->encoding);
+                cur->encoding = NULL;
+            }
+        }
+    }
+
+    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
+    if (buf == NULL) return(-1);
+
+    /* NULL encoding: the dumper re-reads it from the document itself. */
+    xmlDocContentDumpOutput(buf, cur, NULL, 1);
+
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
+
diff --git a/tree.h b/tree.h
new file mode 100644
index 00000000..648817d0
--- /dev/null
+++ b/tree.h
@@ -0,0 +1,701 @@
+/*
+ * tree.h : describes the structures found in a tree resulting
+ *          from an XML parsing.
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ *
+ * 14 Nov 2000 ht - added redefinition of xmlBufferWriteChar for VMS
+ *
+ */
+
+#ifndef __XML_TREE_H__
+#define __XML_TREE_H__
+
+#include <stdio.h>
+#include <libxml/xmlversion.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define XML_XML_NAMESPACE \
+    (const xmlChar *) "http://www.w3.org/XML/1998/namespace"
+
+/*
+ * The different element types carried by an XML tree
+ *
+ * NOTE: This is synchronized with DOM Level1 values
+ *       See http://www.w3.org/TR/REC-DOM-Level-1/
+ *
+ * Actually this had diverged a bit, and now XML_DOCUMENT_TYPE_NODE should
+ * be deprecated to use an XML_DTD_NODE.
+ */ +typedef enum { + XML_ELEMENT_NODE= 1, + XML_ATTRIBUTE_NODE= 2, + XML_TEXT_NODE= 3, + XML_CDATA_SECTION_NODE= 4, + XML_ENTITY_REF_NODE= 5, + XML_ENTITY_NODE= 6, + XML_PI_NODE= 7, + XML_COMMENT_NODE= 8, + XML_DOCUMENT_NODE= 9, + XML_DOCUMENT_TYPE_NODE= 10, + XML_DOCUMENT_FRAG_NODE= 11, + XML_NOTATION_NODE= 12, + XML_HTML_DOCUMENT_NODE= 13, + XML_DTD_NODE= 14, + XML_ELEMENT_DECL= 15, + XML_ATTRIBUTE_DECL= 16, + XML_ENTITY_DECL= 17, + XML_NAMESPACE_DECL= 18, + XML_XINCLUDE_START= 19, + XML_XINCLUDE_END= 20 +#ifdef LIBXML_SGML_ENABLED + ,XML_SGML_DOCUMENT_NODE= 21 +#endif +} xmlElementType; + +/* + * Size of an internal character representation. + * + * We use 8bit chars internal representation for memory efficiency, + * Note that with 8 bits wide xmlChars one can still use UTF-8 to handle + * correctly non ISO-Latin input. + */ + +typedef unsigned char xmlChar; + +#ifndef WIN32 +#ifndef CHAR +#define CHAR xmlChar +#endif +#endif + +#define BAD_CAST (xmlChar *) + +/* + * a DTD Notation definition + */ + +typedef struct _xmlNotation xmlNotation; +typedef xmlNotation *xmlNotationPtr; +struct _xmlNotation { + const xmlChar *name; /* Notation name */ + const xmlChar *PublicID; /* Public identifier, if any */ + const xmlChar *SystemID; /* System identifier, if any */ +}; + +/* + * a DTD Attribute definition + */ + +typedef enum { + XML_ATTRIBUTE_CDATA = 1, + XML_ATTRIBUTE_ID, + XML_ATTRIBUTE_IDREF , + XML_ATTRIBUTE_IDREFS, + XML_ATTRIBUTE_ENTITY, + XML_ATTRIBUTE_ENTITIES, + XML_ATTRIBUTE_NMTOKEN, + XML_ATTRIBUTE_NMTOKENS, + XML_ATTRIBUTE_ENUMERATION, + XML_ATTRIBUTE_NOTATION +} xmlAttributeType; + +typedef enum { + XML_ATTRIBUTE_NONE = 1, + XML_ATTRIBUTE_REQUIRED, + XML_ATTRIBUTE_IMPLIED, + XML_ATTRIBUTE_FIXED +} xmlAttributeDefault; + +typedef struct _xmlEnumeration xmlEnumeration; +typedef xmlEnumeration *xmlEnumerationPtr; +struct _xmlEnumeration { + struct _xmlEnumeration *next; /* next one */ + const xmlChar *name; /* Enumeration name */ +}; + +typedef struct 
_xmlAttribute xmlAttribute; +typedef xmlAttribute *xmlAttributePtr; +struct _xmlAttribute { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */ + const xmlChar *name; /* Attribute name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + struct _xmlAttribute *nexth; /* next in hash table */ + xmlAttributeType atype; /* The attribute type */ + xmlAttributeDefault def; /* the default */ + const xmlChar *defaultValue; /* or the default value */ + xmlEnumerationPtr tree; /* or the enumeration tree if any */ + const xmlChar *prefix; /* the namespace prefix if any */ + const xmlChar *elem; /* Element holding the attribute */ +}; + +/* + * a DTD Element definition. + */ +typedef enum { + XML_ELEMENT_CONTENT_PCDATA = 1, + XML_ELEMENT_CONTENT_ELEMENT, + XML_ELEMENT_CONTENT_SEQ, + XML_ELEMENT_CONTENT_OR +} xmlElementContentType; + +typedef enum { + XML_ELEMENT_CONTENT_ONCE = 1, + XML_ELEMENT_CONTENT_OPT, + XML_ELEMENT_CONTENT_MULT, + XML_ELEMENT_CONTENT_PLUS +} xmlElementContentOccur; + +typedef struct _xmlElementContent xmlElementContent; +typedef xmlElementContent *xmlElementContentPtr; +struct _xmlElementContent { + xmlElementContentType type; /* PCDATA, ELEMENT, SEQ or OR */ + xmlElementContentOccur ocur; /* ONCE, OPT, MULT or PLUS */ + const xmlChar *name; /* Element name */ + struct _xmlElementContent *c1; /* first child */ + struct _xmlElementContent *c2; /* second child */ +}; + +typedef enum { + XML_ELEMENT_TYPE_EMPTY = 1, + XML_ELEMENT_TYPE_ANY, + XML_ELEMENT_TYPE_MIXED, + XML_ELEMENT_TYPE_ELEMENT +} xmlElementTypeVal; + +typedef struct _xmlElement xmlElement; +typedef xmlElement *xmlElementPtr; +struct _xmlElement { +#ifndef XML_WITHOUT_CORBA + 
void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */ + const xmlChar *name; /* Element name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + xmlElementTypeVal etype; /* The type */ + xmlElementContentPtr content; /* the allowed element content */ + xmlAttributePtr attributes; /* List of the declared attributes */ + const xmlChar *prefix; /* the namespace prefix if any */ +}; + +/* + * An XML namespace. + * Note that prefix == NULL is valid, it defines the default namespace + * within the subtree (until overriden). + * + * XML_GLOBAL_NAMESPACE is now deprecated for good + * xmlNsType is unified with xmlElementType + */ + +#define XML_LOCAL_NAMESPACE XML_NAMESPACE_DECL +typedef xmlElementType xmlNsType; + +typedef struct _xmlNs xmlNs; +typedef xmlNs *xmlNsPtr; +struct _xmlNs { + struct _xmlNs *next; /* next Ns link for this node */ + xmlNsType type; /* global or local */ + const xmlChar *href; /* URL for the namespace */ + const xmlChar *prefix; /* prefix for the namespace */ +}; + +/* + * An XML DtD, as defined by parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + /* End of common part */ + void *notations; /* Hash table for notations if any */ + void *elements; /* Hash table for elements if any */ + void *attributes; /* Hash table for attributes if any */ + void *entities; /* Hash table for entities if any */ + const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */ + const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */ + void *pentities; /* Hash table for param entities if any */ +}; + +/* + * A attribute of an XML node. 
+ */ +typedef struct _xmlAttr xmlAttr; +typedef xmlAttr *xmlAttrPtr; +struct _xmlAttr { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */ + const xmlChar *name; /* the name of the property */ + struct _xmlNode *children; /* the value of the property */ + struct _xmlNode *last; /* NULL */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlAttr *next; /* next sibling link */ + struct _xmlAttr *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + xmlNs *ns; /* pointer to the associated namespace */ + xmlAttributeType atype; /* the attribute type if validating */ +}; + +/* + * An XML ID instance. + */ + +typedef struct _xmlID xmlID; +typedef xmlID *xmlIDPtr; +struct _xmlID { + struct _xmlID *next; /* next ID */ + const xmlChar *value; /* The ID name */ + xmlAttrPtr attr; /* The attribut holding it */ +}; + +/* + * An XML IDREF instance. + */ + +typedef struct _xmlRef xmlRef; +typedef xmlRef *xmlRefPtr; +struct _xmlRef { + struct _xmlRef *next; /* next Ref */ + const xmlChar *value; /* The Ref name */ + xmlAttrPtr attr; /* The attribut holding it */ +}; + +/* + * A buffer structure + */ + +typedef enum { + XML_BUFFER_ALLOC_DOUBLEIT, + XML_BUFFER_ALLOC_EXACT +} xmlBufferAllocationScheme; + +typedef struct _xmlBuffer xmlBuffer; +typedef xmlBuffer *xmlBufferPtr; +struct _xmlBuffer { + xmlChar *content; /* The buffer content UTF8 */ + unsigned int use; /* The buffer size used */ + unsigned int size; /* The buffer size */ + xmlBufferAllocationScheme alloc; /* The realloc method */ +}; + +/* + * A node in an XML tree. + */ +typedef struct _xmlNode xmlNode; +typedef xmlNode *xmlNodePtr; +struct _xmlNode { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* type number, must be second ! 
*/ + const xmlChar *name; /* the name of the node, or the entity */ + struct _xmlNode *children; /* parent->childs link */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + xmlNs *ns; /* pointer to the associated namespace */ +#ifndef XML_USE_BUFFER_CONTENT + xmlChar *content; /* the content */ +#else + xmlBufferPtr content; /* the content in a buffer */ +#endif + + /* End of common part */ + struct _xmlAttr *properties;/* properties list */ + xmlNs *nsDef; /* namespace definitions on this node */ +}; + +/* + * An XML document. + */ +typedef struct _xmlDoc xmlDoc; +typedef xmlDoc *xmlDocPtr; +struct _xmlDoc { +#ifndef XML_WITHOUT_CORBA + void *_private; /* for Corba, must be first ! */ +#endif + xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */ + char *name; /* name/filename/URI of the document */ + struct _xmlNode *children; /* the document tree */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* autoreference to itself */ + + /* End of common part */ + int compression;/* level of zlib compression */ + int standalone; /* standalone document (no external refs) */ + struct _xmlDtd *intSubset; /* the document internal subset */ + struct _xmlDtd *extSubset; /* the document external subset */ + struct _xmlNs *oldNs; /* Global namespace, the old way */ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* external initial encoding, if any */ + void *ids; /* Hash table for ID attributes if any */ + void *refs; /* Hash table for IDREFs attributes if any */ + const xmlChar *URL; /* The URI for that document */ + int charset; /* encoding of the in-memory 
content + actually an xmlCharEncoding */ +}; + +/* + * Compatibility naming layer with libxml1 + */ +#ifndef xmlChildrenNode +#define xmlChildrenNode children +#define xmlRootNode children +#endif + +/* + * Variables. + */ +LIBXML_DLL_IMPORT extern xmlNsPtr baseDTD; +LIBXML_DLL_IMPORT extern int oldXMLWDcompatibility;/* maintain compatibility with old WD */ +LIBXML_DLL_IMPORT extern int xmlIndentTreeOutput; /* try to indent the tree dumps */ +LIBXML_DLL_IMPORT extern xmlBufferAllocationScheme xmlBufferAllocScheme; /* alloc scheme to use */ +LIBXML_DLL_IMPORT extern int xmlSaveNoEmptyTags; /* save empty tags as */ + +/* + * Handling Buffers. + */ + +xmlBufferPtr xmlBufferCreate (void); +xmlBufferPtr xmlBufferCreateSize (size_t size); +void xmlBufferFree (xmlBufferPtr buf); +int xmlBufferDump (FILE *file, + xmlBufferPtr buf); +void xmlBufferAdd (xmlBufferPtr buf, + const xmlChar *str, + int len); +void xmlBufferAddHead (xmlBufferPtr buf, + const xmlChar *str, + int len); +void xmlBufferCat (xmlBufferPtr buf, + const xmlChar *str); +void xmlBufferCCat (xmlBufferPtr buf, + const char *str); +int xmlBufferShrink (xmlBufferPtr buf, + unsigned int len); +int xmlBufferGrow (xmlBufferPtr buf, + unsigned int len); +void xmlBufferEmpty (xmlBufferPtr buf); +const xmlChar* xmlBufferContent (const xmlBufferPtr buf); +int xmlBufferUse (const xmlBufferPtr buf); +void xmlBufferSetAllocationScheme(xmlBufferPtr buf, + xmlBufferAllocationScheme scheme); +int xmlBufferLength (const xmlBufferPtr buf); + +/* + * Creating/freeing new structures + */ +xmlDtdPtr xmlCreateIntSubset (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlNewDtd (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +xmlDtdPtr xmlGetIntSubset (xmlDocPtr doc); +void xmlFreeDtd (xmlDtdPtr cur); +xmlNsPtr xmlNewGlobalNs (xmlDocPtr doc, + const xmlChar *href, + const xmlChar *prefix); +xmlNsPtr xmlNewNs (xmlNodePtr node, 
+ const xmlChar *href, + const xmlChar *prefix); +void xmlFreeNs (xmlNsPtr cur); +xmlDocPtr xmlNewDoc (const xmlChar *version); +void xmlFreeDoc (xmlDocPtr cur); +xmlAttrPtr xmlNewDocProp (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +xmlAttrPtr xmlNewNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +void xmlFreePropList (xmlAttrPtr cur); +void xmlFreeProp (xmlAttrPtr cur); +xmlAttrPtr xmlCopyProp (xmlNodePtr target, + xmlAttrPtr cur); +xmlAttrPtr xmlCopyPropList (xmlNodePtr target, + xmlAttrPtr cur); +xmlDtdPtr xmlCopyDtd (xmlDtdPtr dtd); +xmlDocPtr xmlCopyDoc (xmlDocPtr doc, + int recursive); + +/* + * Creating new nodes + */ +xmlNodePtr xmlNewDocNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocRawNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewNode (xmlNsPtr ns, + const xmlChar *name); +xmlNodePtr xmlNewChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewTextChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocText (xmlDocPtr doc, + const xmlChar *content); +xmlNodePtr xmlNewText (const xmlChar *content); +xmlNodePtr xmlNewPI (const xmlChar *name, + const xmlChar *content); +xmlNodePtr xmlNewDocTextLen (xmlDocPtr doc, + const xmlChar *content, + int len); +xmlNodePtr xmlNewTextLen (const xmlChar *content, + int len); +xmlNodePtr xmlNewDocComment (xmlDocPtr doc, + const xmlChar *content); +xmlNodePtr xmlNewComment (const xmlChar *content); +xmlNodePtr xmlNewCDataBlock (xmlDocPtr doc, + const xmlChar *content, + int len); +xmlNodePtr xmlNewCharRef (xmlDocPtr doc, + const xmlChar *name); +xmlNodePtr xmlNewReference (xmlDocPtr doc, + const xmlChar *name); +xmlNodePtr xmlCopyNode (xmlNodePtr node, 
+ int recursive); +xmlNodePtr xmlCopyNodeList (xmlNodePtr node); +xmlNodePtr xmlNewDocFragment (xmlDocPtr doc); + +/* + * Navigating + */ +xmlNodePtr xmlDocGetRootElement (xmlDocPtr doc); +xmlNodePtr xmlGetLastChild (xmlNodePtr parent); +int xmlNodeIsText (xmlNodePtr node); +int xmlIsBlankNode (xmlNodePtr node); + +/* + * Changing the structure + */ +xmlNodePtr xmlDocSetRootElement (xmlDocPtr doc, + xmlNodePtr root); +void xmlNodeSetName (xmlNodePtr cur, + const xmlChar *name); +xmlNodePtr xmlAddChild (xmlNodePtr parent, + xmlNodePtr cur); +xmlNodePtr xmlAddChildList (xmlNodePtr parent, + xmlNodePtr cur); +xmlNodePtr xmlReplaceNode (xmlNodePtr old, + xmlNodePtr cur); +xmlNodePtr xmlAddSibling (xmlNodePtr cur, + xmlNodePtr elem); +xmlNodePtr xmlAddPrevSibling (xmlNodePtr cur, + xmlNodePtr elem); +xmlNodePtr xmlAddNextSibling (xmlNodePtr cur, + xmlNodePtr elem); +void xmlUnlinkNode (xmlNodePtr cur); +xmlNodePtr xmlTextMerge (xmlNodePtr first, + xmlNodePtr second); +void xmlTextConcat (xmlNodePtr node, + const xmlChar *content, + int len); +void xmlFreeNodeList (xmlNodePtr cur); +void xmlFreeNode (xmlNodePtr cur); +void xmlSetTreeDoc (xmlNodePtr tree, + xmlDocPtr doc); +void xmlSetListDoc (xmlNodePtr list, + xmlDocPtr doc); + +/* + * Namespaces + */ +xmlNsPtr xmlSearchNs (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *nameSpace); +xmlNsPtr xmlSearchNsByHref (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *href); +xmlNsPtr * xmlGetNsList (xmlDocPtr doc, + xmlNodePtr node); +void xmlSetNs (xmlNodePtr node, + xmlNsPtr ns); +xmlNsPtr xmlCopyNamespace (xmlNsPtr cur); +xmlNsPtr xmlCopyNamespaceList (xmlNsPtr cur); + +/* + * Changing the content. 
+ */ +xmlAttrPtr xmlSetProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlGetProp (xmlNodePtr node, + const xmlChar *name); +xmlAttrPtr xmlHasProp (xmlNodePtr node, + const xmlChar *name); +xmlAttrPtr xmlSetNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +xmlChar * xmlGetNsProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *nameSpace); +xmlNodePtr xmlStringGetNodeList (xmlDocPtr doc, + const xmlChar *value); +xmlNodePtr xmlStringLenGetNodeList (xmlDocPtr doc, + const xmlChar *value, + int len); +xmlChar * xmlNodeListGetString (xmlDocPtr doc, + xmlNodePtr list, + int inLine); +xmlChar * xmlNodeListGetRawString (xmlDocPtr doc, + xmlNodePtr list, + int inLine); +void xmlNodeSetContent (xmlNodePtr cur, + const xmlChar *content); +void xmlNodeSetContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +void xmlNodeAddContent (xmlNodePtr cur, + const xmlChar *content); +void xmlNodeAddContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +xmlChar * xmlNodeGetContent (xmlNodePtr cur); +xmlChar * xmlNodeGetLang (xmlNodePtr cur); +void xmlNodeSetLang (xmlNodePtr cur, + const xmlChar *lang); +int xmlNodeGetSpacePreserve (xmlNodePtr cur); +void xmlNodeSetSpacePreserve (xmlNodePtr cur, int + val); +xmlChar * xmlNodeGetBase (xmlDocPtr doc, + xmlNodePtr cur); +void xmlNodeSetBase (xmlNodePtr cur, + xmlChar *uri); + +/* + * Removing content. 
+ */ +int xmlRemoveProp (xmlAttrPtr attr); +int xmlRemoveNode (xmlNodePtr node); /* TODO */ + +/* + * Internal, don't use + */ +#ifdef VMS +void xmlBufferWriteXmlCHAR (xmlBufferPtr buf, + const xmlChar *string); +#define xmlBufferWriteCHAR xmlBufferWriteXmlCHAR +#else +void xmlBufferWriteCHAR (xmlBufferPtr buf, + const xmlChar *string); +#endif +void xmlBufferWriteChar (xmlBufferPtr buf, + const char *string); +void xmlBufferWriteQuotedString(xmlBufferPtr buf, + const xmlChar *string); + +/* + * Namespace handling + */ +int xmlReconciliateNs (xmlDocPtr doc, + xmlNodePtr tree); + +/* + * Saving + */ +void xmlDocDumpFormatMemory (xmlDocPtr cur, + xmlChar**mem, + int *size, + int format); +void xmlDocDumpMemory (xmlDocPtr cur, + xmlChar**mem, + int *size); +void xmlDocDumpMemoryEnc (xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding); +void xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding, + int format); +int xmlDocDump (FILE *f, + xmlDocPtr cur); +void xmlElemDump (FILE *f, + xmlDocPtr doc, + xmlNodePtr cur); +int xmlSaveFile (const char *filename, + xmlDocPtr cur); +void xmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format); + +/* This one is exported from xmlIO.h + +int xmlSaveFileTo (xmlOutputBuffer *buf, + xmlDocPtr cur, + const char *encoding); + */ + +int xmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); + +/* + * Compression + */ +int xmlGetDocCompressMode (xmlDocPtr doc); +void xmlSetDocCompressMode (xmlDocPtr doc, + int mode); +int xmlGetCompressMode (void); +void xmlSetCompressMode (int mode); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_TREE_H__ */ + diff --git a/uri.c b/uri.c new file mode 100644 index 00000000..31875f7f --- /dev/null +++ b/uri.c @@ -0,0 +1,1943 @@ +/** + * uri.c: set of generic URI related routines + * + * Reference: RFC 2396 + * + * See Copyright for the 
status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifdef WIN32 +#define INCLUDE_WINSOCK +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include + +#include +#include +#include + +/************************************************************************ + * * + * Macros to differenciate various character type * + * directly extracted from RFC 2396 * + * * + ************************************************************************/ + +/* + * alpha = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + * "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + * "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ + +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | + * "a" | "b" | "c" | "d" | "e" | "f" + */ + +#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \ + (((x) >= 'A') && ((x) <= 'F'))) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ + ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ + ((x) == '(') || ((x) == ')')) + + +/* + * reserved = ";" | "/" | "?" 
| ":" | "@" | "&" | "=" | "+" | "$" | "," + */ + +#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ + ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ + ((x) == '+') || ((x) == '$') || ((x) == ',')) + +/* + * unreserved = alphanum | mark + */ + +#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) + +/* + * escaped = "%" hex hex + */ + +#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \ + (IS_HEX((p)[2]))) + +/* + * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | + * "&" | "=" | "+" | "$" | "," + */ +#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\ + ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\ + ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\ + ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) + +/* + * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | "," + */ +#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\ + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\ + ((*(p) == ','))) + +/* + * rel_segment = 1*( unreserved | escaped | + * ";" | "@" | "&" | "=" | "+" | "$" | "," ) + */ + +#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \ + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ + ((*(p) == ','))) + +/* + * scheme = alpha *( alpha | digit | "+" | "-" | "." 
) + */ + +#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) || \ + ((x) == '+') || ((x) == '-') || ((x) == '.')) + +/* + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + */ + +#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || \ + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || \ + ((*(p) == '=')) || ((*(p) == '+'))) + +/* + * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" | + * "+" | "$" | "," ) + */ +#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \ + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \ + ((*(p) == ','))) + +/* + * uric = reserved | unreserved | escaped + */ + +#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \ + (IS_RESERVED(*(p)))) + +/* + * Skip to next pointer char, handle escaped sequences + */ + +#define NEXT(p) ((*p == '%')? p += 3 : p++) + +/* + * Productions from the spec. 
+ * + * authority = server | reg_name + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + * + * path = [ abs_path | opaque_part ] + */ + +/************************************************************************ + * * + * Generic URI structure functions * + * * + ************************************************************************/ + +/** + * xmlCreateURI: + * + * Simply creates an empty xmlURI + * + * Returns the new structure or NULL in case of error + */ +xmlURIPtr +xmlCreateURI(void) { + xmlURIPtr ret; + + ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCreateURI: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlURI)); + return(ret); +} + +/** + * xmlSaveUri: + * @uri: pointer to an xmlURI + * + * Save the URI as an escaped string + * + * Returns a new string (to be deallocated by caller) + */ +xmlChar * +xmlSaveUri(xmlURIPtr uri) { + xmlChar *ret = NULL; + const char *p; + int len; + int max; + + if (uri == NULL) return(NULL); + + + max = 80; + ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + len = 0; + + if (uri->scheme != NULL) { + p = uri->scheme; + while (*p != 0) { + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = *p++; + } + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = ':'; + } + if (uri->opaque != NULL) { + p = uri->opaque; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * 
sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) || + ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = 0; + } else { + if (uri->server != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '/'; + ret[len++] = '/'; + if (uri->user != NULL) { + p = uri->user; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == ':')) || + ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); + } + } + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '@'; + } + p = uri->server; + while (*p != 0) { + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = *p++; + } + if (uri->port > 0) { + if (len + 10 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + len += sprintf((char *) &ret[len], ":%d", uri->port); + } + } else if (uri->authority != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '/'; + ret[len++] = '/'; + p = uri->authority; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+'))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); + } + } + } else if (uri->scheme != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '/'; + ret[len++] = '/'; + } + if (uri->path != NULL) { + p = uri->path; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (uri->query != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '?'; + p = uri->query; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); + } + } + } + if (uri->fragment != NULL) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = '#'; + p = uri->fragment; + while (*p != 0) { + if (len + 3 >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, + (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (len >= max) { + max *= 2; + ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlSaveUri: out of memory\n"); + return(NULL); + } + } + ret[len++] = 0; + } + return(ret); +} + +/** + * xmlPrintURI: + * @stream: a FILE* for the output + * @uri: pointer to an xmlURI + * + * Prints the URI in the stream @steam. 
+ */ +void +xmlPrintURI(FILE *stream, xmlURIPtr uri) { + xmlChar *out; + + out = xmlSaveUri(uri); + if (out != NULL) { + fprintf(stream, "%s", out); + xmlFree(out); + } +} + +/** + * xmlCleanURI: + * @uri: pointer to an xmlURI + * + * Make sure the xmlURI struct is free of content + */ +void +xmlCleanURI(xmlURIPtr uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) xmlFree(uri->scheme); + uri->scheme = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = NULL; + if (uri->user != NULL) xmlFree(uri->user); + uri->user = NULL; + if (uri->path != NULL) xmlFree(uri->path); + uri->path = NULL; + if (uri->fragment != NULL) xmlFree(uri->fragment); + uri->fragment = NULL; + if (uri->opaque != NULL) xmlFree(uri->opaque); + uri->opaque = NULL; + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->query != NULL) xmlFree(uri->query); + uri->query = NULL; +} + +/** + * xmlFreeURI: + * @uri: pointer to an xmlURI + * + * Free up the xmlURI struct + */ +void +xmlFreeURI(xmlURIPtr uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) xmlFree(uri->scheme); + if (uri->server != NULL) xmlFree(uri->server); + if (uri->user != NULL) xmlFree(uri->user); + if (uri->path != NULL) xmlFree(uri->path); + if (uri->fragment != NULL) xmlFree(uri->fragment); + if (uri->opaque != NULL) xmlFree(uri->opaque); + if (uri->authority != NULL) xmlFree(uri->authority); + if (uri->query != NULL) xmlFree(uri->query); + memset(uri, -1, sizeof(xmlURI)); + xmlFree(uri); +} + +/************************************************************************ + * * + * Helper functions * + * * + ************************************************************************/ + +#if 0 +/** + * xmlNormalizeURIPath: + * @path: pointer to the path string + * + * applies the 5 normalization steps to a path string + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +int +xmlNormalizeURIPath(char 
*path) { + int cur, out; + + if (path == NULL) + return(-1); + cur = 0; + out = 0; + while ((path[cur] != 0) && (path[cur] != '/')) cur++; + if (path[cur] == 0) + return(0); + + /* we are positionned at the beginning of the first segment */ + cur++; + out = cur; + + /* + * Analyze each segment in sequence. + */ + while (path[cur] != 0) { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. + */ + if ((path[cur] == '.') && (path[cur + 1] == '/')) { + cur += 2; + if (path[cur] == 0) { + path[out++] = 0; + } + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((path[cur] == '.') && (path[cur + 1] == 0)) { + path[out] = 0; + break; + } + + /* read the segment */ + while ((path[cur] != 0) && (path[cur] != '/')) { + path[out++] = path[cur++]; + } + path[out++] = path[cur]; + if (path[cur] != 0) { + cur++; + } + } + + cur = 0; + out = 0; + while ((path[cur] != 0) && (path[cur] != '/')) cur++; + if (path[cur] == 0) + return(0); + /* we are positionned at the beginning of the first segment */ + cur++; + out = cur; + /* + * Analyze each segment in sequence. + */ + while (path[cur] != 0) { + /* + * e) All occurrences of "/../", where is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + */ + if ((cur > 1) && (out > 1) && + (path[cur] == '/') && (path[cur + 1] == '.') && + (path[cur + 2] == '.') && (path[cur + 3] == '/') && + ((path[out] != '.') || (path[out - 1] != '.') || + (path[out - 2] != '/'))) { + cur += 3; + out --; + while ((out > 0) && (path[out] != '/')) { out --; } + path[out] = 0; + continue; + } + + /* + * f) If the buffer string ends with "/..", where + * is a complete path segment not equal to "..", that + * "/.." is removed. 
+ */ + if ((path[cur] == '/') && (path[cur + 1] == '.') && + (path[cur + 2] == '.') && (path[cur + 3] == 0) && + ((path[out] != '.') || (path[out - 1] != '.') || + (path[out - 2] != '/'))) { + cur += 4; + out --; + while ((out > 0) && (path[out - 1] != '/')) { out --; } + path[out] = 0; + continue; + } + + path[out++] = path[cur++]; /* / or 0 */ + } + path[out] = 0; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. + */ + cur = 0; + while ((path[cur] == '/') && (path[cur + 1] == '.') && + (path[cur + 2] == '.')) + cur += 3; + if (cur != 0) { + out = 0; + while (path[cur] != 0) path[out++] = path[cur++]; + path[out] = 0; + } + return(0); +} +#else +/** + * xmlNormalizeURIPath: + * @path: pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. + * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +int +xmlNormalizeURIPath(char *path) { + char *cur, *out; + + if (path == NULL) + return(-1); + + /* Skip all initial "/" chars. We want to get to the beginning of the + * first non-empty segment. + */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* Keep everything we've seen so far. */ + out = cur; + + /* + * Analyze each segment in sequence for cases (c) and (d). + */ + while (cur[0] != '\0') { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. 
+ */ + if ((cur[0] == '.') && (cur[1] == '/')) { + cur += 2; + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((cur[0] == '.') && (cur[1] == '\0')) + break; + + /* Otherwise keep the segment. */ + while (cur[0] != '/') { + if (cur[0] == '\0') + goto done_cd; + (out++)[0] = (cur++)[0]; + } + (out++)[0] = (cur++)[0]; + } + done_cd: + out[0] = '\0'; + + /* Reset to the beginning of the first segment for the next sequence. */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* + * Analyze each segment in sequence for cases (e) and (f). + * + * e) All occurrences of "/../", where is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + * + * f) If the buffer string ends with "/..", where + * is a complete path segment not equal to "..", that + * "/.." is removed. + * + * To satisfy the "iterative" clause in (e), we need to collapse the + * string every time we find something that needs to be removed. Thus, + * we don't need to keep two pointers into the string: we only need a + * "current position" pointer. + */ + while (1) { + char *segp; + + /* At the beginning of each iteration of this loop, "cur" points to + * the first character of the segment we want to examine. + */ + + /* Find the end of the current segment. */ + segp = cur; + while ((segp[0] != '/') && (segp[0] != '\0')) + ++segp; + + /* If this is the last segment, we're done (we need at least two + * segments to meet the criteria for the (e) and (f) cases). + */ + if (segp[0] == '\0') + break; + + /* If the first segment is "..", or if the next segment _isn't_ "..", + * keep this segment and try the next one. 
+ */ + ++segp; + if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) + || ((segp[0] != '.') || (segp[1] != '.') + || ((segp[2] != '/') && (segp[2] != '\0')))) { + cur = segp; + continue; + } + + /* If we get here, remove this segment and the next one and back up + * to the previous segment (if there is one), to implement the + * "iteratively" clause. It's pretty much impossible to back up + * while maintaining two pointers into the buffer, so just compact + * the whole buffer now. + */ + + /* If this is the end of the buffer, we're done. */ + if (segp[2] == '\0') { + cur[0] = '\0'; + break; + } + strcpy(cur, segp + 3); + + /* If there are no previous segments, then keep going from here. */ + segp = cur; + while ((segp > path) && ((--segp)[0] == '/')) + ; + if (segp == path) + continue; + + /* "segp" is pointing to the end of a previous segment; find it's + * start. We need to back up to the previous segment and start + * over with that to handle things like "foo/bar/../..". If we + * don't do this, then on the first pass we'll remove the "bar/..", + * but be pointing at the second ".." so we won't realize we can also + * remove the "foo/..". + */ + cur = segp; + while ((cur > path) && (cur[-1] != '/')) + --cur; + } + out[0] = '\0'; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. 
+ */ + if (path[0] == '/') { + cur = path; + while ((cur[1] == '.') && (cur[2] == '.') + && ((cur[3] == '/') || (cur[3] == '\0'))) + cur += 3; + + if (cur != path) { + out = path; + while (cur[0] != '\0') + (out++)[0] = (cur++)[0]; + out[0] = 0; + } + } + + return(0); +} +#endif + +/** + * xmlURIUnescapeString: + * @str: the string to unescape + * @len: the lenght in bytes to unescape (or <= 0 to indicate full string) + * @target: optionnal destination buffer + * + * Unescaping routine, does not do validity checks ! + * Output is direct unsigned char translation of %XX values (no encoding) + * + * Returns an copy of the string, but unescaped + */ +char * +xmlURIUnescapeString(const char *str, int len, char *target) { + char *ret, *out; + const char *in; + + if (str == NULL) + return(NULL); + if (len <= 0) len = strlen(str); + if (len <= 0) return(NULL); + + if (target == NULL) { + ret = (char *) xmlMalloc(len + 1); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlURIUnescapeString: out of memory\n"); + return(NULL); + } + } else + ret = target; + in = str; + out = ret; + while(len > 0) { + if (*in == '%') { + in++; + if ((*in >= '0') && (*in <= '9')) + *out = (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = (*in - 'A') + 10; + in++; + if ((*in >= '0') && (*in <= '9')) + *out = *out * 16 + (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = *out * 16 + (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = *out * 16 + (*in - 'A') + 10; + in++; + len -= 3; + out++; + } else { + *out++ = *in++; + len--; + } + } + *out = 0; + return(ret); +} + +/** + * xmlURIEscape: + * @str: the string of the URI to escape + * + * Escaping routine, does not do validity checks ! + * It will try to escape the chars needing this, but this is heuristic + * based it's impossible to be sure. 
+ * + * Returns an copy of the string, but escaped + */ +xmlChar * +xmlURIEscape(const xmlChar *str) { + xmlChar *ret; + const xmlChar *in; + unsigned int len, out; + + if (str == NULL) + return(NULL); + len = xmlStrlen(str); + if (len <= 0) return(NULL); + + len += 20; + ret = (xmlChar *) xmlMalloc(len); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlURIEscape: out of memory\n"); + return(NULL); + } + in = (const xmlChar *) str; + out = 0; + while(*in != 0) { + if (len - out <= 3) { + len += 20; + ret = (xmlChar *) xmlRealloc(ret, len); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlURIEscape: out of memory\n"); + return(NULL); + } + } + if ((!IS_UNRESERVED(*in)) && (*in != ':') && (*in != '/') && + (*in != '?') && (*in != '#')) { + unsigned char val; + ret[out++] = '%'; + val = *in >> 4; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + val = *in & 0xF; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + in++; + } else { + ret[out++] = *in++; + } + } + ret[out] = 0; + return(ret); +} + +/************************************************************************ + * * + * Escaped URI parsing * + * * + ************************************************************************/ + +/** + * xmlParseURIFragment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI fragment string and fills in the appropriate fields + * of the @uri structure. 
+ * + * fragment = *uric + * + * Returns 0 or the error code + */ +int +xmlParseURIFragment(xmlURIPtr uri, const char **str) { + const char *cur = *str; + + if (str == NULL) return(-1); + + while (IS_URIC(cur)) NEXT(cur); + if (uri != NULL) { + if (uri->fragment != NULL) xmlFree(uri->fragment); + uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIQuery: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +int +xmlParseURIQuery(xmlURIPtr uri, const char **str) { + const char *cur = *str; + + if (str == NULL) return(-1); + + while (IS_URIC(cur)) NEXT(cur); + if (uri != NULL) { + if (uri->query != NULL) xmlFree(uri->query); + uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIScheme: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI scheme + * + * scheme = alpha *( alpha | digit | "+" | "-" | "." ) + * + * Returns 0 or the error code + */ +int +xmlParseURIScheme(xmlURIPtr uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!IS_ALPHA(*cur)) + return(2); + cur++; + while (IS_SCHEME(*cur)) cur++; + if (uri != NULL) { + if (uri->scheme != NULL) xmlFree(uri->scheme); + /* !!! 
strndup */ + uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIOpaquePart: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI opaque part + * + * opaque_part = uric_no_slash *uric + * + * Returns 0 or the error code + */ +int +xmlParseURIOpaquePart(xmlURIPtr uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!IS_URIC_NO_SLASH(cur)) { + return(3); + } + NEXT(cur); + while (IS_URIC(cur)) NEXT(cur); + if (uri != NULL) { + if (uri->opaque != NULL) xmlFree(uri->opaque); + uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIServer: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse a server subpart of an URI, it's a finer grain analysis + * of the authority part. + * + * server = [ [ userinfo "@" ] hostport ] + * userinfo = *( unreserved | escaped | + * ";" | ":" | "&" | "=" | "+" | "$" | "," ) + * hostport = host [ ":" port ] + * host = hostname | IPv4address + * hostname = *( domainlabel "." ) toplabel [ "." ] + * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + * toplabel = alpha | alpha *( alphanum | "-" ) alphanum + * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit + * port = *digit + * + * Returns 0 or the error code + */ +int +xmlParseURIServer(xmlURIPtr uri, const char **str) { + const char *cur; + const char *host, *tmp; + + if (str == NULL) + return(-1); + + cur = *str; + + /* + * is there an userinfo ? 
+ */ + while (IS_USERINFO(cur)) NEXT(cur); + if (*cur == '@') { + if (uri != NULL) { + if (uri->user != NULL) xmlFree(uri->user); + uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); + } + cur++; + } else { + if (uri != NULL) { + if (uri->user != NULL) xmlFree(uri->user); + uri->user = NULL; + } + cur = *str; + } + /* + * This can be empty in the case where there is no server + */ + host = cur; + if (*cur == '/') { + if (uri != NULL) { + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = NULL; + uri->port = 0; + } + return(0); + } + /* + * host part of hostport can derive either an IPV4 address + * or an unresolved name. Check the IP first, it easier to detect + * errors if wrong one + */ + if (IS_DIGIT(*cur)) { + while(IS_DIGIT(*cur)) cur++; + if (*cur != '.') + goto host_name; + cur++; + if (!IS_DIGIT(*cur)) + goto host_name; + while(IS_DIGIT(*cur)) cur++; + if (*cur != '.') + goto host_name; + cur++; + if (!IS_DIGIT(*cur)) + goto host_name; + while(IS_DIGIT(*cur)) cur++; + if (*cur != '.') + goto host_name; + cur++; + if (!IS_DIGIT(*cur)) + goto host_name; + while(IS_DIGIT(*cur)) cur++; + if (uri != NULL) { + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = xmlURIUnescapeString(host, cur - host, NULL); + } + goto host_done; + } +host_name: + /* + * the hostname production as-is is a parser nightmare. + * simplify it to + * hostname = *( domainlabel "." ) domainlabel [ "." ] + * and just make sure the last label starts with a non numeric char. 
+ */ + if (!IS_ALPHANUM(*cur)) + return(6); + while (IS_ALPHANUM(*cur)) { + while ((IS_ALPHANUM(*cur)) || (*cur == '-')) cur++; + if (*cur == '.') + cur++; + } + tmp = cur; + tmp--; + while (IS_ALPHANUM(*tmp) && (*tmp != '.') && (tmp >= host)) tmp--; + tmp++; + if (!IS_ALPHA(*tmp)) + return(7); + if (uri != NULL) { + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) xmlFree(uri->server); + uri->server = xmlURIUnescapeString(host, cur - host, NULL); + } + +host_done: + + /* + * finish by checking for a port presence. + */ + if (*cur == ':') { + cur++; + if (IS_DIGIT(*cur)) { + if (uri != NULL) + uri->port = 0; + while (IS_DIGIT(*cur)) { + if (uri != NULL) + uri->port = uri->port * 10 + (*cur - '0'); + cur++; + } + } + } + *str = cur; + return(0); +} + +/** + * xmlParseURIRelSegment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI relative segment + * + * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" | + * "+" | "$" | "," ) + * + * Returns 0 or the error code + */ +int +xmlParseURIRelSegment(xmlURIPtr uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!IS_SEGMENT(cur)) { + return(3); + } + NEXT(cur); + while (IS_SEGMENT(cur)) NEXT(cur); + if (uri != NULL) { + if (uri->path != NULL) xmlFree(uri->path); + uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIPathSegments: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * @slash: should we add a leading slash + * + * Parse an URI set of path segments + * + * path_segments = segment *( "/" segment ) + * segment = *pchar *( ";" param ) + * param = *pchar + * + * Returns 0 or the error code + */ +int +xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + + do { + while 
(IS_PCHAR(cur)) NEXT(cur); + if (*cur == ';') { + cur++; + while (IS_PCHAR(cur)) NEXT(cur); + } + if (*cur != '/') break; + cur++; + } while (1); + if (uri != NULL) { + int len, len2 = 0; + char *path; + + /* + * Concat the set of path segments to the current path + */ + len = cur - *str; + if (slash) + len++; + + if (uri->path != NULL) { + len2 = strlen(uri->path); + len += len2; + } + path = (char *) xmlMalloc(len + 1); + if (path == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlParseURIPathSegments: out of memory\n"); + *str = cur; + return(-1); + } + if (uri->path != NULL) + memcpy(path, uri->path, len2); + if (slash) { + path[len2] = '/'; + len2++; + } + path[len2] = 0; + if (cur - *str > 0) + xmlURIUnescapeString(*str, cur - *str, &path[len2]); + if (uri->path != NULL) + xmlFree(uri->path); + uri->path = path; + } + *str = cur; + return(0); +} + +/** + * xmlParseURIAuthority: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the authority part of an URI. + * + * authority = server | reg_name + * server = [ [ userinfo "@" ] hostport ] + * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" | + * "@" | "&" | "=" | "+" ) + * + * Note : this is completely ambiguous since reg_name is allowed to + * use the full set of chars in use by server: + * + * 3.2.1. Registry-based Naming Authority + * + * The structure of a registry-based naming authority is specific + * to the URI scheme, but constrained to the allowed characters + * for an authority component. + * + * Returns 0 or the error code + */ +int +xmlParseURIAuthority(xmlURIPtr uri, const char **str) { + const char *cur; + int ret; + + if (str == NULL) + return(-1); + + cur = *str; + + /* + * try first to parse it as a server string. 
+ */ + ret = xmlParseURIServer(uri, str); + if (ret == 0) + return(0); + + /* + * failed, fallback to reg_name + */ + if (!IS_REG_NAME(cur)) { + return(5); + } + NEXT(cur); + while (IS_REG_NAME(cur)) NEXT(cur); + if (uri != NULL) { + if (uri->server != NULL) xmlFree(uri->server); + uri->server = NULL; + if (uri->user != NULL) xmlFree(uri->user); + uri->user = NULL; + if (uri->authority != NULL) xmlFree(uri->authority); + uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL); + } + *str = cur; + return(0); +} + +/** + * xmlParseURIHierPart: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI hirarchical part + * + * hier_part = ( net_path | abs_path ) [ "?" query ] + * abs_path = "/" path_segments + * net_path = "//" authority [ abs_path ] + * + * Returns 0 or the error code + */ +int +xmlParseURIHierPart(xmlURIPtr uri, const char **str) { + int ret; + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + + if ((cur[0] == '/') && (cur[1] == '/')) { + cur += 2; + ret = xmlParseURIAuthority(uri, &cur); + if (ret != 0) + return(ret); + if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } + } else if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } else { + return(4); + } + if (ret != 0) + return(ret); + if (*cur == '?') { + cur++; + ret = xmlParseURIQuery(uri, &cur); + if (ret != 0) + return(ret); + } + *str = cur; + return(0); +} + +/** + * xmlParseAbsoluteURI: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * absoluteURI = scheme ":" ( hier_part | opaque_part ) + * + * Returns 0 or the error code + */ +int +xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) { + int ret; + + if (str == NULL) + return(-1); + + ret = xmlParseURIScheme(uri, str); + if (ret != 0) return(ret); + if (**str != ':') + 
return(1); + (*str)++; + if (**str == '/') + return(xmlParseURIHierPart(uri, str)); + return(xmlParseURIOpaquePart(uri, str)); +} + +/** + * xmlParseRelativeURI: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an relative URI string and fills in the appropriate fields + * of the @uri structure + * + * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + * abs_path = "/" path_segments + * net_path = "//" authority [ abs_path ] + * rel_path = rel_segment [ abs_path ] + * + * Returns 0 or the error code + */ +int +xmlParseRelativeURI(xmlURIPtr uri, const char **str) { + int ret = 0; + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if ((cur[0] == '/') && (cur[1] == '/')) { + cur += 2; + ret = xmlParseURIAuthority(uri, &cur); + if (ret != 0) + return(ret); + if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } + } else if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } else if (cur[0] != '#' && cur[0] != '?') { + ret = xmlParseURIRelSegment(uri, &cur); + if (ret != 0) + return(ret); + if (cur[0] == '/') { + cur++; + ret = xmlParseURIPathSegments(uri, &cur, 1); + } + } + if (ret != 0) + return(ret); + if (*cur == '?') { + cur++; + ret = xmlParseURIQuery(uri, &cur); + if (ret != 0) + return(ret); + } + *str = cur; + return(ret); +} + +/** + * xmlParseURIReference: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +int +xmlParseURIReference(xmlURIPtr uri, const char *str) { + int ret; + const char *tmp = str; + + if (str == NULL) + return(-1); + xmlCleanURI(uri); + + /* + * Try first to parse aboslute refs, then fallback to relative if + * it fails. 
+ */ + ret = xmlParseAbsoluteURI(uri, &str); + if (ret != 0) { + xmlCleanURI(uri); + str = tmp; + ret = xmlParseRelativeURI(uri, &str); + } + if (ret != 0) { + xmlCleanURI(uri); + return(ret); + } + + if (*str == '#') { + str++; + ret = xmlParseURIFragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + xmlCleanURI(uri); + return(1); + } + return(0); +} + +/** + * xmlParseURI: + * @str: the URI string to analyze + * + * Parse an URI + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly build xmlURIPtr or NULL in case of error + */ +xmlURIPtr +xmlParseURI(const char *str) { + xmlURIPtr uri; + int ret; + + if (str == NULL) + return(NULL); + uri = xmlCreateURI(); + if (uri != NULL) { + ret = xmlParseURIReference(uri, str); + if (ret) { + xmlFreeURI(uri); + return(NULL); + } + } + return(uri); +} + +/************************************************************************ + * * + * Public functions * + * * + ************************************************************************/ + +/** + * xmlBuildURI: + * @URI: the URI instance found in the document + * @base: the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * of error. + */ +xmlChar * +xmlBuildURI(const xmlChar *URI, const xmlChar *base) { + xmlChar *val = NULL; + int ret, len, index, cur, out; + xmlURIPtr ref = NULL; + xmlURIPtr bas = NULL; + xmlURIPtr res = NULL; + + /* + * 1) The URI reference is parsed into the potential four components and + * fragment identifier, as described in Section 4.3. + * + * NOTE that a completely empty URI is treated by modern browsers + * as a reference to "." rather than as a synonym for the current + * URI. 
Should we do that here? + */ + if (URI == NULL) + ret = -1; + else { + if (*URI) { + ref = xmlCreateURI(); + if (ref == NULL) + goto done; + ret = xmlParseURIReference(ref, (const char *) URI); + } + else + ret = 0; + } + if (ret != 0) + goto done; + if (base == NULL) + ret = -1; + else { + bas = xmlCreateURI(); + if (bas == NULL) + goto done; + ret = xmlParseURIReference(bas, (const char *) base); + } + if (ret != 0) { + if (ref) + val = xmlSaveUri(ref); + goto done; + } + if (ref == NULL) { + /* + * the base fragment must be ignored + */ + if (bas->fragment != NULL) { + xmlFree(bas->fragment); + bas->fragment = NULL; + } + val = xmlSaveUri(bas); + goto done; + } + + /* + * 2) If the path component is empty and the scheme, authority, and + * query components are undefined, then it is a reference to the + * current document and we are done. Otherwise, the reference URI's + * query and fragment components are defined as found (or not found) + * within the URI reference and not inherited from the base URI. + * + * NOTE that in modern browsers, the parsing differs from the above + * in the following aspect: the query component is allowed to be + * defined while still treating this as a reference to the current + * document. 
+ */ + res = xmlCreateURI(); + if (res == NULL) + goto done; + if ((ref->scheme == NULL) && (ref->path == NULL) && + ((ref->authority == NULL) && (ref->server == NULL))) { + if (bas->scheme != NULL) + res->scheme = xmlMemStrdup(bas->scheme); + if (bas->authority != NULL) + res->authority = xmlMemStrdup(bas->authority); + else if (bas->server != NULL) { + res->server = xmlMemStrdup(bas->server); + if (bas->user != NULL) + res->user = xmlMemStrdup(bas->user); + res->port = bas->port; + } + if (bas->path != NULL) + res->path = xmlMemStrdup(bas->path); + if (ref->query != NULL) + res->query = xmlMemStrdup(ref->query); + else if (bas->query != NULL) + res->query = xmlMemStrdup(bas->query); + if (ref->fragment != NULL) + res->fragment = xmlMemStrdup(ref->fragment); + goto step_7; + } + + if (ref->query != NULL) + res->query = xmlMemStrdup(ref->query); + if (ref->fragment != NULL) + res->fragment = xmlMemStrdup(ref->fragment); + + /* + * 3) If the scheme component is defined, indicating that the reference + * starts with a scheme name, then the reference is interpreted as an + * absolute URI and we are done. Otherwise, the reference URI's + * scheme is inherited from the base URI's scheme component. + */ + if (ref->scheme != NULL) { + val = xmlSaveUri(ref); + goto done; + } + if (bas->scheme != NULL) + res->scheme = xmlMemStrdup(bas->scheme); + + /* + * 4) If the authority component is defined, then the reference is a + * network-path and we skip to step 7. Otherwise, the reference + * URI's authority is inherited from the base URI's authority + * component, which will also be undefined if the URI scheme does not + * use an authority component. 
+ */ + if ((ref->authority != NULL) || (ref->server != NULL)) { + if (ref->authority != NULL) + res->authority = xmlMemStrdup(ref->authority); + else { + res->server = xmlMemStrdup(ref->server); + if (ref->user != NULL) + res->user = xmlMemStrdup(ref->user); + res->port = ref->port; + } + if (ref->path != NULL) + res->path = xmlMemStrdup(ref->path); + goto step_7; + } + if (bas->authority != NULL) + res->authority = xmlMemStrdup(bas->authority); + else if (bas->server != NULL) { + res->server = xmlMemStrdup(bas->server); + if (bas->user != NULL) + res->user = xmlMemStrdup(bas->user); + res->port = bas->port; + } + + /* + * 5) If the path component begins with a slash character ("/"), then + * the reference is an absolute-path and we skip to step 7. + */ + if ((ref->path != NULL) && (ref->path[0] == '/')) { + res->path = xmlMemStrdup(ref->path); + goto step_7; + } + + + /* + * 6) If this step is reached, then we are resolving a relative-path + * reference. The relative path needs to be merged with the base + * URI's path. Although there are many ways to do this, we will + * describe a simple method using a separate string buffer. + * + * Allocate a buffer large enough for the result string. + */ + len = 2; /* extra / and 0 */ + if (ref->path != NULL) + len += strlen(ref->path); + if (bas->path != NULL) + len += strlen(bas->path); + res->path = (char *) xmlMalloc(len); + if (res->path == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlBuildURI: out of memory\n"); + goto done; + } + res->path[0] = 0; + + /* + * a) All but the last segment of the base URI's path component is + * copied to the buffer. In other words, any characters after the + * last (right-most) slash character, if any, are excluded. 
+ */ + cur = 0; + out = 0; + if (bas->path != NULL) { + while (bas->path[cur] != 0) { + while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) + cur++; + if (bas->path[cur] == 0) + break; + + cur++; + while (out < cur) { + res->path[out] = bas->path[out]; + out++; + } + } + } + res->path[out] = 0; + + /* + * b) The reference's path component is appended to the buffer + * string. + */ + if (ref->path != NULL && ref->path[0] != 0) { + index = 0; + /* + * Ensure the path includes a '/' + */ + if ((out == 0) && (bas->server != NULL)) + res->path[out++] = '/'; + while (ref->path[index] != 0) { + res->path[out++] = ref->path[index++]; + } + } + res->path[out] = 0; + + /* + * Steps c) to h) are really path normalization steps + */ + xmlNormalizeURIPath(res->path); + +step_7: + + /* + * 7) The resulting URI components, including any inherited from the + * base URI, are recombined to give the absolute form of the URI + * reference. + */ + val = xmlSaveUri(res); + +done: + if (ref != NULL) + xmlFreeURI(ref); + if (bas != NULL) + xmlFreeURI(bas); + if (res != NULL) + xmlFreeURI(res); + return(val); +} + + diff --git a/uri.h b/uri.h new file mode 100644 index 00000000..e7aeda41 --- /dev/null +++ b/uri.h @@ -0,0 +1,61 @@ +/** + * uri.c: library of generic URI related routines + * + * Reference: RFC 2396 + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_URI_H__ +#define __XML_URI_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * + */ +typedef struct _xmlURI xmlURI; +typedef xmlURI *xmlURIPtr; +struct _xmlURI { + char *scheme; + char *opaque; + char *authority; + char *server; + char *user; + int port; + char *path; + char *query; + char *fragment; +}; + +/* + * This function is in tree.h: + * xmlChar * xmlNodeGetBase (xmlDocPtr doc, + * xmlNodePtr cur); + */ +xmlURIPtr xmlCreateURI (void); +xmlChar * xmlBuildURI (const xmlChar *URI, + const xmlChar *base); +xmlURIPtr xmlParseURI (const char *URI); +int xmlParseURIReference (xmlURIPtr uri, + const char *str); +xmlChar * xmlSaveUri (xmlURIPtr uri); +void xmlPrintURI (FILE *stream, + xmlURIPtr uri); +char * xmlURIUnescapeString (const char *str, + int len, + char *target); +int xmlNormalizeURIPath (char *path); +xmlChar * xmlURIEscape (const xmlChar *str); +void xmlFreeURI (xmlURIPtr uri); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_URI_H__ */ diff --git a/valid.c b/valid.c new file mode 100644 index 00000000..1b7e6f65 --- /dev/null +++ b/valid.c @@ -0,0 +1,4098 @@ +/* + * valid.c : part of the code use to do the DTD handling and the validity + * checking + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include + +#ifdef HAVE_STDLIB_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +/* + * Generic function for accessing stacks in the Validity Context + */ + +#define PUSH_AND_POP(scope, type, name) \ +scope int name##VPush(xmlValidCtxtPtr ctxt, type value) { \ + if (ctxt->name##Nr >= ctxt->name##Max) { \ + ctxt->name##Max *= 2; \ + ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ + ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ + if (ctxt->name##Tab == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "realloc failed !\n"); \ + return(0); \ + } \ + } \ + ctxt->name##Tab[ctxt->name##Nr] = value; \ + ctxt->name = value; \ + return(ctxt->name##Nr++); \ +} \ +scope type name##VPop(xmlValidCtxtPtr ctxt) { \ + type ret; \ + if (ctxt->name##Nr <= 0) return(0); \ + ctxt->name##Nr--; \ + if (ctxt->name##Nr > 0) \ + ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ + else \ + ctxt->name = NULL; \ + ret = ctxt->name##Tab[ctxt->name##Nr]; \ + ctxt->name##Tab[ctxt->name##Nr] = 0; \ + return(ret); \ +} \ + +PUSH_AND_POP(static, xmlNodePtr, node) + +/* #define DEBUG_VALID_ALGO */ + +#ifdef DEBUG_VALID_ALGO +void xmlValidPrintNodeList(xmlNodePtr cur) { + if (cur == NULL) + xmlGenericError(xmlGenericErrorContext, "null "); + while (cur != NULL) { + switch (cur->type) { + case XML_ELEMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "%s ", cur->name); + break; + case XML_TEXT_NODE: + xmlGenericError(xmlGenericErrorContext, "text "); + break; + case XML_CDATA_SECTION_NODE: + xmlGenericError(xmlGenericErrorContext, "cdata "); + break; + case XML_ENTITY_REF_NODE: + xmlGenericError(xmlGenericErrorContext, "&%s; ", cur->name); + break; + case XML_PI_NODE: + xmlGenericError(xmlGenericErrorContext, "pi(%s) ", cur->name); + break; + case XML_COMMENT_NODE: + xmlGenericError(xmlGenericErrorContext, 
"comment "); + break; + case XML_ATTRIBUTE_NODE: + xmlGenericError(xmlGenericErrorContext, "?attr? "); + break; + case XML_ENTITY_NODE: + xmlGenericError(xmlGenericErrorContext, "?ent? "); + break; + case XML_DOCUMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "?doc? "); + break; + case XML_DOCUMENT_TYPE_NODE: + xmlGenericError(xmlGenericErrorContext, "?doctype? "); + break; + case XML_DOCUMENT_FRAG_NODE: + xmlGenericError(xmlGenericErrorContext, "?frag? "); + break; + case XML_NOTATION_NODE: + xmlGenericError(xmlGenericErrorContext, "?nota? "); + break; + case XML_HTML_DOCUMENT_NODE: + xmlGenericError(xmlGenericErrorContext, "?html? "); + break; + case XML_DTD_NODE: + xmlGenericError(xmlGenericErrorContext, "?dtd? "); + break; + case XML_ELEMENT_DECL: + xmlGenericError(xmlGenericErrorContext, "?edecl? "); + break; + case XML_ATTRIBUTE_DECL: + xmlGenericError(xmlGenericErrorContext, "?adecl? "); + break; + case XML_ENTITY_DECL: + xmlGenericError(xmlGenericErrorContext, "?entdecl? "); + break; + } + cur = cur->next; + } +} + +void xmlValidDebug(xmlNodePtr cur, xmlElementContentPtr cont) { + char expr[1000]; + + expr[0] = 0; + xmlGenericError(xmlGenericErrorContext, "valid: "); + xmlValidPrintNodeList(cur); + xmlGenericError(xmlGenericErrorContext, "against "); + xmlSprintfElementContent(expr, cont, 0); + xmlGenericError(xmlGenericErrorContext, "%s\n", expr); +} + +#define DEBUG_VALID_STATE(n,c) xmlValidDebug(n,c); +#else +#define DEBUG_VALID_STATE(n,c) +#endif + +/* TODO: use hash table for accesses to elem and attribute dedinitions */ + +#define VERROR \ + if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error + +#define VWARNING \ + if ((ctxt != NULL) && (ctxt->warning != NULL)) ctxt->warning + +#define CHECK_DTD \ + if (doc == NULL) return(0); \ + else if ((doc->intSubset == NULL) && \ + (doc->extSubset == NULL)) return(0) + +xmlElementPtr xmlGetDtdElementDesc(xmlDtdPtr dtd, const xmlChar *name); +xmlAttributePtr xmlScanAttributeDecl(xmlDtdPtr dtd, const 
xmlChar *elem); + +/************************************************************************ + * * + * QName handling helper * + * * + ************************************************************************/ + +/** + * xmlSplitQName2: + * @name: an XML parser context + * @prefix: a xmlChar ** + * + * parse an XML qualified name string + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns NULL if not a QName, otherwise the local part, and prefix + * is updated to get the Prefix if any. + */ + +xmlChar * +xmlSplitQName2(const xmlChar *name, xmlChar **prefix) { + int len = 0; + xmlChar *ret = NULL; + + *prefix = NULL; + + /* xml: prefix is not really a namespace */ + if ((name[0] == 'x') && (name[1] == 'm') && + (name[2] == 'l') && (name[3] == ':')) + return(NULL); + + /* nasty but valid */ + if (name[0] == ':') + return(NULL); + + /* + * we are not trying to validate but just to cut, and yes it will + * work even if this is as set of UTF-8 encoded chars + */ + while ((name[len] != 0) && (name[len] != ':')) + len++; + + if (name[len] == 0) + return(NULL); + + *prefix = xmlStrndup(name, len); + ret = xmlStrdup(&name[len + 1]); + + return(ret); +} + +/**************************************************************** + * * + * Util functions for data allocation/deallocation * + * * + ****************************************************************/ + +/** + * xmlNewElementContent: + * @name: the subelement name or NULL + * @type: the type of element content decl + * + * Allocate an element content structure. 
+ * + * Returns NULL if not, othervise the new element content structure + */ +xmlElementContentPtr +xmlNewElementContent(xmlChar *name, xmlElementContentType type) { + xmlElementContentPtr ret; + + switch(type) { + case XML_ELEMENT_CONTENT_ELEMENT: + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent : name == NULL !\n"); + } + break; + case XML_ELEMENT_CONTENT_PCDATA: + case XML_ELEMENT_CONTENT_SEQ: + case XML_ELEMENT_CONTENT_OR: + if (name != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent : name != NULL !\n"); + } + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent: unknown type %d\n", type); + return(NULL); + } + ret = (xmlElementContentPtr) xmlMalloc(sizeof(xmlElementContent)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlNewElementContent : out of memory!\n"); + return(NULL); + } + ret->type = type; + ret->ocur = XML_ELEMENT_CONTENT_ONCE; + if (name != NULL) + ret->name = xmlStrdup(name); + else + ret->name = NULL; + ret->c1 = ret->c2 = NULL; + return(ret); +} + +/** + * xmlCopyElementContent: + * @content: An element content pointer. + * + * Build a copy of an element content description. + * + * Returns the new xmlElementContentPtr or NULL in case of error. + */ +xmlElementContentPtr +xmlCopyElementContent(xmlElementContentPtr cur) { + xmlElementContentPtr ret; + + if (cur == NULL) return(NULL); + ret = xmlNewElementContent((xmlChar *) cur->name, cur->type); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyElementContent : out of memory\n"); + return(NULL); + } + ret->ocur = cur->ocur; + if (cur->c1 != NULL) ret->c1 = xmlCopyElementContent(cur->c1); + if (cur->c2 != NULL) ret->c2 = xmlCopyElementContent(cur->c2); + return(ret); +} + +/** + * xmlFreeElementContent: + * @cur: the element content tree to free + * + * Free an element content structure. This is a recursive call ! 
+ */ +void +xmlFreeElementContent(xmlElementContentPtr cur) { + if (cur == NULL) return; + switch (cur->type) { + case XML_ELEMENT_CONTENT_PCDATA: + case XML_ELEMENT_CONTENT_ELEMENT: + case XML_ELEMENT_CONTENT_SEQ: + case XML_ELEMENT_CONTENT_OR: + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlFreeElementContent : type %d\n", cur->type); + return; + } + if (cur->c1 != NULL) xmlFreeElementContent(cur->c1); + if (cur->c2 != NULL) xmlFreeElementContent(cur->c2); + if (cur->name != NULL) xmlFree((xmlChar *) cur->name); + memset(cur, -1, sizeof(xmlElementContent)); + xmlFree(cur); +} + +/** + * xmlDumpElementContent: + * @buf: An XML buffer + * @content: An element table + * @glob: 1 if one must print the englobing parenthesis, 0 otherwise + * + * This will dump the content of the element table as an XML DTD definition + */ +void +xmlDumpElementContent(xmlBufferPtr buf, xmlElementContentPtr content, int glob) { + if (content == NULL) return; + + if (glob) xmlBufferWriteChar(buf, "("); + switch (content->type) { + case XML_ELEMENT_CONTENT_PCDATA: + xmlBufferWriteChar(buf, "#PCDATA"); + break; + case XML_ELEMENT_CONTENT_ELEMENT: + xmlBufferWriteCHAR(buf, content->name); + break; + case XML_ELEMENT_CONTENT_SEQ: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlDumpElementContent(buf, content->c1, 1); + else + xmlDumpElementContent(buf, content->c1, 0); + xmlBufferWriteChar(buf, " , "); + if (content->c2->type == XML_ELEMENT_CONTENT_OR) + xmlDumpElementContent(buf, content->c2, 1); + else + xmlDumpElementContent(buf, content->c2, 0); + break; + case XML_ELEMENT_CONTENT_OR: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlDumpElementContent(buf, content->c1, 1); + else + xmlDumpElementContent(buf, content->c1, 0); + xmlBufferWriteChar(buf, " | "); + if (content->c2->type == XML_ELEMENT_CONTENT_SEQ) + xmlDumpElementContent(buf, 
content->c2, 1); + else + xmlDumpElementContent(buf, content->c2, 0); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpElementContent: unknown type %d\n", + content->type); + } + if (glob) + xmlBufferWriteChar(buf, ")"); + switch (content->ocur) { + case XML_ELEMENT_CONTENT_ONCE: + break; + case XML_ELEMENT_CONTENT_OPT: + xmlBufferWriteChar(buf, "?"); + break; + case XML_ELEMENT_CONTENT_MULT: + xmlBufferWriteChar(buf, "*"); + break; + case XML_ELEMENT_CONTENT_PLUS: + xmlBufferWriteChar(buf, "+"); + break; + } +} + +/** + * xmlSprintfElementContent: + * @buf: an output buffer + * @content: An element table + * @glob: 1 if one must print the englobing parenthesis, 0 otherwise + * + * This will dump the content of the element content definition + * Intended just for the debug routine + */ +void +xmlSprintfElementContent(char *buf, xmlElementContentPtr content, int glob) { + if (content == NULL) return; + if (glob) strcat(buf, "("); + switch (content->type) { + case XML_ELEMENT_CONTENT_PCDATA: + strcat(buf, "#PCDATA"); + break; + case XML_ELEMENT_CONTENT_ELEMENT: + strcat(buf, (char *) content->name); + break; + case XML_ELEMENT_CONTENT_SEQ: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlSprintfElementContent(buf, content->c1, 1); + else + xmlSprintfElementContent(buf, content->c1, 0); + strcat(buf, " , "); + if (content->c2->type == XML_ELEMENT_CONTENT_OR) + xmlSprintfElementContent(buf, content->c2, 1); + else + xmlSprintfElementContent(buf, content->c2, 0); + break; + case XML_ELEMENT_CONTENT_OR: + if ((content->c1->type == XML_ELEMENT_CONTENT_OR) || + (content->c1->type == XML_ELEMENT_CONTENT_SEQ)) + xmlSprintfElementContent(buf, content->c1, 1); + else + xmlSprintfElementContent(buf, content->c1, 0); + strcat(buf, " | "); + if (content->c2->type == XML_ELEMENT_CONTENT_SEQ) + xmlSprintfElementContent(buf, content->c2, 1); + else + xmlSprintfElementContent(buf, content->c2, 0); + 
break; + } + if (glob) + strcat(buf, ")"); + switch (content->ocur) { + case XML_ELEMENT_CONTENT_ONCE: + break; + case XML_ELEMENT_CONTENT_OPT: + strcat(buf, "?"); + break; + case XML_ELEMENT_CONTENT_MULT: + strcat(buf, "*"); + break; + case XML_ELEMENT_CONTENT_PLUS: + strcat(buf, "+"); + break; + } +} + +/**************************************************************** + * * + * Registration of DTD declarations * + * * + ****************************************************************/ + +/** + * xmlCreateElementTable: + * + * create and initialize an empty element hash table. + * + * Returns the xmlElementTablePtr just created or NULL in case of error. + */ +xmlElementTablePtr +xmlCreateElementTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeElement: + * @elem: An element + * + * Deallocate the memory used by an element definition + */ +void +xmlFreeElement(xmlElementPtr elem) { + if (elem == NULL) return; + xmlUnlinkNode((xmlNodePtr) elem); + xmlFreeElementContent(elem->content); + if (elem->name != NULL) + xmlFree((xmlChar *) elem->name); + if (elem->prefix != NULL) + xmlFree((xmlChar *) elem->prefix); + memset(elem, -1, sizeof(xmlElement)); + xmlFree(elem); +} + + +/** + * xmlAddElementDecl: + * @ctxt: the validation context + * @dtd: pointer to the DTD + * @name: the entity name + * @type: the element type + * @content: the element content tree or NULL + * + * Register a new element declaration + * + * Returns NULL if not, othervise the entity + */ +xmlElementPtr +xmlAddElementDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *name, + xmlElementTypeVal type, + xmlElementContentPtr content) { + xmlElementPtr ret; + xmlElementTablePtr table; + xmlChar *ns, *uqname; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: dtd == NULL\n"); + return(NULL); + } + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: name == NULL\n"); + return(NULL); + } + switch (type) { + case 
XML_ELEMENT_TYPE_EMPTY: + if (content != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content != NULL for EMPTY\n"); + return(NULL); + } + break; + case XML_ELEMENT_TYPE_ANY: + if (content != NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content != NULL for ANY\n"); + return(NULL); + } + break; + case XML_ELEMENT_TYPE_MIXED: + if (content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content == NULL for MIXED\n"); + return(NULL); + } + break; + case XML_ELEMENT_TYPE_ELEMENT: + if (content == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: content == NULL for ELEMENT\n"); + return(NULL); + } + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: unknown type %d\n", type); + return(NULL); + } + + /* + * check if name is a QName + */ + uqname = xmlSplitQName2(name, &ns); + if (uqname != NULL) + name = uqname; + + /* + * Create the Element table if needed. + */ + table = (xmlElementTablePtr) dtd->elements; + if (table == NULL) { + table = xmlCreateElementTable(); + dtd->elements = (void *) table; + } + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlElementPtr) xmlMalloc(sizeof(xmlElement)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddElementDecl: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlElement)); + ret->type = XML_ELEMENT_DECL; + + /* + * fill the structure. + */ + ret->etype = type; + ret->name = xmlStrdup(name); + ret->prefix = ns; + ret->content = xmlCopyElementContent(content); + ret->attributes = xmlScanAttributeDecl(dtd, name); + + /* + * Validity Check: + * Insertion must not fail + */ + if (xmlHashAddEntry2(table, name, ns, ret)) { + /* + * The element is already defined in this Dtd. 
+ */ + VERROR(ctxt->userData, "Redefinition of element %s\n", name); + xmlFreeElement(ret); + if (uqname != NULL) + xmlFree(uqname); + return(NULL); + } + + /* + * Link it to the Dtd + */ + ret->parent = dtd; + ret->doc = dtd->doc; + if (dtd->last == NULL) { + dtd->children = dtd->last = (xmlNodePtr) ret; + } else { + dtd->last->next = (xmlNodePtr) ret; + ret->prev = dtd->last; + dtd->last = (xmlNodePtr) ret; + } + if (uqname != NULL) + xmlFree(uqname); + return(ret); +} + +/** + * xmlFreeElementTable: + * @table: An element table + * + * Deallocate the memory used by an element hash table. + */ +void +xmlFreeElementTable(xmlElementTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeElement); +} + +/** + * xmlCopyElement: + * @elem: An element + * + * Build a copy of an element. + * + * Returns the new xmlElementPtr or NULL in case of error. + */ +xmlElementPtr +xmlCopyElement(xmlElementPtr elem) { + xmlElementPtr cur; + + cur = (xmlElementPtr) xmlMalloc(sizeof(xmlElement)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyElement: out of memory !\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlElement)); + cur->type = XML_ELEMENT_DECL; + cur->etype = elem->etype; + if (elem->name != NULL) + cur->name = xmlStrdup(elem->name); + else + cur->name = NULL; + if (elem->prefix != NULL) + cur->prefix = xmlStrdup(elem->prefix); + else + cur->prefix = NULL; + cur->content = xmlCopyElementContent(elem->content); + /* TODO : rebuild the attribute list on the copy */ + cur->attributes = NULL; + return(cur); +} + +/** + * xmlCopyElementTable: + * @table: An element table + * + * Build a copy of an element table. + * + * Returns the new xmlElementTablePtr or NULL in case of error. 
+ */ +xmlElementTablePtr +xmlCopyElementTable(xmlElementTablePtr table) { + return((xmlElementTablePtr) xmlHashCopy(table, + (xmlHashCopier) xmlCopyElement)); +} + +/** + * xmlDumpElementDecl: + * @buf: the XML buffer output + * @elem: An element table + * + * This will dump the content of the element declaration as an XML + * DTD definition + */ +void +xmlDumpElementDecl(xmlBufferPtr buf, xmlElementPtr elem) { + switch (elem->etype) { + case XML_ELEMENT_TYPE_EMPTY: + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, " EMPTY>\n"); + break; + case XML_ELEMENT_TYPE_ANY: + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, " ANY>\n"); + break; + case XML_ELEMENT_TYPE_MIXED: + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, " "); + xmlDumpElementContent(buf, elem->content, 1); + xmlBufferWriteChar(buf, ">\n"); + break; + case XML_ELEMENT_TYPE_ELEMENT: + xmlBufferWriteChar(buf, "name); + xmlBufferWriteChar(buf, " "); + xmlDumpElementContent(buf, elem->content, 1); + xmlBufferWriteChar(buf, ">\n"); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpElementDecl: internal: unknown type %d\n", + elem->etype); + } +} + +/** + * xmlDumpElementTable: + * @buf: the XML buffer output + * @table: An element table + * + * This will dump the content of the element table as an XML DTD definition + */ +void +xmlDumpElementTable(xmlBufferPtr buf, xmlElementTablePtr table) { + xmlHashScan(table, (xmlHashScanner) xmlDumpElementDecl, buf); +} + +/** + * xmlCreateEnumeration: + * @name: the enumeration name or NULL + * + * create and initialize an enumeration attribute node. + * + * Returns the xmlEnumerationPtr just created or NULL in case + * of error. 
+ */ +xmlEnumerationPtr +xmlCreateEnumeration(xmlChar *name) { + xmlEnumerationPtr ret; + + ret = (xmlEnumerationPtr) xmlMalloc(sizeof(xmlEnumeration)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCreateEnumeration : xmlMalloc(%ld) failed\n", + (long)sizeof(xmlEnumeration)); + return(NULL); + } + memset(ret, 0, sizeof(xmlEnumeration)); + + if (name != NULL) + ret->name = xmlStrdup(name); + return(ret); +} + +/** + * xmlFreeEnumeration: + * @cur: the tree to free. + * + * free an enumeration attribute node (recursive). + */ +void +xmlFreeEnumeration(xmlEnumerationPtr cur) { + if (cur == NULL) return; + + if (cur->next != NULL) xmlFreeEnumeration(cur->next); + + if (cur->name != NULL) xmlFree((xmlChar *) cur->name); + memset(cur, -1, sizeof(xmlEnumeration)); + xmlFree(cur); +} + +/** + * xmlCopyEnumeration: + * @cur: the tree to copy. + * + * Copy an enumeration attribute node (recursive). + * + * Returns the xmlEnumerationPtr just created or NULL in case + * of error. + */ +xmlEnumerationPtr +xmlCopyEnumeration(xmlEnumerationPtr cur) { + xmlEnumerationPtr ret; + + if (cur == NULL) return(NULL); + ret = xmlCreateEnumeration((xmlChar *) cur->name); + + if (cur->next != NULL) ret->next = xmlCopyEnumeration(cur->next); + else ret->next = NULL; + + return(ret); +} + +/** + * xmlDumpEnumeration: + * @buf: the XML buffer output + * @enum: An enumeration + * + * This will dump the content of the enumeration + */ +void +xmlDumpEnumeration(xmlBufferPtr buf, xmlEnumerationPtr cur) { + if (cur == NULL) return; + + xmlBufferWriteCHAR(buf, cur->name); + if (cur->next == NULL) + xmlBufferWriteChar(buf, ")"); + else { + xmlBufferWriteChar(buf, " | "); + xmlDumpEnumeration(buf, cur->next); + } +} + +/** + * xmlCreateAttributeTable: + * + * create and initialize an empty attribute hash table. + * + * Returns the xmlAttributeTablePtr just created or NULL in case + * of error. 
+ */ +xmlAttributeTablePtr +xmlCreateAttributeTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlScanAttributeDeclCallback: + * @attr: the attribute decl + * @list: the list to update + * + * Callback called by xmlScanAttributeDecl when a new attribute + * has to be entered in the list. + */ +void +xmlScanAttributeDeclCallback(xmlAttributePtr attr, xmlAttributePtr *list, + const xmlChar* name) { + attr->nexth = *list; + *list = attr; +} + +/** + * xmlScanAttributeDecl: + * @dtd: pointer to the DTD + * @elem: the element name + * + * When inserting a new element scan the DtD for existing attributes + * for taht element and initialize the Attribute chain + * + * Returns the pointer to the first attribute decl in the chain, + * possibly NULL. + */ +xmlAttributePtr +xmlScanAttributeDecl(xmlDtdPtr dtd, const xmlChar *elem) { + xmlAttributePtr ret = NULL; + xmlAttributeTablePtr table; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanAttributeDecl: dtd == NULL\n"); + return(NULL); + } + if (elem == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanAttributeDecl: elem == NULL\n"); + return(NULL); + } + table = (xmlAttributeTablePtr) dtd->attributes; + if (table == NULL) + return(NULL); + + /* WRONG !!! */ + xmlHashScan3(table, NULL, NULL, elem, + (xmlHashScanner) xmlScanAttributeDeclCallback, &ret); + return(ret); +} + +/** + * xmlScanIDAttributeDecl: + * @ctxt: the validation context + * @elem: the element name + * + * Verify that the element don't have too many ID attributes + * declared. + * + * Returns the number of ID attributes found. 
+ */ +int +xmlScanIDAttributeDecl(xmlValidCtxtPtr ctxt, xmlElementPtr elem) { + xmlAttributePtr cur; + int ret = 0; + + if (elem == NULL) return(0); + cur = elem->attributes; + while (cur != NULL) { + if (cur->atype == XML_ATTRIBUTE_ID) { + ret ++; + if (ret > 1) + VERROR(ctxt->userData, + "Element %s has too may ID attributes defined : %s\n", + elem->name, cur->name); + } + cur = cur->nexth; + } + return(ret); +} + +/** + * xmlFreeAttribute: + * @elem: An attribute + * + * Deallocate the memory used by an attribute definition + */ +void +xmlFreeAttribute(xmlAttributePtr attr) { + if (attr == NULL) return; + xmlUnlinkNode((xmlNodePtr) attr); + if (attr->tree != NULL) + xmlFreeEnumeration(attr->tree); + if (attr->elem != NULL) + xmlFree((xmlChar *) attr->elem); + if (attr->name != NULL) + xmlFree((xmlChar *) attr->name); + if (attr->defaultValue != NULL) + xmlFree((xmlChar *) attr->defaultValue); + if (attr->prefix != NULL) + xmlFree((xmlChar *) attr->prefix); + memset(attr, -1, sizeof(xmlAttribute)); + xmlFree(attr); +} + + +/** + * xmlAddAttributeDecl: + * @ctxt: the validation context + * @dtd: pointer to the DTD + * @elem: the element name + * @name: the attribute name + * @ns: the attribute namespace prefix + * @type: the attribute type + * @def: the attribute default type + * @defaultValue: the attribute default value + * @tree: if it's an enumeration, the associated list + * + * Register a new attribute declaration + * Note that @tree becomes the ownership of the DTD + * + * Returns NULL if not new, othervise the attribute decl + */ +xmlAttributePtr +xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem, + const xmlChar *name, const xmlChar *ns, + xmlAttributeType type, xmlAttributeDefault def, + const xmlChar *defaultValue, xmlEnumerationPtr tree) { + xmlAttributePtr ret; + xmlAttributeTablePtr table; + xmlElementPtr elemDef; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: dtd == NULL\n"); + 
xmlFreeEnumeration(tree); + return(NULL); + } + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: name == NULL\n"); + xmlFreeEnumeration(tree); + return(NULL); + } + if (elem == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: elem == NULL\n"); + xmlFreeEnumeration(tree); + return(NULL); + } + /* + * Check the type and possibly the default value. + */ + switch (type) { + case XML_ATTRIBUTE_CDATA: + break; + case XML_ATTRIBUTE_ID: + break; + case XML_ATTRIBUTE_IDREF: + break; + case XML_ATTRIBUTE_IDREFS: + break; + case XML_ATTRIBUTE_ENTITY: + break; + case XML_ATTRIBUTE_ENTITIES: + break; + case XML_ATTRIBUTE_NMTOKEN: + break; + case XML_ATTRIBUTE_NMTOKENS: + break; + case XML_ATTRIBUTE_ENUMERATION: + break; + case XML_ATTRIBUTE_NOTATION: + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: unknown type %d\n", type); + xmlFreeEnumeration(tree); + return(NULL); + } + if ((defaultValue != NULL) && + (!xmlValidateAttributeValue(type, defaultValue))) { + VERROR(ctxt->userData, "Attribute %s on %s: invalid default value\n", + elem, name, defaultValue); + defaultValue = NULL; + } + + /* + * Create the Attribute table if needed. + */ + table = (xmlAttributeTablePtr) dtd->attributes; + if (table == NULL) { + table = xmlCreateAttributeTable(); + dtd->attributes = (void *) table; + } + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: Table creation failed!\n"); + return(NULL); + } + + + ret = (xmlAttributePtr) xmlMalloc(sizeof(xmlAttribute)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddAttributeDecl: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlAttribute)); + ret->type = XML_ATTRIBUTE_DECL; + + /* + * fill the structure. 
+ */ + ret->atype = type; + ret->name = xmlStrdup(name); + ret->prefix = xmlStrdup(ns); + ret->elem = xmlStrdup(elem); + ret->def = def; + ret->tree = tree; + if (defaultValue != NULL) + ret->defaultValue = xmlStrdup(defaultValue); + + /* + * Validity Check: + * Search the DTD for previous declarations of the ATTLIST + */ + if (xmlHashAddEntry3(table, name, ns, elem, ret) < 0) { + /* + * The attribute is already defined in this Dtd. + */ + VWARNING(ctxt->userData, + "Attribute %s on %s: already defined\n", + name, elem); + xmlFreeAttribute(ret); + return(NULL); + } + + /* + * Validity Check: + * Multiple ID per element + */ + elemDef = xmlGetDtdElementDesc(dtd, elem); + if (elemDef != NULL) { + if ((type == XML_ATTRIBUTE_ID) && + (xmlScanIDAttributeDecl(NULL, elemDef) != 0)) + VERROR(ctxt->userData, + "Element %s has too may ID attributes defined : %s\n", + elem, name); + ret->nexth = elemDef->attributes; + elemDef->attributes = ret; + } + + /* + * Link it to the Dtd + */ + ret->parent = dtd; + ret->doc = dtd->doc; + if (dtd->last == NULL) { + dtd->children = dtd->last = (xmlNodePtr) ret; + } else { + dtd->last->next = (xmlNodePtr) ret; + ret->prev = dtd->last; + dtd->last = (xmlNodePtr) ret; + } + return(ret); +} + +/** + * xmlFreeAttributeTable: + * @table: An attribute table + * + * Deallocate the memory used by an entities hash table. + */ +void +xmlFreeAttributeTable(xmlAttributeTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeAttribute); +} + +/** + * xmlCopyAttribute: + * @attr: An attribute + * + * Build a copy of an attribute. + * + * Returns the new xmlAttributePtr or NULL in case of error. 
+ */ +xmlAttributePtr +xmlCopyAttribute(xmlAttributePtr attr) { + xmlAttributePtr cur; + + cur = (xmlAttributePtr) xmlMalloc(sizeof(xmlAttribute)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyAttribute: out of memory !\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlAttribute)); + cur->atype = attr->atype; + cur->def = attr->def; + cur->tree = xmlCopyEnumeration(attr->tree); + if (attr->elem != NULL) + cur->elem = xmlStrdup(attr->elem); + if (attr->name != NULL) + cur->name = xmlStrdup(attr->name); + if (attr->defaultValue != NULL) + cur->defaultValue = xmlStrdup(attr->defaultValue); + return(cur); +} + +/** + * xmlCopyAttributeTable: + * @table: An attribute table + * + * Build a copy of an attribute table. + * + * Returns the new xmlAttributeTablePtr or NULL in case of error. + */ +xmlAttributeTablePtr +xmlCopyAttributeTable(xmlAttributeTablePtr table) { + return((xmlAttributeTablePtr) xmlHashCopy(table, + (xmlHashCopier) xmlCopyAttribute)); +} + +/** + * xmlDumpAttributeDecl: + * @buf: the XML buffer output + * @attr: An attribute declaration + * + * This will dump the content of the attribute declaration as an XML + * DTD definition + */ +void +xmlDumpAttributeDecl(xmlBufferPtr buf, xmlAttributePtr attr) { + xmlBufferWriteChar(buf, "elem); + xmlBufferWriteChar(buf, " "); + if (attr->prefix != NULL) { + xmlBufferWriteCHAR(buf, attr->prefix); + xmlBufferWriteChar(buf, ":"); + } + xmlBufferWriteCHAR(buf, attr->name); + switch (attr->atype) { + case XML_ATTRIBUTE_CDATA: + xmlBufferWriteChar(buf, " CDATA"); + break; + case XML_ATTRIBUTE_ID: + xmlBufferWriteChar(buf, " ID"); + break; + case XML_ATTRIBUTE_IDREF: + xmlBufferWriteChar(buf, " IDREF"); + break; + case XML_ATTRIBUTE_IDREFS: + xmlBufferWriteChar(buf, " IDREFS"); + break; + case XML_ATTRIBUTE_ENTITY: + xmlBufferWriteChar(buf, " ENTITY"); + break; + case XML_ATTRIBUTE_ENTITIES: + xmlBufferWriteChar(buf, " ENTITIES"); + break; + case XML_ATTRIBUTE_NMTOKEN: + 
xmlBufferWriteChar(buf, " NMTOKEN"); + break; + case XML_ATTRIBUTE_NMTOKENS: + xmlBufferWriteChar(buf, " NMTOKENS"); + break; + case XML_ATTRIBUTE_ENUMERATION: + xmlBufferWriteChar(buf, " ("); + xmlDumpEnumeration(buf, attr->tree); + break; + case XML_ATTRIBUTE_NOTATION: + xmlBufferWriteChar(buf, " NOTATION ("); + xmlDumpEnumeration(buf, attr->tree); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpAttributeTable: internal: unknown type %d\n", + attr->atype); + } + switch (attr->def) { + case XML_ATTRIBUTE_NONE: + break; + case XML_ATTRIBUTE_REQUIRED: + xmlBufferWriteChar(buf, " #REQUIRED"); + break; + case XML_ATTRIBUTE_IMPLIED: + xmlBufferWriteChar(buf, " #IMPLIED"); + break; + case XML_ATTRIBUTE_FIXED: + xmlBufferWriteChar(buf, " #FIXED"); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "xmlDumpAttributeTable: internal: unknown default %d\n", + attr->def); + } + if (attr->defaultValue != NULL) { + xmlBufferWriteChar(buf, " "); + xmlBufferWriteQuotedString(buf, attr->defaultValue); + } + xmlBufferWriteChar(buf, ">\n"); +} + +/** + * xmlDumpAttributeTable: + * @buf: the XML buffer output + * @table: An attribute table + * + * This will dump the content of the attribute table as an XML DTD definition + */ +void +xmlDumpAttributeTable(xmlBufferPtr buf, xmlAttributeTablePtr table) { + xmlHashScan(table, (xmlHashScanner) xmlDumpAttributeDecl, buf); +} + +/************************************************************************ + * * + * NOTATIONs * + * * + ************************************************************************/ +/** + * xmlCreateNotationTable: + * + * create and initialize an empty notation hash table. + * + * Returns the xmlNotationTablePtr just created or NULL in case + * of error. 
+ */ +xmlNotationTablePtr +xmlCreateNotationTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeNotation: + * @not: A notation + * + * Deallocate the memory used by an notation definition + */ +void +xmlFreeNotation(xmlNotationPtr nota) { + if (nota == NULL) return; + if (nota->name != NULL) + xmlFree((xmlChar *) nota->name); + if (nota->PublicID != NULL) + xmlFree((xmlChar *) nota->PublicID); + if (nota->SystemID != NULL) + xmlFree((xmlChar *) nota->SystemID); + memset(nota, -1, sizeof(xmlNotation)); + xmlFree(nota); +} + + +/** + * xmlAddNotationDecl: + * @dtd: pointer to the DTD + * @ctxt: the validation context + * @name: the entity name + * @PublicID: the public identifier or NULL + * @SystemID: the system identifier or NULL + * + * Register a new notation declaration + * + * Returns NULL if not, othervise the entity + */ +xmlNotationPtr +xmlAddNotationDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *name, + const xmlChar *PublicID, const xmlChar *SystemID) { + xmlNotationPtr ret; + xmlNotationTablePtr table; + + if (dtd == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: dtd == NULL\n"); + return(NULL); + } + if (name == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: name == NULL\n"); + return(NULL); + } + if ((PublicID == NULL) && (SystemID == NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: no PUBLIC ID nor SYSTEM ID\n"); + } + + /* + * Create the Notation table if needed. 
+ */ + table = (xmlNotationTablePtr) dtd->notations; + if (table == NULL) + dtd->notations = table = xmlCreateNotationTable(); + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlNotationPtr) xmlMalloc(sizeof(xmlNotation)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: out of memory\n"); + return(NULL); + } + memset(ret, 0, sizeof(xmlNotation)); + + /* + * fill the structure. + */ + ret->name = xmlStrdup(name); + if (SystemID != NULL) + ret->SystemID = xmlStrdup(SystemID); + if (PublicID != NULL) + ret->PublicID = xmlStrdup(PublicID); + + /* + * Validity Check: + * Check the DTD for previous declarations of the ATTLIST + */ + if (xmlHashAddEntry(table, name, ret)) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddNotationDecl: %s already defined\n", name); + xmlFreeNotation(ret); + return(NULL); + } + return(ret); +} + +/** + * xmlFreeNotationTable: + * @table: An notation table + * + * Deallocate the memory used by an entities hash table. + */ +void +xmlFreeNotationTable(xmlNotationTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeNotation); +} + +/** + * xmlCopyNotation: + * @nota: A notation + * + * Build a copy of a notation. + * + * Returns the new xmlNotationPtr or NULL in case of error. 
+ */ +xmlNotationPtr +xmlCopyNotation(xmlNotationPtr nota) { + xmlNotationPtr cur; + + cur = (xmlNotationPtr) xmlMalloc(sizeof(xmlNotation)); + if (cur == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlCopyNotation: out of memory !\n"); + return(NULL); + } + if (nota->name != NULL) + cur->name = xmlStrdup(nota->name); + else + cur->name = NULL; + if (nota->PublicID != NULL) + cur->PublicID = xmlStrdup(nota->PublicID); + else + cur->PublicID = NULL; + if (nota->SystemID != NULL) + cur->SystemID = xmlStrdup(nota->SystemID); + else + cur->SystemID = NULL; + return(cur); +} + +/** + * xmlCopyNotationTable: + * @table: A notation table + * + * Build a copy of a notation table. + * + * Returns the new xmlNotationTablePtr or NULL in case of error. + */ +xmlNotationTablePtr +xmlCopyNotationTable(xmlNotationTablePtr table) { + return((xmlNotationTablePtr) xmlHashCopy(table, + (xmlHashCopier) xmlCopyNotation)); +} + +/** + * xmlDumpNotationDecl: + * @buf: the XML buffer output + * @nota: A notation declaration + * + * This will dump the content the notation declaration as an XML DTD definition + */ +void +xmlDumpNotationDecl(xmlBufferPtr buf, xmlNotationPtr nota) { + xmlBufferWriteChar(buf, "name); + if (nota->PublicID != NULL) { + xmlBufferWriteChar(buf, " PUBLIC "); + xmlBufferWriteQuotedString(buf, nota->PublicID); + if (nota->SystemID != NULL) { + xmlBufferWriteChar(buf, " "); + xmlBufferWriteCHAR(buf, nota->SystemID); + } + } else { + xmlBufferWriteChar(buf, " SYSTEM "); + xmlBufferWriteCHAR(buf, nota->SystemID); + } + xmlBufferWriteChar(buf, " >\n"); +} + +/** + * xmlDumpNotationTable: + * @buf: the XML buffer output + * @table: A notation table + * + * This will dump the content of the notation table as an XML DTD definition + */ +void +xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table) { + xmlHashScan(table, (xmlHashScanner) xmlDumpNotationDecl, buf); +} + +/************************************************************************ + * * + * IDs 
* + * * + ************************************************************************/ +/** + * xmlCreateIDTable: + * + * create and initialize an empty id hash table. + * + * Returns the xmlIDTablePtr just created or NULL in case + * of error. + */ +xmlIDTablePtr +xmlCreateIDTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeID: + * @not: A id + * + * Deallocate the memory used by an id definition + */ +void +xmlFreeID(xmlIDPtr id) { + if (id == NULL) return; + if (id->value != NULL) + xmlFree((xmlChar *) id->value); + memset(id, -1, sizeof(xmlID)); + xmlFree(id); +} + +/** + * xmlAddID: + * @ctxt: the validation context + * @doc: pointer to the document + * @value: the value name + * @attr: the attribute holding the ID + * + * Register a new id declaration + * + * Returns NULL if not, othervise the new xmlIDPtr + */ +xmlIDPtr +xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, + xmlAttrPtr attr) { + xmlIDPtr ret; + xmlIDTablePtr table; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddIDDecl: doc == NULL\n"); + return(NULL); + } + if (value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddIDDecl: value == NULL\n"); + return(NULL); + } + if (attr == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddIDDecl: attr == NULL\n"); + return(NULL); + } + + /* + * Create the ID table if needed. + */ + table = (xmlIDTablePtr) doc->ids; + if (table == NULL) + doc->ids = table = xmlCreateIDTable(); + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlIDPtr) xmlMalloc(sizeof(xmlID)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddID: out of memory\n"); + return(NULL); + } + + /* + * fill the structure. + */ + ret->value = xmlStrdup(value); + ret->attr = attr; + + if (xmlHashAddEntry(table, value, ret) < 0) { + /* + * The id is already defined in this Dtd. 
+ */ + VERROR(ctxt->userData, "ID %s already defined\n", value); + xmlFreeID(ret); + return(NULL); + } + return(ret); +} + +/** + * xmlFreeIDTable: + * @table: An id table + * + * Deallocate the memory used by an ID hash table. + */ +void +xmlFreeIDTable(xmlIDTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeID); +} + +/** + * xmlIsID: + * @doc: the document + * @elem: the element carrying the attribute + * @attr: the attribute + * + * Determine whether an attribute is of type ID. In case we have Dtd(s) + * then this is simple, otherwise we use an heuristic: name ID (upper + * or lowercase). + * + * Returns 0 or 1 depending on the lookup result + */ +int +xmlIsID(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) { + if (doc == NULL) return(0); + if (attr == NULL) return(0); + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) { + return(0); + } else if (doc->type == XML_HTML_DOCUMENT_NODE) { + if ((xmlStrEqual(BAD_CAST "id", attr->name)) || + (xmlStrEqual(BAD_CAST "name", attr->name))) + return(1); + return(0); + } else { + xmlAttributePtr attrDecl; + + if (elem == NULL) return(0); + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, + attr->name); + + if ((attrDecl != NULL) && (attrDecl->atype == XML_ATTRIBUTE_ID)) + return(1); + } + return(0); +} + +/** + * xmlRemoveID + * @doc: the document + * @attr: the attribute + * + * Remove the given attribute from the ID table maintained internally. 
+ * + * Returns -1 if the lookup failed and 0 otherwise + */ +int +xmlRemoveID(xmlDocPtr doc, xmlAttrPtr attr) { + xmlAttrPtr cur; + xmlIDTablePtr table; + xmlChar *ID; + + if (doc == NULL) return(-1); + if (attr == NULL) return(-1); + table = (xmlIDTablePtr) doc->ids; + if (table == NULL) + return(-1); + + if (attr == NULL) + return(-1); + ID = xmlNodeListGetString(doc, attr->children, 1); + if (ID == NULL) + return(-1); + cur = xmlHashLookup(table, ID); + if (cur != attr) { + xmlFree(ID); + return(-1); + } + xmlHashUpdateEntry(table, ID, NULL, (xmlHashDeallocator) xmlFreeID); + xmlFree(ID); + return(0); +} + +/** + * xmlGetID: + * @doc: pointer to the document + * @ID: the ID value + * + * Search the attribute declaring the given ID + * + * Returns NULL if not found, otherwise the xmlAttrPtr defining the ID + */ +xmlAttrPtr +xmlGetID(xmlDocPtr doc, const xmlChar *ID) { + xmlIDTablePtr table; + xmlIDPtr id; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetID: doc == NULL\n"); + return(NULL); + } + + if (ID == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetID: ID == NULL\n"); + return(NULL); + } + + table = (xmlIDTablePtr) doc->ids; + if (table == NULL) + return(NULL); + + id = xmlHashLookup(table, ID); + if (id == NULL) + return(NULL); + return(id->attr); +} + +/************************************************************************ + * * + * Refs * + * * + ************************************************************************/ +typedef struct xmlRemove_t +{ + xmlListPtr l; + xmlAttrPtr ap; +} xmlRemove; + +/** + * xmlCreateRefTable: + * + * create and initialize an empty ref hash table. + * + * Returns the xmlRefTablePtr just created or NULL in case + * of error. 
+ */ +xmlRefTablePtr +xmlCreateRefTable(void) { + return(xmlHashCreate(0)); +} + +/** + * xmlFreeRef: + * @lk: A list link + * + * Deallocate the memory used by a ref definition + */ +static void +xmlFreeRef(xmlLinkPtr lk) { + xmlRefPtr ref = (xmlRefPtr)xmlLinkGetData(lk); + if (ref == NULL) return; + if (ref->value != NULL) + xmlFree((xmlChar *)ref->value); + memset(ref, -1, sizeof(xmlRef)); + xmlFree(ref); +} + +/** + * xmlFreeRefList: + * @list_ref: A list of references. + * + * Deallocate the memory used by a list of references + */ +static void +xmlFreeRefList(xmlListPtr list_ref) { + if (list_ref == NULL) return; + xmlListDelete(list_ref); +} + +/** + * xmlWalkRemoveRef: + * @data: Contents of current link + * @user: Value supplied by the user + * + * Return 0 to abort the walk or 1 to continue + */ +static int +xmlWalkRemoveRef(const void *data, const void *user) +{ + xmlAttrPtr attr0 = ((xmlRefPtr)data)->attr; + xmlAttrPtr attr1 = ((xmlRemove *)user)->ap; + xmlListPtr ref_list = ((xmlRemove *)user)->l; + + if (attr0 == attr1) { /* Matched: remove and terminate walk */ + xmlListRemoveFirst(ref_list, (void *)data); + return 0; + } + return 1; +} + +/** + * xmlAddRef: + * @ctxt: the validation context + * @doc: pointer to the document + * @value: the value name + * @attr: the attribute holding the Ref + * + * Register a new ref declaration + * + * Returns NULL if not, othervise the new xmlRefPtr + */ +xmlRefPtr +xmlAddRef(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, + xmlAttrPtr attr) { + xmlRefPtr ret; + xmlRefTablePtr table; + xmlListPtr ref_list; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRefDecl: doc == NULL\n"); + return(NULL); + } + if (value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRefDecl: value == NULL\n"); + return(NULL); + } + if (attr == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRefDecl: attr == NULL\n"); + return(NULL); + } + + /* + * Create the Ref table if needed. 
+ */ + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + doc->refs = table = xmlCreateRefTable(); + if (table == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: Table creation failed!\n"); + return(NULL); + } + + ret = (xmlRefPtr) xmlMalloc(sizeof(xmlRef)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: out of memory\n"); + return(NULL); + } + + /* + * fill the structure. + */ + ret->value = xmlStrdup(value); + ret->attr = attr; + + /* To add a reference :- + * References are maintained as a list of references, + * Lookup the entry, if no entry create new nodelist + * Add the owning node to the NodeList + * Return the ref + */ + + if(NULL == (ref_list = xmlHashLookup(table, value))) { + if(NULL == (ref_list = xmlListCreate(xmlFreeRef, NULL))) { + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: Reference list creation failed!\n"); + return(NULL); + } + if (xmlHashAddEntry(table, value, ref_list) < 0) { + xmlListDelete(ref_list); + xmlGenericError(xmlGenericErrorContext, + "xmlAddRef: Reference list insertion failed!\n"); + return(NULL); + } + } + xmlListInsert(ref_list, ret); + return(ret); +} + +/** + * xmlFreeRefTable: + * @table: An ref table + * + * Deallocate the memory used by an Ref hash table. + */ +void +xmlFreeRefTable(xmlRefTablePtr table) { + xmlHashFree(table, (xmlHashDeallocator) xmlFreeRefList); +} + +/** + * xmlIsRef: + * @doc: the document + * @elem: the element carrying the attribute + * @attr: the attribute + * + * Determine whether an attribute is of type Ref. In case we have Dtd(s) + * then this is simple, otherwise we use an heuristic: name Ref (upper + * or lowercase). 
+ * + * Returns 0 or 1 depending on the lookup result + */ +int +xmlIsRef(xmlDocPtr doc, xmlNodePtr elem, xmlAttrPtr attr) { + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) { + return(0); + } else if (doc->type == XML_HTML_DOCUMENT_NODE) { + /* TODO @@@ */ + return(0); + } else { + xmlAttributePtr attrDecl; + + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, + attr->name); + + if ((attrDecl != NULL) && (attrDecl->atype == XML_ATTRIBUTE_IDREF)) + return(1); + } + return(0); +} + +/** + * xmlRemoveRef + * @doc: the document + * @attr: the attribute + * + * Remove the given attribute from the Ref table maintained internally. + * + * Returns -1 if the lookup failed and 0 otherwise + */ +int +xmlRemoveRef(xmlDocPtr doc, xmlAttrPtr attr) { + xmlListPtr ref_list; + xmlRefTablePtr table; + xmlChar *ID; + xmlRemove target; + + if (doc == NULL) return(-1); + if (attr == NULL) return(-1); + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + return(-1); + + if (attr == NULL) + return(-1); + ID = xmlNodeListGetString(doc, attr->children, 1); + if (ID == NULL) + return(-1); + ref_list = xmlHashLookup(table, ID); + + if(ref_list == NULL) { + xmlFree(ID); + return (-1); + } + /* At this point, ref_list refers to a list of references which + * have the same key as the supplied attr. Our list of references + * is ordered by reference address and we don't have that information + * here to use when removing. We'll have to walk the list and + * check for a matching attribute, when we find one stop the walk + * and remove the entry. + * The list is ordered by reference, so that means we don't have the + * key. Passing the list and the reference to the walker means we + * will have enough data to be able to remove the entry. 
+ */ + target.l = ref_list; + target.ap = attr; + + /* Remove the supplied attr from our list */ + xmlListWalk(ref_list, xmlWalkRemoveRef, &target); + + /*If the list is empty then remove the list entry in the hash */ + if (xmlListEmpty(ref_list)) + xmlHashUpdateEntry(table, ID, NULL, (xmlHashDeallocator) + xmlFreeRefList); + xmlFree(ID); + return(0); +} + +/** + * xmlGetRefs: + * @doc: pointer to the document + * @ID: the ID value + * + * Find the set of references for the supplied ID. + * + * Returns NULL if not found, otherwise node set for the ID. + */ +xmlListPtr +xmlGetRefs(xmlDocPtr doc, const xmlChar *ID) { + xmlRefTablePtr table; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetRef: doc == NULL\n"); + return(NULL); + } + + if (ID == NULL) { + xmlGenericError(xmlGenericErrorContext, "xmlGetRef: ID == NULL\n"); + return(NULL); + } + + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + return(NULL); + + return (xmlHashLookup(table, ID)); +} + +/************************************************************************ + * * + * Routines for validity checking * + * * + ************************************************************************/ + +/** + * xmlGetDtdElementDesc: + * @dtd: a pointer to the DtD to search + * @name: the element name + * + * Search the Dtd for the description of this element + * + * returns the xmlElementPtr if found or NULL + */ + +xmlElementPtr +xmlGetDtdElementDesc(xmlDtdPtr dtd, const xmlChar *name) { + xmlElementTablePtr table; + xmlElementPtr cur; + xmlChar *uqname = NULL, *prefix = NULL; + + if (dtd == NULL) return(NULL); + if (dtd->elements == NULL) return(NULL); + table = (xmlElementTablePtr) dtd->elements; + + uqname = xmlSplitQName2(name, &prefix); + if (uqname != NULL) { + cur = xmlHashLookup2(table, uqname, prefix); + if (prefix != NULL) xmlFree(prefix); + if (uqname != NULL) xmlFree(uqname); + } else + cur = xmlHashLookup2(table, name, NULL); + return(cur); +} + +/** + * xmlGetDtdQElementDesc: 
+ * @dtd: a pointer to the DtD to search + * @name: the element name + * @prefix: the element namespace prefix + * + * Search the Dtd for the description of this element + * + * returns the xmlElementPtr if found or NULL + */ + +xmlElementPtr +xmlGetDtdQElementDesc(xmlDtdPtr dtd, const xmlChar *name, + const xmlChar *prefix) { + xmlElementTablePtr table; + + if (dtd == NULL) return(NULL); + if (dtd->elements == NULL) return(NULL); + table = (xmlElementTablePtr) dtd->elements; + + return(xmlHashLookup2(table, name, prefix)); +} + +/** + * xmlGetDtdAttrDesc: + * @dtd: a pointer to the DtD to search + * @elem: the element name + * @name: the attribute name + * + * Search the Dtd for the description of this attribute on + * this element. + * + * returns the xmlAttributePtr if found or NULL + */ + +xmlAttributePtr +xmlGetDtdAttrDesc(xmlDtdPtr dtd, const xmlChar *elem, const xmlChar *name) { + xmlAttributeTablePtr table; + xmlAttributePtr cur; + xmlChar *uqname = NULL, *prefix = NULL; + + if (dtd == NULL) return(NULL); + if (dtd->attributes == NULL) return(NULL); + + table = (xmlAttributeTablePtr) dtd->attributes; + if (table == NULL) + return(NULL); + + uqname = xmlSplitQName2(name, &prefix); + + if (uqname != NULL) { + cur = xmlHashLookup3(table, uqname, prefix, elem); + if (prefix != NULL) xmlFree(prefix); + if (uqname != NULL) xmlFree(uqname); + } else + cur = xmlHashLookup3(table, name, NULL, elem); + return(cur); +} + +/** + * xmlGetDtdQAttrDesc: + * @dtd: a pointer to the DtD to search + * @elem: the element name + * @name: the attribute name + * @prefix: the attribute namespace prefix + * + * Search the Dtd for the description of this qualified attribute on + * this element. 
+ * + * returns the xmlAttributePtr if found or NULL + */ + +xmlAttributePtr +xmlGetDtdQAttrDesc(xmlDtdPtr dtd, const xmlChar *elem, const xmlChar *name, + const xmlChar *prefix) { + xmlAttributeTablePtr table; + + if (dtd == NULL) return(NULL); + if (dtd->attributes == NULL) return(NULL); + table = (xmlAttributeTablePtr) dtd->attributes; + + return(xmlHashLookup3(table, name, prefix, elem)); +} + +/** + * xmlGetDtdNotationDesc: + * @dtd: a pointer to the DtD to search + * @name: the notation name + * + * Search the Dtd for the description of this notation + * + * returns the xmlNotationPtr if found or NULL + */ + +xmlNotationPtr +xmlGetDtdNotationDesc(xmlDtdPtr dtd, const xmlChar *name) { + xmlNotationTablePtr table; + + if (dtd == NULL) return(NULL); + if (dtd->notations == NULL) return(NULL); + table = (xmlNotationTablePtr) dtd->notations; + + return(xmlHashLookup(table, name)); +} + +/** + * xmlValidateNotationUse: + * @ctxt: the validation context + * @doc: the document + * @notationName: the notation name to check + * + * Validate that the given mame match a notation declaration. 
+ * - [ VC: Notation Declared ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNotationUse(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + const xmlChar *notationName) { + xmlNotationPtr notaDecl; + if ((doc == NULL) || (doc->intSubset == NULL)) return(-1); + + notaDecl = xmlGetDtdNotationDesc(doc->intSubset, notationName); + if ((notaDecl == NULL) && (doc->extSubset != NULL)) + notaDecl = xmlGetDtdNotationDesc(doc->extSubset, notationName); + + if (notaDecl == NULL) { + VERROR(ctxt->userData, "NOTATION %s is not declared\n", + notationName); + return(0); + } + return(1); +} + +/** + * xmlIsMixedElement + * @doc: the document + * @name: the element name + * + * Search in the DtDs whether an element accept Mixed content (or ANY) + * basically if it is supposed to accept text childs + * + * returns 0 if no, 1 if yes, and -1 if no element description is available + */ + +int +xmlIsMixedElement(xmlDocPtr doc, const xmlChar *name) { + xmlElementPtr elemDecl; + + if ((doc == NULL) || (doc->intSubset == NULL)) return(-1); + + elemDecl = xmlGetDtdElementDesc(doc->intSubset, name); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) + elemDecl = xmlGetDtdElementDesc(doc->extSubset, name); + if (elemDecl == NULL) return(-1); + switch (elemDecl->etype) { + case XML_ELEMENT_TYPE_ELEMENT: + return(0); + case XML_ELEMENT_TYPE_EMPTY: + /* + * return 1 for EMPTY since we want VC error to pop up + * on for example + */ + case XML_ELEMENT_TYPE_ANY: + case XML_ELEMENT_TYPE_MIXED: + return(1); + } + return(1); +} + +/** + * xmlValidateNameValue: + * @value: an Name value + * + * Validate that the given value match Name production + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNameValue(const xmlChar *value) { + const xmlChar *cur; + + if (value == NULL) return(0); + cur = value; + + if (!IS_LETTER(*cur) && (*cur != '_') && + (*cur != ':')) { + return(0); + } + + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur 
== '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + + if (*cur != 0) return(0); + + return(1); +} + +/** + * xmlValidateNamesValue: + * @value: an Names value + * + * Validate that the given value match Names production + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNamesValue(const xmlChar *value) { + const xmlChar *cur; + + if (value == NULL) return(0); + cur = value; + + if (!IS_LETTER(*cur) && (*cur != '_') && + (*cur != ':')) { + return(0); + } + + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur == '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + + while (IS_BLANK(*cur)) { + while (IS_BLANK(*cur)) cur++; + + if (!IS_LETTER(*cur) && (*cur != '_') && + (*cur != ':')) { + return(0); + } + + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur == '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + } + + if (*cur != 0) return(0); + + return(1); +} + +/** + * xmlValidateNmtokenValue: + * @value: an Mntoken value + * + * Validate that the given value match Nmtoken production + * + * [ VC: Name Token ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNmtokenValue(const xmlChar *value) { + const xmlChar *cur; + + if (value == NULL) return(0); + cur = value; + + if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && + (*cur != '.') && (*cur != '-') && + (*cur != '_') && (*cur != ':') && + (!IS_COMBINING(*cur)) && + (!IS_EXTENDER(*cur))) + return(0); + + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur == '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + + if (*cur != 0) return(0); + + return(1); +} + +/** + * xmlValidateNmtokensValue: + * @value: an Mntokens value + * + * Validate that the given value match Nmtokens production + * + * [ VC: Name Token ] + * + * returns 1 if valid or 0 
otherwise + */ + +int +xmlValidateNmtokensValue(const xmlChar *value) { + const xmlChar *cur; + + if (value == NULL) return(0); + cur = value; + + while (IS_BLANK(*cur)) cur++; + if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && + (*cur != '.') && (*cur != '-') && + (*cur != '_') && (*cur != ':') && + (!IS_COMBINING(*cur)) && + (!IS_EXTENDER(*cur))) + return(0); + + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur == '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + + while (IS_BLANK(*cur)) { + while (IS_BLANK(*cur)) cur++; + if (*cur == 0) return(1); + + if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) && + (*cur != '.') && (*cur != '-') && + (*cur != '_') && (*cur != ':') && + (!IS_COMBINING(*cur)) && + (!IS_EXTENDER(*cur))) + return(0); + + while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) || + (*cur == '.') || (*cur == '-') || + (*cur == '_') || (*cur == ':') || + (IS_COMBINING(*cur)) || + (IS_EXTENDER(*cur))) + cur++; + } + + if (*cur != 0) return(0); + + return(1); +} + +/** + * xmlValidateNotationDecl: + * @ctxt: the validation context + * @doc: a document instance + * @nota: a notation definition + * + * Try to validate a single notation definition + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - it seems that no validity constraing exist on notation declarations + * But this function get called anyway ... + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateNotationDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNotationPtr nota) { + int ret = 1; + + return(ret); +} + +/** + * xmlValidateAttributeValue: + * @type: an attribute type + * @value: an attribute value + * + * Validate that the given attribute value match the proper production + * + * [ VC: ID ] + * Values of type ID must match the Name production.... + * + * [ VC: IDREF ] + * Values of type IDREF must match the Name production, and values + * of type IDREFS must match Names ... 
+ * + * [ VC: Entity Name ] + * Values of type ENTITY must match the Name production, values + * of type ENTITIES must match Names ... + * + * [ VC: Name Token ] + * Values of type NMTOKEN must match the Nmtoken production; values + * of type NMTOKENS must match Nmtokens. + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateAttributeValue(xmlAttributeType type, const xmlChar *value) { + switch (type) { + case XML_ATTRIBUTE_ENTITIES: + case XML_ATTRIBUTE_IDREFS: + return(xmlValidateNamesValue(value)); + case XML_ATTRIBUTE_ENTITY: + case XML_ATTRIBUTE_IDREF: + case XML_ATTRIBUTE_ID: + case XML_ATTRIBUTE_NOTATION: + return(xmlValidateNameValue(value)); + case XML_ATTRIBUTE_NMTOKENS: + case XML_ATTRIBUTE_ENUMERATION: + return(xmlValidateNmtokensValue(value)); + case XML_ATTRIBUTE_NMTOKEN: + return(xmlValidateNmtokenValue(value)); + case XML_ATTRIBUTE_CDATA: + break; + } + return(1); +} + +/** + * xmlValidateAttributeValue2: + * @ctxt: the validation context + * @doc: the document + * @name: the attribute name (used for error reporting only) + * @type: the attribute type + * @value: the attribute value + * + * Validate that the given attribute value match a given type. + * This typically cannot be done before having finished parsing + * the subsets. + * + * [ VC: IDREF ] + * Values of type IDREF must match one of the declared IDs + * Values of type IDREFS must match a sequence of the declared IDs + * each Name must match the value of an ID attribute on some element + * in the XML document; i.e. IDREF values must match the value of + * some ID attribute + * + * [ VC: Entity Name ] + * Values of type ENTITY must match one declared entity + * Values of type ENTITIES must match a sequence of declared entities + * + * [ VC: Notation Attributes ] + * all notation names in the declaration must be declared. 
+ * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateAttributeValue2(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + const xmlChar *name, xmlAttributeType type, const xmlChar *value) { + int ret = 1; + switch (type) { + case XML_ATTRIBUTE_IDREFS: + case XML_ATTRIBUTE_IDREF: + case XML_ATTRIBUTE_ID: + case XML_ATTRIBUTE_NMTOKENS: + case XML_ATTRIBUTE_ENUMERATION: + case XML_ATTRIBUTE_NMTOKEN: + case XML_ATTRIBUTE_CDATA: + break; + case XML_ATTRIBUTE_ENTITY: { + xmlEntityPtr ent; + + ent = xmlGetDocEntity(doc, value); + if (ent == NULL) { + VERROR(ctxt->userData, + "ENTITY attribute %s reference an unknown entity \"%s\"\n", + name, value); + ret = 0; + } else if (ent->etype != XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + VERROR(ctxt->userData, + "ENTITY attribute %s reference an entity \"%s\" of wrong type\n", + name, value); + ret = 0; + } + break; + } + case XML_ATTRIBUTE_ENTITIES: { + xmlChar *dup, *nam = NULL, *cur, save; + xmlEntityPtr ent; + + dup = xmlStrdup(value); + if (dup == NULL) + return(0); + cur = dup; + while (*cur != 0) { + nam = cur; + while ((*cur != 0) && (!IS_BLANK(*cur))) cur++; + save = *cur; + *cur = 0; + ent = xmlGetDocEntity(doc, nam); + if (ent == NULL) { + VERROR(ctxt->userData, + "ENTITIES attribute %s reference an unknown entity \"%s\"\n", + name, nam); + ret = 0; + } else if (ent->etype != XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { + VERROR(ctxt->userData, + "ENTITIES attribute %s reference an entity \"%s\" of wrong type\n", + name, nam); + ret = 0; + } + if (save == 0) + break; + *cur = save; + while (IS_BLANK(*cur)) cur++; + } + xmlFree(dup); + break; + } + case XML_ATTRIBUTE_NOTATION: { + xmlNotationPtr nota; + + nota = xmlGetDtdNotationDesc(doc->intSubset, value); + if ((nota == NULL) && (doc->extSubset != NULL)) + nota = xmlGetDtdNotationDesc(doc->extSubset, value); + + if (nota == NULL) { + VERROR(ctxt->userData, + "NOTATION attribute %s reference an unknown notation \"%s\"\n", + name, value); + ret = 0; + } + break; + } + } + 
return(ret); +} + +/** + * xmlValidNormalizeAttributeValue: + * @doc: the document + * @elem: the parent + * @name: the attribute name + * @value: the attribute value + * + * Does the validation related extra step of the normalization of attribute + * values: + * + * If the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by single space (#x20) character. + * + * returns a new normalized string if normalization is needed, NULL otherwise + * the caller must free the returned value. + */ + +xmlChar * +xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem, + const xmlChar *name, const xmlChar *value) { + xmlChar *ret, *dst; + const xmlChar *src; + xmlAttributePtr attrDecl = NULL; + + if (doc == NULL) return(NULL); + if (elem == NULL) return(NULL); + if (name == NULL) return(NULL); + if (value == NULL) return(NULL); + + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + xmlChar qname[500]; +#ifdef HAVE_SNPRINTF + snprintf((char *) qname, sizeof(qname), "%s:%s", + elem->ns->prefix, elem->name); +#else + sprintf((char *) qname, "%s:%s", elem->ns->prefix, elem->name); +#endif + qname[sizeof(qname) - 1] = 0; + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, name); + } + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, name); + + if (attrDecl == NULL) + return(NULL); + if (attrDecl->atype == XML_ATTRIBUTE_CDATA) + return(NULL); + + ret = xmlStrdup(value); + if (ret == NULL) + return(NULL); + src = value; + dst = ret; + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 
0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; + return(ret); +} + +void +xmlValidateAttributeIdCallback(xmlAttributePtr attr, int *count, + const xmlChar* name) { + if (attr->atype == XML_ATTRIBUTE_ID) (*count)++; +} + +/** + * xmlValidateAttributeDecl: + * @ctxt: the validation context + * @doc: a document instance + * @attr: an attribute definition + * + * Try to validate a single attribute definition + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Attribute Default Legal ] + * - [ VC: Enumeration ] + * - [ VC: ID Attribute Default ] + * + * The ID/IDREF uniqueness and matching are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlAttributePtr attr) { + int ret = 1; + int val; + CHECK_DTD; + if(attr == NULL) return(1); + + /* Attribute Default Legal */ + /* Enumeration */ + if (attr->defaultValue != NULL) { + val = xmlValidateAttributeValue(attr->atype, attr->defaultValue); + if (val == 0) { + VERROR(ctxt->userData, + "Syntax of default value for attribute %s on %s is not valid\n", + attr->name, attr->elem); + } + ret &= val; + } + + /* ID Attribute Default */ + if ((attr->atype == XML_ATTRIBUTE_ID)&& + (attr->def != XML_ATTRIBUTE_IMPLIED) && + (attr->def != XML_ATTRIBUTE_REQUIRED)) { + VERROR(ctxt->userData, + "ID attribute %s on %s is not valid must be #IMPLIED or #REQUIRED\n", + attr->name, attr->elem); + ret = 0; + } + + /* One ID per Element Type */ + if (attr->atype == XML_ATTRIBUTE_ID) { + int nbId; + + /* the trick is taht we parse DtD as their own internal subset */ + xmlElementPtr elem = xmlGetDtdElementDesc(doc->intSubset, + attr->elem); + if (elem != NULL) { + nbId = xmlScanIDAttributeDecl(NULL, elem); + } else { + xmlAttributeTablePtr table; + + /* + * The attribute may be declared in the internal subset and the + * element in the external subset. 
+ */ + nbId = 0; + table = (xmlAttributeTablePtr) doc->intSubset->attributes; + xmlHashScan3(table, NULL, NULL, attr->elem, (xmlHashScanner) + xmlValidateAttributeIdCallback, &nbId); + } + if (nbId > 1) { + VERROR(ctxt->userData, + "Element %s has %d ID attribute defined in the internal subset : %s\n", + attr->elem, nbId, attr->name); + } else if (doc->extSubset != NULL) { + int extId = 0; + elem = xmlGetDtdElementDesc(doc->extSubset, attr->elem); + if (elem != NULL) { + extId = xmlScanIDAttributeDecl(NULL, elem); + } + if (extId > 1) { + VERROR(ctxt->userData, + "Element %s has %d ID attribute defined in the external subset : %s\n", + attr->elem, extId, attr->name); + } else if (extId + nbId > 1) { + VERROR(ctxt->userData, +"Element %s has ID attributes defined in the internal and external subset : %s\n", + attr->elem, attr->name); + } + } + } + + /* Validity Constraint: Enumeration */ + if ((attr->defaultValue != NULL) && (attr->tree != NULL)) { + xmlEnumerationPtr tree = attr->tree; + while (tree != NULL) { + if (xmlStrEqual(tree->name, attr->defaultValue)) break; + tree = tree->next; + } + if (tree == NULL) { + VERROR(ctxt->userData, +"Default value \"%s\" for attribute %s on %s is not among the enumerated set\n", + attr->defaultValue, attr->name, attr->elem); + ret = 0; + } + } + + return(ret); +} + +/** + * xmlValidateElementDecl: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element definition + * + * Try to validate a single element definition + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: One ID per Element Type ] + * - [ VC: No Duplicate Types ] + * - [ VC: Unique Element Type Declaration ] + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateElementDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlElementPtr elem) { + int ret = 1; + xmlElementPtr tst; + + CHECK_DTD; + + if (elem == NULL) return(1); + + /* No Duplicate Types */ + if (elem->etype == 
XML_ELEMENT_TYPE_MIXED) { + xmlElementContentPtr cur, next; + const xmlChar *name; + + cur = elem->content; + while (cur != NULL) { + if (cur->type != XML_ELEMENT_CONTENT_OR) break; + if (cur->c1 == NULL) break; + if (cur->c1->type == XML_ELEMENT_CONTENT_ELEMENT) { + name = cur->c1->name; + next = cur->c2; + while (next != NULL) { + if (next->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(next->name, name)) { + VERROR(ctxt->userData, + "Definition of %s has duplicate references of %s\n", + elem->name, name); + ret = 0; + } + break; + } + if (next->c1 == NULL) break; + if (next->c1->type != XML_ELEMENT_CONTENT_ELEMENT) break; + if (xmlStrEqual(next->c1->name, name)) { + VERROR(ctxt->userData, + "Definition of %s has duplicate references of %s\n", + elem->name, name); + ret = 0; + } + next = next->c2; + } + } + cur = cur->c2; + } + } + + /* VC: Unique Element Type Declaration */ + tst = xmlGetDtdElementDesc(doc->intSubset, elem->name); + if ((tst != NULL ) && (tst != elem)) { + VERROR(ctxt->userData, "Redefinition of element %s\n", + elem->name); + ret = 0; + } + tst = xmlGetDtdElementDesc(doc->extSubset, elem->name); + if ((tst != NULL ) && (tst != elem)) { + VERROR(ctxt->userData, "Redefinition of element %s\n", + elem->name); + ret = 0; + } + + /* One ID per Element Type */ + if (xmlScanIDAttributeDecl(ctxt, elem) > 1) { + ret = 0; + } + return(ret); +} + +/** + * xmlValidateOneAttribute: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * @attr: an attribute instance + * @value: the attribute value (without entities processing) + * + * Try to validate a single attribute for an element + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Attribute Value Type ] + * - [ VC: Fixed Attribute Default ] + * - [ VC: Entity Name ] + * - [ VC: Name Token ] + * - [ VC: ID ] + * - [ VC: IDREF ] + * - [ VC: Entity Name ] + * - [ VC: Notation Attributes ] + * + * The 
ID/IDREF uniqueness and matching are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateOneAttribute(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, xmlAttrPtr attr, const xmlChar *value) { + /* xmlElementPtr elemDecl; */ + xmlAttributePtr attrDecl = NULL; + int val; + int ret = 1; + + CHECK_DTD; + if ((elem == NULL) || (elem->name == NULL)) return(0); + if ((attr == NULL) || (attr->name == NULL)) return(0); + + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + xmlChar qname[500]; +#ifdef HAVE_SNPRINTF + snprintf((char *) qname, sizeof(qname), "%s:%s", + elem->ns->prefix, elem->name); +#else + sprintf((char *) qname, "%s:%s", elem->ns->prefix, elem->name); +#endif + qname[sizeof(qname) - 1] = 0; + if (attr->ns != NULL) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, qname, + attr->name, attr->ns->prefix); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, qname, + attr->name, attr->ns->prefix); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, + qname, attr->name); + } + } + if (attrDecl == NULL) { + if (attr->ns != NULL) { + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, elem->name, + attr->name, attr->ns->prefix); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, elem->name, + attr->name, attr->ns->prefix); + } else { + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, + elem->name, attr->name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, + elem->name, attr->name); + } + } + + + /* Validity Constraint: Attribute Value Type */ + if (attrDecl == NULL) { + VERROR(ctxt->userData, + "No declaration for attribute %s on element %s\n", + attr->name, elem->name); + return(0); + } + attr->atype = attrDecl->atype; + + val = 
xmlValidateAttributeValue(attrDecl->atype, value); + if (val == 0) { + VERROR(ctxt->userData, + "Syntax of value for attribute %s on %s is not valid\n", + attr->name, elem->name); + ret = 0; + } + + /* Validity constraint: Fixed Attribute Default */ + if (attrDecl->def == XML_ATTRIBUTE_FIXED) { + if (!xmlStrEqual(value, attrDecl->defaultValue)) { + VERROR(ctxt->userData, + "Value for attribute %s on %s is differnt from default \"%s\"\n", + attr->name, elem->name, attrDecl->defaultValue); + ret = 0; + } + } + + /* Validity Constraint: ID uniqueness */ + if (attrDecl->atype == XML_ATTRIBUTE_ID) { + if (xmlAddID(ctxt, doc, value, attr) == NULL) + ret = 0; + } + + if ((attrDecl->atype == XML_ATTRIBUTE_IDREF) || + (attrDecl->atype == XML_ATTRIBUTE_IDREFS)) { + if (xmlAddRef(ctxt, doc, value, attr) == NULL) + ret = 0; + } + + /* Validity Constraint: Notation Attributes */ + if (attrDecl->atype == XML_ATTRIBUTE_NOTATION) { + xmlEnumerationPtr tree = attrDecl->tree; + xmlNotationPtr nota; + + /* First check that the given NOTATION was declared */ + nota = xmlGetDtdNotationDesc(doc->intSubset, value); + if (nota == NULL) + nota = xmlGetDtdNotationDesc(doc->extSubset, value); + + if (nota == NULL) { + VERROR(ctxt->userData, + "Value \"%s\" for attribute %s on %s is not a declared Notation\n", + value, attr->name, elem->name); + ret = 0; + } + + /* Second, verify that it's among the list */ + while (tree != NULL) { + if (xmlStrEqual(tree->name, value)) break; + tree = tree->next; + } + if (tree == NULL) { + VERROR(ctxt->userData, +"Value \"%s\" for attribute %s on %s is not among the enumerated notations\n", + value, attr->name, elem->name); + ret = 0; + } + } + + /* Validity Constraint: Enumeration */ + if (attrDecl->atype == XML_ATTRIBUTE_ENUMERATION) { + xmlEnumerationPtr tree = attrDecl->tree; + while (tree != NULL) { + if (xmlStrEqual(tree->name, value)) break; + tree = tree->next; + } + if (tree == NULL) { + VERROR(ctxt->userData, + "Value \"%s\" for attribute %s on %s 
is not among the enumerated set\n", + value, attr->name, elem->name); + ret = 0; + } + } + + /* Fixed Attribute Default */ + if ((attrDecl->def == XML_ATTRIBUTE_FIXED) && + (!xmlStrEqual(attrDecl->defaultValue, value))) { + VERROR(ctxt->userData, + "Value for attribute %s on %s must be \"%s\"\n", + attr->name, elem->name, attrDecl->defaultValue); + ret = 0; + } + + /* Extra check for the attribute value */ + ret &= xmlValidateAttributeValue2(ctxt, doc, attr->name, + attrDecl->atype, value); + + return(ret); +} + +/* Find the next XML_ELEMENT_NODE, subject to the content constraints. + * Return -1 if we found something unexpected, or 1 otherwise. + */ + +static int +xmlValidateFindNextElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child, + xmlElementContentPtr cont) +{ + while (*child && (*child)->type != XML_ELEMENT_NODE) { + switch ((*child)->type) { + /* + * If there is an entity declared and it's not empty + * Push the current node on the stack and process with the + * entity content. + */ + case XML_ENTITY_REF_NODE: + if (((*child)->children != NULL) && + ((*child)->children->children != NULL)) { + nodeVPush(ctxt, *child); + *child = (*child)->children->children; + continue; + } + break; + + /* These things are ignored (skipped) during validation. 
*/ + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + + case XML_TEXT_NODE: + if (xmlIsBlankNode(*child) + && (cont->type == XML_ELEMENT_CONTENT_ELEMENT + || cont->type == XML_ELEMENT_CONTENT_SEQ + || cont->type == XML_ELEMENT_CONTENT_OR)) + break; + return -1; + + default: + return -1; + } + *child = (*child)->next; + } + + return 1; +} + +int xmlValidateElementTypeElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child, + xmlElementContentPtr cont); + +/** + * xmlValidateElementTypeExpr: + * @ctxt: the validation context + * @child: pointer to the child list + * @cont: pointer to the content declaration + * + * Try to validate the content of an element of type element + * but don't handle the occurence factor + * + * returns 1 if valid or 0 and -1 if PCDATA stuff is found, + * also update child value in-situ. + */ + +int +xmlValidateElementTypeExpr(xmlValidCtxtPtr ctxt, xmlNodePtr *child, + xmlElementContentPtr cont) { + xmlNodePtr cur; + int ret = 1; + + if (cont == NULL) return(-1); + DEBUG_VALID_STATE(*child, cont) + ret = xmlValidateFindNextElement(ctxt, child, cont); + if (ret < 0) + return(-1); + DEBUG_VALID_STATE(*child, cont) + switch (cont->type) { + case XML_ELEMENT_CONTENT_PCDATA: + if (*child == NULL) return(0); + if ((*child)->type == XML_TEXT_NODE) return(1); + return(0); + case XML_ELEMENT_CONTENT_ELEMENT: + if (*child == NULL) return(0); + ret = (xmlStrEqual((*child)->name, cont->name)); + if (ret == 1) { + while ((*child)->next == NULL) { + if (((*child)->parent != NULL) && + ((*child)->parent->type == XML_ENTITY_DECL)) { + *child = nodeVPop(ctxt); + } else + break; + } + *child = (*child)->next; + } + return(ret); + case XML_ELEMENT_CONTENT_OR: + cur = *child; + ret = xmlValidateElementTypeElement(ctxt, child, cont->c1); + if (ret == -1) return(-1); + if (ret == 1) { + return(1); + } + /* rollback and retry the other path */ + *child = cur; + ret = xmlValidateElementTypeElement(ctxt, child, 
cont->c2); + if (ret == -1) return(-1); + if (ret == 0) { + *child = cur; + return(0); + } + return(1); + case XML_ELEMENT_CONTENT_SEQ: + cur = *child; + ret = xmlValidateElementTypeElement(ctxt, child, cont->c1); + if (ret == -1) return(-1); + if (ret == 0) { + *child = cur; + return(0); + } + ret = xmlValidateElementTypeElement(ctxt, child, cont->c2); + if (ret == -1) return(-1); + if (ret == 0) { + *child = cur; + return(0); + } + return(1); + } + return(ret); +} + +/** + * xmlValidateElementTypeElement: + * @ctxt: the validation context + * @child: pointer to the child list + * @cont: pointer to the content declaration + * + * Try to validate the content of an element of type element + * yeah, Yet Another Regexp Implementation, and recursive + * + * returns 1 if valid or 0 and -1 if PCDATA stuff is found, + * also update child and content values in-situ. + */ + +int +xmlValidateElementTypeElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child, + xmlElementContentPtr cont) { + xmlNodePtr cur; + int ret; + + if (cont == NULL) return(-1); + + DEBUG_VALID_STATE(*child, cont) + ret = xmlValidateFindNextElement(ctxt, child, cont); + if (ret < 0) + return(-1); + DEBUG_VALID_STATE(*child, cont) + cur = *child; + ret = xmlValidateElementTypeExpr(ctxt, child, cont); + if (ret == -1) return(-1); + switch (cont->ocur) { + case XML_ELEMENT_CONTENT_ONCE: + if (ret == 1) { + /* skip ignorable elems */ + while ((*child != NULL) && + ((*child)->type == XML_PI_NODE + || (*child)->type == XML_COMMENT_NODE + || (*child)->type == XML_XINCLUDE_START + || (*child)->type == XML_XINCLUDE_END)) { + while ((*child)->next == NULL) { + if (((*child)->parent != NULL) && + ((*child)->parent->type == XML_ENTITY_REF_NODE)) { + *child = (*child)->parent; + } else + break; + } + *child = (*child)->next; + } + return(1); + } + *child = cur; + return(0); + case XML_ELEMENT_CONTENT_OPT: + if (ret == 0) { + *child = cur; + return(1); + } + break; + case XML_ELEMENT_CONTENT_MULT: + if (ret == 0) { + *child 
= cur; + break; + } + /* no break on purpose */ + case XML_ELEMENT_CONTENT_PLUS: + if (ret == 0) { + *child = cur; + return(0); + } + if (ret == -1) return(-1); + cur = *child; + do { + if (*child == NULL) + break; /* while */ + if ((*child)->type == XML_TEXT_NODE + && xmlIsBlankNode(*child)) { + *child = (*child)->next; + continue; + } + ret = xmlValidateElementTypeExpr(ctxt, child, cont); + if (ret == 1) + cur = *child; + } while (ret == 1); + if (ret == -1) return(-1); + *child = cur; + break; + } + + return xmlValidateFindNextElement(ctxt, child, cont); +} + +/** + * xmlSprintfElementChilds: + * @buf: an output buffer + * @content: An element + * @glob: 1 if one must print the englobing parenthesis, 0 otherwise + * + * This will dump the list of childs to the buffer + * Intended just for the debug routine + */ +void +xmlSprintfElementChilds(char *buf, xmlNodePtr node, int glob) { + xmlNodePtr cur; + + if (node == NULL) return; + if (glob) strcat(buf, "("); + cur = node->children; + while (cur != NULL) { + switch (cur->type) { + case XML_ELEMENT_NODE: + strcat(buf, (char *) cur->name); + if (cur->next != NULL) + strcat(buf, " "); + break; + case XML_TEXT_NODE: + if (xmlIsBlankNode(cur)) + break; + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + strcat(buf, "CDATA"); + if (cur->next != NULL) + strcat(buf, " "); + break; + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_NAMESPACE_DECL: + strcat(buf, "???"); + if (cur->next != NULL) + strcat(buf, " "); + break; + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_DTD_NODE: + case XML_COMMENT_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + } + cur = cur->next; + } + if (glob) strcat(buf, ")"); +} + + +/** + * 
xmlValidateOneElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * + * Try to validate a single element and it's attributes, + * basically it does the following checks as described by the + * XML-1.0 recommendation: + * - [ VC: Element Valid ] + * - [ VC: Required Attribute ] + * Then call xmlValidateOneAttribute() for each attribute present. + * + * The ID/IDREF checkings are done separately + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem) { + xmlElementPtr elemDecl = NULL; + xmlElementContentPtr cont; + xmlAttributePtr attr; + xmlNodePtr child; + int ret = 1; + const xmlChar *name; + + CHECK_DTD; + + if (elem == NULL) return(0); + if (elem->type == XML_TEXT_NODE) { + } + switch (elem->type) { + case XML_ATTRIBUTE_NODE: + VERROR(ctxt->userData, + "Attribute element not expected here\n"); + return(0); + case XML_TEXT_NODE: + if (elem->children != NULL) { + VERROR(ctxt->userData, "Text element has childs !\n"); + return(0); + } + if (elem->properties != NULL) { + VERROR(ctxt->userData, "Text element has attributes !\n"); + return(0); + } + if (elem->ns != NULL) { + VERROR(ctxt->userData, "Text element has namespace !\n"); + return(0); + } + if (elem->nsDef != NULL) { + VERROR(ctxt->userData, + "Text element carries namespace definitions !\n"); + return(0); + } + if (elem->content == NULL) { + VERROR(ctxt->userData, + "Text element has no content !\n"); + return(0); + } + return(1); + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(1); + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + return(1); + case XML_ENTITY_NODE: + VERROR(ctxt->userData, + "Entity element not expected here\n"); + return(0); + case XML_NOTATION_NODE: + VERROR(ctxt->userData, + "Notation element not expected here\n"); + return(0); + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case 
XML_DOCUMENT_FRAG_NODE: + VERROR(ctxt->userData, + "Document element not expected here\n"); + return(0); + case XML_HTML_DOCUMENT_NODE: + VERROR(ctxt->userData, + "\n"); + return(0); + case XML_ELEMENT_NODE: + break; + default: + VERROR(ctxt->userData, + "unknown element type %d\n", elem->type); + return(0); + } + if (elem->name == NULL) return(0); + + /* + * Fetch the declaration for the qualified name + */ + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + elemDecl = xmlGetDtdQElementDesc(doc->intSubset, + elem->name, elem->ns->prefix); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) + elemDecl = xmlGetDtdQElementDesc(doc->extSubset, + elem->name, elem->ns->prefix); + } + + /* + * Fetch the declaration for the non qualified name + */ + if (elemDecl == NULL) { + elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) + elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name); + } + if (elemDecl == NULL) { + VERROR(ctxt->userData, "No declaration for element %s\n", + elem->name); + return(0); + } + + /* Check taht the element content matches the definition */ + switch (elemDecl->etype) { + case XML_ELEMENT_TYPE_EMPTY: + if (elem->children != NULL) { + VERROR(ctxt->userData, + "Element %s was declared EMPTY this one has content\n", + elem->name); + ret = 0; + } + break; + case XML_ELEMENT_TYPE_ANY: + /* I don't think anything is required then */ + break; + case XML_ELEMENT_TYPE_MIXED: + /* Hum, this start to get messy */ + child = elem->children; + while (child != NULL) { + if (child->type == XML_ELEMENT_NODE) { + name = child->name; + if ((child->ns != NULL) && (child->ns->prefix != NULL)) { + xmlChar qname[500]; +#ifdef HAVE_SNPRINTF + snprintf((char *) qname, sizeof(qname), "%s:%s", + child->ns->prefix, child->name); +#else + sprintf((char *) qname, "%s:%s", + child->ns->prefix, child->name); +#endif + qname[sizeof(qname) - 1] = 0; + cont = elemDecl->content; + while (cont != NULL) { + 
if (cont->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(cont->name, qname)) break; + } else if ((cont->type == XML_ELEMENT_CONTENT_OR) && + (cont->c1 != NULL) && + (cont->c1->type == XML_ELEMENT_CONTENT_ELEMENT)){ + if (xmlStrEqual(cont->c1->name, qname)) break; + } else if ((cont->type != XML_ELEMENT_CONTENT_OR) || + (cont->c1 == NULL) || + (cont->c1->type != XML_ELEMENT_CONTENT_PCDATA)){ + /* Internal error !!! */ + xmlGenericError(xmlGenericErrorContext, + "Internal: MIXED struct bad\n"); + break; + } + cont = cont->c2; + } + if (cont != NULL) + goto child_ok; + } + cont = elemDecl->content; + while (cont != NULL) { + if (cont->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(cont->name, name)) break; + } else if ((cont->type == XML_ELEMENT_CONTENT_OR) && + (cont->c1 != NULL) && + (cont->c1->type == XML_ELEMENT_CONTENT_ELEMENT)) { + if (xmlStrEqual(cont->c1->name, name)) break; + } else if ((cont->type != XML_ELEMENT_CONTENT_OR) || + (cont->c1 == NULL) || + (cont->c1->type != XML_ELEMENT_CONTENT_PCDATA)) { + /* Internal error !!! 
*/ + xmlGenericError(xmlGenericErrorContext, + "Internal: MIXED struct bad\n"); + break; + } + cont = cont->c2; + } + if (cont == NULL) { + VERROR(ctxt->userData, + "Element %s is not declared in %s list of possible childs\n", + name, elem->name); + ret = 0; + } + } +child_ok: + child = child->next; + } + break; + case XML_ELEMENT_TYPE_ELEMENT: + child = elem->children; + cont = elemDecl->content; + ret = xmlValidateElementTypeElement(ctxt, &child, cont); + while ((child != NULL) && (child->type == XML_TEXT_NODE) && + (xmlIsBlankNode(child))) { + child = child->next; + continue; + } + if ((ret == 0) || (child != NULL)) { + char expr[1000]; + char list[2000]; + + expr[0] = 0; + xmlSprintfElementContent(expr, cont, 1); + list[0] = 0; + xmlSprintfElementChilds(list, elem, 1); + + VERROR(ctxt->userData, + "Element %s content doesn't follow the Dtd\nExpecting %s, got %s\n", + elem->name, expr, list); + ret = 0; + } + break; + } + + /* [ VC: Required Attribute ] */ + attr = elemDecl->attributes; + while (attr != NULL) { + if (attr->def == XML_ATTRIBUTE_REQUIRED) { + xmlAttrPtr attrib; + int qualified = -1; + + attrib = elem->properties; + while (attrib != NULL) { + if (xmlStrEqual(attrib->name, attr->name)) { + if (attr->prefix != NULL) { + xmlNsPtr nameSpace = attrib->ns; + + if (nameSpace == NULL) + nameSpace = elem->ns; + /* + * qualified names handling is problematic, having a + * different prefix should be possible but DTDs don't + * allow to define the URI instead of the prefix :-( + */ + if (nameSpace == NULL) { + if (qualified < 0) + qualified = 0; + } else if (!xmlStrEqual(nameSpace->prefix, attr->prefix)) { + if (qualified < 1) + qualified = 1; + } else + goto found; + } else { + /* + * We should allow applications to define namespaces + * for their application even if the DTD doesn't + * carry one, otherwise, basically we would always + * break. 
+ */ + goto found; + } + } + attrib = attrib->next; + } + if (qualified == -1) { + if (attr->prefix == NULL) { + VERROR(ctxt->userData, + "Element %s doesn't carry attribute %s\n", + elem->name, attr->name); + ret = 0; + } else { + VERROR(ctxt->userData, + "Element %s doesn't carry attribute %s:%s\n", + elem->name, attr->prefix,attr->name); + ret = 0; + } + } else if (qualified == 0) { + VWARNING(ctxt->userData, + "Element %s required attribute %s:%s has no prefix\n", + elem->name, attr->prefix,attr->name); + } else if (qualified == 1) { + VWARNING(ctxt->userData, + "Element %s required attribute %s:%s has different prefix\n", + elem->name, attr->prefix,attr->name); + } + } +found: + attr = attr->nexth; + } + return(ret); +} + +/** + * xmlValidateRoot: + * @ctxt: the validation context + * @doc: a document instance + * + * Try to validate a the root element + * basically it does the following check as described by the + * XML-1.0 recommendation: + * - [ VC: Root Element Type ] + * it doesn't try to recurse or apply other check to the element + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateRoot(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlNodePtr root; + if (doc == NULL) return(0); + + root = xmlDocGetRootElement(doc); + if ((root == NULL) || (root->name == NULL)) { + VERROR(ctxt->userData, "Not valid: no root element\n"); + return(0); + } + + /* + * When doing post validation against a separate DTD, those may + * no internal subset has been generated + */ + if ((doc->intSubset != NULL) && + (doc->intSubset->name != NULL)) { + /* + * Check first the document root against the NQName + */ + if (!xmlStrEqual(doc->intSubset->name, root->name)) { + if ((root->ns != NULL) && (root->ns->prefix != NULL)) { + xmlChar qname[500]; +#ifdef HAVE_SNPRINTF + snprintf((char *) qname, sizeof(qname), "%s:%s", + root->ns->prefix, root->name); +#else + sprintf((char *) qname, "%s:%s", root->ns->prefix, root->name); +#endif + qname[sizeof(qname) - 1] = 0; + if 
(xmlStrEqual(doc->intSubset->name, qname)) + goto name_ok; + } + if ((xmlStrEqual(doc->intSubset->name, BAD_CAST "HTML")) && + (xmlStrEqual(root->name, BAD_CAST "html"))) + goto name_ok; + VERROR(ctxt->userData, + "Not valid: root and DtD name do not match '%s' and '%s'\n", + root->name, doc->intSubset->name); + return(0); + + } + } +name_ok: + return(1); +} + + +/** + * xmlValidateElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * + * Try to validate the subtree under an element + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem) { + xmlNodePtr child; + xmlAttrPtr attr; + xmlChar *value; + int ret = 1; + + if (elem == NULL) return(0); + + /* + * XInclude elements were added after parsing in the infoset, + * they don't really mean anything validation wise. + */ + if ((elem->type == XML_XINCLUDE_START) || + (elem->type == XML_XINCLUDE_END)) + return(1); + + CHECK_DTD; + + ret &= xmlValidateOneElement(ctxt, doc, elem); + attr = elem->properties; + while(attr != NULL) { + value = xmlNodeListGetString(doc, attr->children, 0); + ret &= xmlValidateOneAttribute(ctxt, doc, elem, attr, value); + if (value != NULL) + xmlFree(value); + attr= attr->next; + } + child = elem->children; + while (child != NULL) { + ret &= xmlValidateElement(ctxt, doc, child); + child = child->next; + } + + return(ret); +} + + +void +xmlValidateCheckRefCallback(xmlRefPtr ref, xmlValidCtxtPtr ctxt, + const xmlChar *name) { + xmlAttrPtr id; + xmlAttrPtr attr; + + if (ref == NULL) + return; + attr = ref->attr; + if (attr == NULL) + return; + if (attr->atype == XML_ATTRIBUTE_IDREF) { + id = xmlGetID(ctxt->doc, name); + if (id == NULL) { + VERROR(ctxt->userData, + "IDREF attribute %s reference an unknown ID \"%s\"\n", + attr->name, name); + ctxt->valid = 0; + } + } else if (attr->atype == XML_ATTRIBUTE_IDREFS) { + xmlChar *dup, *str = NULL, *cur, save; + + dup = 
xmlStrdup(name); + if (dup == NULL) { + ctxt->valid = 0; + return; + } + cur = dup; + while (*cur != 0) { + str = cur; + while ((*cur != 0) && (!IS_BLANK(*cur))) cur++; + save = *cur; + *cur = 0; + id = xmlGetID(ctxt->doc, str); + if (id == NULL) { + VERROR(ctxt->userData, + "IDREFS attribute %s reference an unknown ID \"%s\"\n", + attr->name, str); + ctxt->valid = 0; + } + if (save == 0) + break; + *cur = save; + while (IS_BLANK(*cur)) cur++; + } + xmlFree(dup); + } +} + +/** + * xmlValidateDocumentFinal: + * @ctxt: the validation context + * @doc: a document instance + * + * Does the final step for the document validation once all the + * incremental validation steps have been completed + * + * basically it does the following checks described by the XML Rec + * + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlRefTablePtr table; + + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlValidateDocumentFinal: doc == NULL\n"); + return(0); + } + + /* + * Check all the NOTATION/NOTATIONS attributes + */ + /* + * Check all the ENTITY/ENTITIES attributes definition for validity + */ + /* + * Check all the IDREF/IDREFS attributes definition for validity + */ + table = (xmlRefTablePtr) doc->refs; + ctxt->doc = doc; + ctxt->valid = 1; + xmlHashScan(table, (xmlHashScanner) xmlValidateCheckRefCallback, ctxt); + return(ctxt->valid); +} + +/** + * xmlValidateDtd: + * @ctxt: the validation context + * @doc: a document instance + * @dtd: a dtd instance + * + * Try to validate the document against the dtd instance + * + * basically it does check all the definitions in the DtD. 
+ * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDtd(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlDtdPtr dtd) { + int ret; + xmlDtdPtr oldExt; + xmlNodePtr root; + + if (dtd == NULL) return(0); + if (doc == NULL) return(0); + oldExt = doc->extSubset; + doc->extSubset = dtd; + ret = xmlValidateRoot(ctxt, doc); + if (ret == 0) { + doc->extSubset = oldExt; + return(ret); + } + if (doc->ids != NULL) { + xmlFreeIDTable(doc->ids); + doc->ids = NULL; + } + if (doc->refs != NULL) { + xmlFreeRefTable(doc->refs); + doc->refs = NULL; + } + root = xmlDocGetRootElement(doc); + ret = xmlValidateElement(ctxt, doc, root); + ret &= xmlValidateDocumentFinal(ctxt, doc); + doc->extSubset = oldExt; + return(ret); +} + +void +xmlValidateAttributeCallback(xmlAttributePtr cur, xmlValidCtxtPtr ctxt, + const xmlChar *name) { + if (cur == NULL) + return; + switch (cur->atype) { + case XML_ATTRIBUTE_CDATA: + case XML_ATTRIBUTE_ID: + case XML_ATTRIBUTE_IDREF : + case XML_ATTRIBUTE_IDREFS: + case XML_ATTRIBUTE_NMTOKEN: + case XML_ATTRIBUTE_NMTOKENS: + case XML_ATTRIBUTE_ENUMERATION: + break; + case XML_ATTRIBUTE_ENTITY: + case XML_ATTRIBUTE_ENTITIES: + case XML_ATTRIBUTE_NOTATION: + if (cur->defaultValue != NULL) { + ctxt->valid &= xmlValidateAttributeValue2(ctxt, ctxt->doc, + cur->name, cur->atype, cur->defaultValue); + } + if (cur->tree != NULL) { + xmlEnumerationPtr tree = cur->tree; + while (tree != NULL) { + ctxt->valid &= xmlValidateAttributeValue2(ctxt, ctxt->doc, + cur->name, cur->atype, tree->name); + tree = tree->next; + } + } + } +} + +/** + * xmlValidateDtdFinal: + * @ctxt: the validation context + * @doc: a document instance + * + * Does the final step for the dtds validation once all the + * subsets have been parsed + * + * basically it does the following checks described by the XML Rec + * - check that ENTITY and ENTITIES type attributes default or + * possible values matches one of the defined entities. 
+ * - check that NOTATION type attributes default or + * possible values matches one of the defined notations. + * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDtdFinal(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + int ret = 1; + xmlDtdPtr dtd; + xmlAttributeTablePtr table; + + if (doc == NULL) return(0); + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) + return(0); + ctxt->doc = doc; + ctxt->valid = ret; + dtd = doc->intSubset; + if ((dtd != NULL) && (dtd->attributes != NULL)) { + table = (xmlAttributeTablePtr) dtd->attributes; + xmlHashScan(table, (xmlHashScanner) xmlValidateAttributeCallback, ctxt); + } + dtd = doc->extSubset; + if ((dtd != NULL) && (dtd->attributes != NULL)) { + table = (xmlAttributeTablePtr) dtd->attributes; + xmlHashScan(table, (xmlHashScanner) xmlValidateAttributeCallback, ctxt); + } + return(ctxt->valid); +} + +/** + * xmlValidateDocument: + * @ctxt: the validation context + * @doc: a document instance + * + * Try to validate the document instance + * + * basically it does the all the checks described by the XML Rec + * i.e. validates the internal and external subset (if present) + * and validate the document tree. 
+ * + * returns 1 if valid or 0 otherwise + */ + +int +xmlValidateDocument(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + int ret; + xmlNodePtr root; + + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) + return(0); + if ((doc->intSubset != NULL) && ((doc->intSubset->SystemID != NULL) || + (doc->intSubset->ExternalID != NULL)) && (doc->extSubset == NULL)) { + doc->extSubset = xmlParseDTD(doc->intSubset->ExternalID, + doc->intSubset->SystemID); + if (doc->extSubset == NULL) { + if (doc->intSubset->SystemID != NULL) { + VERROR(ctxt->userData, + "Could not load the external subset \"%s\"\n", + doc->intSubset->SystemID); + } else { + VERROR(ctxt->userData, + "Could not load the external subset \"%s\"\n", + doc->intSubset->ExternalID); + } + return(0); + } + } + + if (doc->ids != NULL) { + xmlFreeIDTable(doc->ids); + doc->ids = NULL; + } + if (doc->refs != NULL) { + xmlFreeRefTable(doc->refs); + doc->refs = NULL; + } + ret = xmlValidateDtdFinal(ctxt, doc); + if (!xmlValidateRoot(ctxt, doc)) return(0); + + root = xmlDocGetRootElement(doc); + ret &= xmlValidateElement(ctxt, doc, root); + ret &= xmlValidateDocumentFinal(ctxt, doc); + return(ret); +} + + +/************************************************************************ + * * + * Routines for dynamic validation editing * + * * + ************************************************************************/ + +/** + * xmlValidGetPotentialChildren: + * @ctree: an element content tree + * @list: an array to store the list of child names + * @len: a pointer to the number of element in the list + * @max: the size of the array + * + * Build/extend a list of potential children allowed by the content tree + * + * returns the number of element in the list, or -1 in case of error. 
+ */ + +int +xmlValidGetPotentialChildren(xmlElementContent *ctree, const xmlChar **list, + int *len, int max) { + int i; + + if ((ctree == NULL) || (list == NULL) || (len == NULL)) + return(-1); + if (*len >= max) return(*len); + + switch (ctree->type) { + case XML_ELEMENT_CONTENT_PCDATA: + for (i = 0; i < *len;i++) + if (xmlStrEqual(BAD_CAST "#PCDATA", list[i])) return(*len); + list[(*len)++] = BAD_CAST "#PCDATA"; + break; + case XML_ELEMENT_CONTENT_ELEMENT: + for (i = 0; i < *len;i++) + if (xmlStrEqual(ctree->name, list[i])) return(*len); + list[(*len)++] = ctree->name; + break; + case XML_ELEMENT_CONTENT_SEQ: + xmlValidGetPotentialChildren(ctree->c1, list, len, max); + xmlValidGetPotentialChildren(ctree->c2, list, len, max); + break; + case XML_ELEMENT_CONTENT_OR: + xmlValidGetPotentialChildren(ctree->c1, list, len, max); + xmlValidGetPotentialChildren(ctree->c2, list, len, max); + break; + } + + return(*len); +} + +/** + * xmlValidGetValidElements: + * @prev: an element to insert after + * @next: an element to insert next + * @list: an array to store the list of child names + * @max: the size of the array + * + * This function returns the list of authorized children to insert + * within an existing tree while respecting the validity constraints + * forced by the Dtd. The insertion point is defined using @prev and + * @next in the following ways: + * to insert before 'node': xmlValidGetValidElements(node->prev, node, ... + * to insert next 'node': xmlValidGetValidElements(node, node->next, ... + * to replace 'node': xmlValidGetValidElements(node->prev, node->next, ... + * to prepend a child to 'node': xmlValidGetValidElements(NULL, node->childs, + * to append a child to 'node': xmlValidGetValidElements(node->last, NULL, ... + * + * pointers to the element names are inserted at the beginning of the array + * and do not need to be freed. + * + * returns the number of element in the list, or -1 in case of error. 
If + * the function returns the value @max the caller is invited to grow the + * receiving array and retry. + */ + +int +xmlValidGetValidElements(xmlNode *prev, xmlNode *next, const xmlChar **list, + int max) { + int nb_valid_elements = 0; + const xmlChar *elements[256]; + int nb_elements = 0, i; + + xmlNode *ref_node; + xmlNode *parent; + xmlNode *test_node; + + xmlNode *prev_next; + xmlNode *next_prev; + xmlNode *parent_childs; + xmlNode *parent_last; + + xmlElement *element_desc; + + if (prev == NULL && next == NULL) + return(-1); + + if (list == NULL) return(-1); + if (max <= 0) return(-1); + + nb_valid_elements = 0; + ref_node = prev ? prev : next; + parent = ref_node->parent; + + /* + * Retrieves the parent element declaration + */ + element_desc = xmlGetDtdElementDesc(parent->doc->intSubset, + parent->name); + if ((element_desc == NULL) && (parent->doc->extSubset != NULL)) + element_desc = xmlGetDtdElementDesc(parent->doc->extSubset, + parent->name); + if (element_desc == NULL) return(-1); + + /* + * Do a backup of the current tree structure + */ + prev_next = prev ? prev->next : NULL; + next_prev = next ? 
next->prev : NULL; + parent_childs = parent->children; + parent_last = parent->last; + + /* + * Creates a dummy node and insert it into the tree + */ + test_node = xmlNewNode (NULL, BAD_CAST ""); + test_node->doc = ref_node->doc; + test_node->parent = parent; + test_node->prev = prev; + test_node->next = next; + + if (prev) prev->next = test_node; + else parent->children = test_node; + + if (next) next->prev = test_node; + else parent->last = test_node; + + /* + * Insert each potential child node and check if the parent is + * still valid + */ + nb_elements = xmlValidGetPotentialChildren(element_desc->content, + elements, &nb_elements, 256); + + for (i = 0;i < nb_elements;i++) { + test_node->name = elements[i]; + if (xmlValidateOneElement(NULL, parent->doc, parent)) { + int j; + + for (j = 0; j < nb_valid_elements;j++) + if (xmlStrEqual(elements[i], list[j])) break; + list[nb_valid_elements++] = elements[i]; + if (nb_valid_elements >= max) break; + } + } + + /* + * Restore the tree structure + */ + if (prev) prev->next = prev_next; + if (next) next->prev = next_prev; + parent->children = parent_childs; + parent->last = parent_last; + + return(nb_valid_elements); +} diff --git a/valid.h b/valid.h new file mode 100644 index 00000000..a7eb675d --- /dev/null +++ b/valid.h @@ -0,0 +1,236 @@ +/* + * valid.h : interface to the DTD handling and the validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_VALID_H__ +#define __XML_VALID_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * an xmlValidCtxt is used for error reporting when validating + */ + +typedef void (*xmlValidityErrorFunc) (void *ctx, const char *msg, ...); +typedef void (*xmlValidityWarningFunc) (void *ctx, const char *msg, ...); + +typedef struct _xmlValidCtxt xmlValidCtxt; +typedef xmlValidCtxt *xmlValidCtxtPtr; +struct _xmlValidCtxt { + void *userData; /* user specific data block */ + xmlValidityErrorFunc error; /* the callback in case of errors */ + xmlValidityWarningFunc warning; /* the callback in case of warning */ + + /* Node analysis stack used when validating within entities */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + int finishDtd; /* finished validating the Dtd ? */ + xmlDocPtr doc; /* the document */ + int valid; /* temporary validity check result */ +}; + +/* + * ALl notation declarations are stored in a table + * there is one table per DTD + */ + +typedef struct _xmlHashTable xmlNotationTable; +typedef xmlNotationTable *xmlNotationTablePtr; + +/* + * ALl element declarations are stored in a table + * there is one table per DTD + */ + +typedef struct _xmlHashTable xmlElementTable; +typedef xmlElementTable *xmlElementTablePtr; + +/* + * ALl attribute declarations are stored in a table + * there is one table per DTD + */ + +typedef struct _xmlHashTable xmlAttributeTable; +typedef xmlAttributeTable *xmlAttributeTablePtr; + +/* + * ALl IDs attributes are stored in a table + * there is one table per document + */ + +typedef struct _xmlHashTable xmlIDTable; +typedef xmlIDTable *xmlIDTablePtr; + +/* + * ALl Refs attributes are stored in a table + * there is one table per document + */ + +typedef struct _xmlHashTable xmlRefTable; +typedef xmlRefTable *xmlRefTablePtr; + 
+/* helper */ +xmlChar * xmlSplitQName2 (const xmlChar *name, + xmlChar **prefix); + +/* Notation */ +xmlNotationPtr xmlAddNotationDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar *PublicID, + const xmlChar *SystemID); +xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table); +void xmlFreeNotationTable(xmlNotationTablePtr table); +void xmlDumpNotationDecl (xmlBufferPtr buf, + xmlNotationPtr nota); +void xmlDumpNotationTable(xmlBufferPtr buf, + xmlNotationTablePtr table); + +/* Element Content */ +xmlElementContentPtr xmlNewElementContent (xmlChar *name, + xmlElementContentType type); +xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content); +void xmlFreeElementContent(xmlElementContentPtr cur); +void xmlSprintfElementContent(char *buf, + xmlElementContentPtr content, + int glob); + +/* Element */ +xmlElementPtr xmlAddElementDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + xmlElementTypeVal type, + xmlElementContentPtr content); +xmlElementTablePtr xmlCopyElementTable (xmlElementTablePtr table); +void xmlFreeElementTable (xmlElementTablePtr table); +void xmlDumpElementTable (xmlBufferPtr buf, + xmlElementTablePtr table); +void xmlDumpElementDecl (xmlBufferPtr buf, + xmlElementPtr elem); + +/* Enumeration */ +xmlEnumerationPtr xmlCreateEnumeration (xmlChar *name); +void xmlFreeEnumeration (xmlEnumerationPtr cur); +xmlEnumerationPtr xmlCopyEnumeration (xmlEnumerationPtr cur); + +/* Attribute */ +xmlAttributePtr xmlAddAttributeDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name, + const xmlChar *ns, + xmlAttributeType type, + xmlAttributeDefault def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +xmlAttributeTablePtr xmlCopyAttributeTable (xmlAttributeTablePtr table); +void xmlFreeAttributeTable (xmlAttributeTablePtr table); +void xmlDumpAttributeTable (xmlBufferPtr buf, + xmlAttributeTablePtr table); +void xmlDumpAttributeDecl 
(xmlBufferPtr buf, + xmlAttributePtr attr); + +/* IDs */ +xmlIDPtr xmlAddID (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +xmlIDTablePtr xmlCopyIDTable (xmlIDTablePtr table); +void xmlFreeIDTable (xmlIDTablePtr table); +xmlAttrPtr xmlGetID (xmlDocPtr doc, + const xmlChar *ID); +int xmlIsID (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +int xmlRemoveID (xmlDocPtr doc, xmlAttrPtr attr); + +/* IDREFs */ +xmlRefPtr xmlAddRef (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +xmlRefTablePtr xmlCopyRefTable (xmlRefTablePtr table); +void xmlFreeRefTable (xmlRefTablePtr table); +int xmlIsRef (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +int xmlRemoveRef (xmlDocPtr doc, xmlAttrPtr attr); + +/** + * The public function calls related to validity checking + */ + +int xmlValidateRoot (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateElementDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlElementPtr elem); +xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); +int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlAttributePtr attr); +int xmlValidateAttributeValue(xmlAttributeType type, + const xmlChar *value); +int xmlValidateNotationDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNotationPtr nota); +int xmlValidateDtd (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlDtdPtr dtd); +int xmlValidateDtdFinal (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateDocument (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int xmlValidateElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlValidateOneElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlValidateOneAttribute (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr, + const xmlChar *value); +int xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +int 
xmlValidateNotationUse (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *notationName); +int xmlIsMixedElement (xmlDocPtr doc, + const xmlChar *name); +xmlAttributePtr xmlGetDtdAttrDesc (xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name); +xmlNotationPtr xmlGetDtdNotationDesc (xmlDtdPtr dtd, + const xmlChar *name); +xmlElementPtr xmlGetDtdElementDesc (xmlDtdPtr dtd, + const xmlChar *name); + +int xmlValidGetValidElements(xmlNode *prev, + xmlNode *next, + const xmlChar **list, + int max); +int xmlValidGetPotentialChildren(xmlElementContent *ctree, + const xmlChar **list, + int *len, + int max); +#ifdef __cplusplus +} +#endif +#endif /* __XML_VALID_H__ */ diff --git a/xinclude.c b/xinclude.c new file mode 100644 index 00000000..c377f64e --- /dev/null +++ b/xinclude.c @@ -0,0 +1,806 @@ +/* + * xinclude.c : Code to implement XInclude processing + * + * World Wide Web Consortium Working Draft 26 October 2000 + * http://www.w3.org/TR/2000/WD-xinclude-20001026 + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +/* + * TODO: compute XPointers nodesets + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef LIBXML_DEBUG_ENABLED +#include +#endif +#include + +#ifdef LIBXML_XINCLUDE_ENABLED +#include + +#define XINCLUDE_NS (const xmlChar *) "http://www.w3.org/1999/XML/xinclude" +#define XINCLUDE_NODE (const xmlChar *) "include" +#define XINCLUDE_HREF (const xmlChar *) "href" +#define XINCLUDE_PARSE (const xmlChar *) "parse" +#define XINCLUDE_PARSE_XML (const xmlChar *) "xml" +#define XINCLUDE_PARSE_TEXT (const xmlChar *) "text" + +/* #define DEBUG_XINCLUDE */ + +/************************************************************************ + * * + * XInclude contexts handling * + * * + ************************************************************************/ + +/* + * An XInclude context + */ +typedef xmlChar *URL; +typedef struct _xmlXIncludeCtxt xmlXIncludeCtxt; +typedef xmlXIncludeCtxt *xmlXIncludeCtxtPtr; +struct _xmlXIncludeCtxt { + xmlDocPtr doc; /* the source document */ + int incNr; /* number of includes */ + int incMax; /* size of includes tab */ + xmlNodePtr *incTab; /* array of include nodes */ + xmlNodePtr *repTab; /* array of replacement node lists */ + int docNr; /* number of parsed documents */ + int docMax; /* size of parsed documents tab */ + xmlDocPtr *docTab; /* array of parsed documents */ + URL *urlTab; /* array of parsed documents URLs */ + int txtNr; /* number of unparsed documents */ + int txtMax; /* size of unparsed documents tab */ + xmlNodePtr *txtTab; /* array of unparsed text nodes */ + URL *txturlTab; /* array of unparsed txtuments URLs */ +}; + +/** + * xmlXIncludeAddNode: + * @ctxt: the XInclude context + * @node: the new node + * + * Add a new node to process to an XInclude context + */ +void +xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) { + if (ctxt->incMax == 0) { + 
ctxt->incMax = 4; + ctxt->incTab = (xmlNodePtr *) xmlMalloc(ctxt->incMax * + sizeof(ctxt->incTab[0])); + if (ctxt->incTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + ctxt->repTab = (xmlNodePtr *) xmlMalloc(ctxt->incMax * + sizeof(ctxt->repTab[0])); + if (ctxt->repTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + } + if (ctxt->incNr >= ctxt->incMax) { + ctxt->incMax *= 2; + ctxt->incTab = (xmlNodePtr *) xmlRealloc(ctxt->incTab, + ctxt->incMax * sizeof(ctxt->incTab[0])); + if (ctxt->incTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + ctxt->repTab = (xmlNodePtr *) xmlRealloc(ctxt->repTab, + ctxt->incMax * sizeof(ctxt->repTab[0])); + if (ctxt->repTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + } + ctxt->incTab[ctxt->incNr] = node; + ctxt->repTab[ctxt->incNr] = NULL; + ctxt->incNr++; +} + +/** + * xmlXIncludeAddDoc: + * @ctxt: the XInclude context + * @doc: the new document + * @url: the associated URL + * + * Add a new document to the list + */ +void +xmlXIncludeAddDoc(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, const URL url) { + if (ctxt->docMax == 0) { + ctxt->docMax = 4; + ctxt->docTab = (xmlDocPtr *) xmlMalloc(ctxt->docMax * + sizeof(ctxt->docTab[0])); + if (ctxt->docTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + ctxt->urlTab = (URL *) xmlMalloc(ctxt->docMax * + sizeof(ctxt->urlTab[0])); + if (ctxt->urlTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + } + if (ctxt->docNr >= ctxt->docMax) { + ctxt->docMax *= 2; + ctxt->docTab = (xmlDocPtr *) xmlRealloc(ctxt->docTab, + ctxt->docMax * sizeof(ctxt->docTab[0])); + if (ctxt->docTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + ctxt->urlTab = (URL *) xmlRealloc(ctxt->urlTab, + ctxt->docMax * 
sizeof(ctxt->urlTab[0])); + if (ctxt->urlTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + } + ctxt->docTab[ctxt->docNr] = doc; + ctxt->urlTab[ctxt->docNr] = xmlStrdup(url); + ctxt->docNr++; +} + +/** + * xmlXIncludeAddTxt: + * @ctxt: the XInclude context + * @txt: the new text node + * @url: the associated URL + * + * Add a new txtument to the list + */ +void +xmlXIncludeAddTxt(xmlXIncludeCtxtPtr ctxt, xmlNodePtr txt, const URL url) { + if (ctxt->txtMax == 0) { + ctxt->txtMax = 4; + ctxt->txtTab = (xmlNodePtr *) xmlMalloc(ctxt->txtMax * + sizeof(ctxt->txtTab[0])); + if (ctxt->txtTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + ctxt->txturlTab = (URL *) xmlMalloc(ctxt->txtMax * + sizeof(ctxt->txturlTab[0])); + if (ctxt->txturlTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "malloc failed !\n"); + return; + } + } + if (ctxt->txtNr >= ctxt->txtMax) { + ctxt->txtMax *= 2; + ctxt->txtTab = (xmlNodePtr *) xmlRealloc(ctxt->txtTab, + ctxt->txtMax * sizeof(ctxt->txtTab[0])); + if (ctxt->txtTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + ctxt->txturlTab = (URL *) xmlRealloc(ctxt->txturlTab, + ctxt->txtMax * sizeof(ctxt->urlTab[0])); + if (ctxt->txturlTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "realloc failed !\n"); + return; + } + } + ctxt->txtTab[ctxt->txtNr] = txt; + ctxt->txturlTab[ctxt->txtNr] = xmlStrdup(url); + ctxt->txtNr++; +} + +/** + * xmlXIncludeNewContext: + * @doc: an XML Document + * + * Creates a new XInclude context + * + * Returns the new set + */ +xmlXIncludeCtxtPtr +xmlXIncludeNewContext(xmlDocPtr doc) { + xmlXIncludeCtxtPtr ret; + + if (doc == NULL) + return(NULL); + ret = (xmlXIncludeCtxtPtr) xmlMalloc(sizeof(xmlXIncludeCtxt)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlXIncludeCtxt)); + ret->doc = doc; + ret->incNr = 0; + ret->incMax = 0; + ret->incTab = NULL; + 
ret->repTab = NULL; + ret->docNr = 0; + ret->docMax = 0; + ret->docTab = NULL; + ret->urlTab = NULL; + return(ret); +} + +/** + * xmlXIncludeFreeContext: + * @ctxt: the XInclude context + * + * Free an XInclude context + */ +void +xmlXIncludeFreeContext(xmlXIncludeCtxtPtr ctxt) { + int i; + + if (ctxt == NULL) + return; + for (i = 0;i < ctxt->docNr;i++) { + xmlFreeDoc(ctxt->docTab[i]); + if (ctxt->urlTab[i] != NULL) + xmlFree(ctxt->urlTab[i]); + } + for (i = 0;i < ctxt->txtNr;i++) { + if (ctxt->txturlTab[i] != NULL) + xmlFree(ctxt->txturlTab[i]); + } + if (ctxt->incTab != NULL) + xmlFree(ctxt->incTab); + if (ctxt->repTab != NULL) + xmlFree(ctxt->repTab); + if (ctxt->urlTab != NULL) + xmlFree(ctxt->urlTab); + if (ctxt->docTab != NULL) + xmlFree(ctxt->docTab); + if (ctxt->txtTab != NULL) + xmlFree(ctxt->txtTab); + if (ctxt->txturlTab != NULL) + xmlFree(ctxt->txturlTab); + memset(ctxt, 0xeb, sizeof(xmlXIncludeCtxt)); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * XInclude I/O handling * + * * + ************************************************************************/ + +/** + * xmlXIncludeLoadDoc: + * @ctxt: the XInclude context + * @url: the associated URL + * @nr: the xinclude node number + * + * Load the document, and store the result in the XInclude context + */ +void +xmlXIncludeLoadDoc(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) { + xmlDocPtr doc; + xmlURIPtr uri; + xmlChar *URL; + xmlChar *fragment = NULL; + int i; + /* + * Check the URL and remove any fragment identifier + */ + uri = xmlParseURI((const char *)url); + if (uri == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + return; + } + if (uri->fragment != NULL) { + fragment = (xmlChar *) uri->fragment; + uri->fragment = NULL; + } + URL = xmlSaveUri(uri); + xmlFreeURI(uri); + if (URL == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + if 
(fragment != NULL) + xmlFree(fragment); + return; + } + + /* + * Handling of references to the local document are done + * directly through ctxt->doc. + */ + if ((URL[0] == 0) || (URL[0] == '#')) { + doc = NULL; + goto loaded; + } + + /* + * Prevent reloading twice the document. + */ + for (i = 0; i < ctxt->docNr; i++) { + if (xmlStrEqual(URL, ctxt->urlTab[i])) { + doc = ctxt->docTab[i]; + goto loaded; + } + } + /* + * Load it. + */ + doc = xmlParseFile((const char *)URL); + if (doc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: could not load %s\n", URL); + xmlFree(URL); + if (fragment != NULL) + xmlFree(fragment); + return; + } + xmlXIncludeAddDoc(ctxt, doc, URL); + +loaded: + if (fragment == NULL) { + /* + * Add the top children list as the replacement copy. + * ISSUE: seems we should scrap DTD info from the copied list. + */ + if (doc == NULL) + ctxt->repTab[nr] = xmlCopyNodeList(ctxt->doc->children); + else + ctxt->repTab[nr] = xmlCopyNodeList(doc->children); + } else { + /* + * Computes the XPointer expression and make a copy used + * as the replacement copy. 
+ */ + xmlXPathObjectPtr xptr; + xmlXPathContextPtr xptrctxt; + + if (doc == NULL) { + xptrctxt = xmlXPtrNewContext(ctxt->doc, ctxt->incTab[nr], NULL); + } else { + xptrctxt = xmlXPtrNewContext(doc, NULL, NULL); + } + if (xptrctxt == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: could create XPointer context\n"); + xmlFree(URL); + xmlFree(fragment); + return; + } + xptr = xmlXPtrEval(fragment, xptrctxt); + if (xptr == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: XPointer evaluation failed: #%s\n", + fragment); + xmlXPathFreeContext(xptrctxt); + xmlFree(URL); + xmlFree(fragment); + return; + } + ctxt->repTab[nr] = xmlXPtrBuildNodeList(xptr); + xmlXPathFreeObject(xptr); + xmlXPathFreeContext(xptrctxt); + xmlFree(fragment); + } + xmlFree(URL); +} + +/** + * xmlXIncludeLoadTxt: + * @ctxt: the XInclude context + * @url: the associated URL + * @nr: the xinclude node number + * + * Load the content, and store the result in the XInclude context + */ +void +xmlXIncludeLoadTxt(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) { + xmlParserInputBufferPtr buf; + xmlNodePtr node; + xmlURIPtr uri; + xmlChar *URL; + int i; + /* + * Check the URL and remove any fragment identifier + */ + uri = xmlParseURI((const char *)url); + if (uri == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + return; + } + if (uri->fragment != NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: fragment identifier forbidden for text: %s\n", + uri->fragment); + xmlFreeURI(uri); + return; + } + URL = xmlSaveUri(uri); + xmlFreeURI(uri); + if (URL == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value URI %s\n", url); + return; + } + + /* + * Handling of references to the local document are done + * directly through ctxt->doc. 
+ */ + if (URL[0] == 0) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: text serialization of document not available\n"); + xmlFree(URL); + return; + } + + /* + * Prevent reloading twice the document. + */ + for (i = 0; i < ctxt->txtNr; i++) { + if (xmlStrEqual(URL, ctxt->txturlTab[i])) { + node = xmlCopyNode(ctxt->txtTab[i], 1); + goto loaded; + } + } + /* + * Load it. + * Issue 62: how to detect the encoding + */ + buf = xmlParserInputBufferCreateFilename((const char *)URL, 0); + if (buf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: could not load %s\n", URL); + xmlFree(URL); + return; + } + node = xmlNewText(NULL); + + /* + * Scan all chars from the resource and add the to the node + */ + while (xmlParserInputBufferRead(buf, 128) > 0) { + int len; + const xmlChar *content; + + content = xmlBufferContent(buf->buffer); + len = xmlBufferLength(buf->buffer); + for (i = 0;i < len; i++) { + /* + * TODO: if the encoding issue is solved, scan UTF8 chars instead + */ + if (!IS_CHAR(content[i])) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: %s contains invalid char %d\n", URL, content[i]); + } else { + xmlNodeAddContentLen(node, &content[i], 1); + } + } + xmlBufferShrink(buf->buffer, len); + } + xmlFreeParserInputBuffer(buf); + xmlXIncludeAddTxt(ctxt, node, URL); + +loaded: + /* + * Add the element as the replacement copy. 
+ */ + ctxt->repTab[nr] = node; + xmlFree(URL); +} + +/************************************************************************ + * * + * XInclude Processing * + * * + ************************************************************************/ + +/** + * xmlXIncludePreProcessNode: + * @ctxt: an XInclude context + * @node: an XInclude node + * + * Implement the infoset replacement lookup on the XML element @node + * + * Returns the result list or NULL in case of error + */ +xmlNodePtr +xmlXIncludePreProcessNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) { + xmlXIncludeAddNode(ctxt, node); + return(0); +} + +/** + * xmlXIncludeLoadNode: + * @ctxt: an XInclude context + * @nr: the node number + * + * Find and load the infoset replacement for the given node. + * + * Returns 0 if substition succeeded, -1 if some processing failed + */ +int +xmlXIncludeLoadNode(xmlXIncludeCtxtPtr ctxt, int nr) { + xmlNodePtr cur; + xmlChar *href; + xmlChar *parse; + xmlChar *base; + xmlChar *URI; + int xml = 1; /* default Issue 64 */ + + if (ctxt == NULL) + return(-1); + if ((nr < 0) || (nr >= ctxt->incNr)) + return(-1); + cur = ctxt->incTab[nr]; + if (cur == NULL) + return(-1); + +#ifdef DEBUG_XINCLUDE + xmlDebugDumpNode(stdout, cur, 0); +#endif + /* + * read the attributes + */ + href = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_HREF); + if (href == NULL) { + href = xmlGetProp(cur, XINCLUDE_HREF); + if (href == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: no href\n"); + return(-1); + } + } + parse = xmlGetNsProp(cur, XINCLUDE_NS, XINCLUDE_PARSE); + if (parse == NULL) { + parse = xmlGetProp(cur, XINCLUDE_PARSE); + } + if (parse != NULL) { + if (xmlStrEqual(parse, XINCLUDE_PARSE_XML)) + xml = 1; + else if (xmlStrEqual(parse, XINCLUDE_PARSE_TEXT)) + xml = 0; + else { + xmlGenericError(xmlGenericErrorContext, + "XInclude: invalid value %s for %s\n", + parse, XINCLUDE_PARSE); + if (href != NULL) + xmlFree(href); + if (parse != NULL) + xmlFree(parse); + return(-1); + } + } + + /* + 
* compute the URI + */ + base = xmlNodeGetBase(ctxt->doc, cur); + if (base == NULL) { + URI = xmlBuildURI(href, ctxt->doc->URL); + } else { + URI = xmlBuildURI(href, base); + } + if (URI == NULL) { + xmlChar *escbase; + xmlChar *eschref; + /* + * Some escapeing may be needed + */ + escbase = xmlURIEscape(base); + eschref = xmlURIEscape(href); + URI = xmlBuildURI(eschref, escbase); + if (escbase != NULL) + xmlFree(escbase); + if (eschref != NULL) + xmlFree(eschref); + } + if (URI == NULL) { + xmlGenericError(xmlGenericErrorContext, "XInclude: failed build URL\n"); + if (parse != NULL) + xmlFree(parse); + if (href != NULL) + xmlFree(href); + if (base != NULL) + xmlFree(base); + return(-1); + } +#ifdef DEBUG_XINCLUDE + xmlGenericError(xmlGenericErrorContext, "parse: %s\n", + xml ? "xml": "text"); + xmlGenericError(xmlGenericErrorContext, "URI: %s\n", URI); +#endif + + /* + * Cleanup + */ + if (xml) { + xmlXIncludeLoadDoc(ctxt, URI, nr); + /* xmlXIncludeGetFragment(ctxt, cur, URI); */ + } else { + xmlXIncludeLoadTxt(ctxt, URI, nr); + } + + /* + * Cleanup + */ + if (URI != NULL) + xmlFree(URI); + if (parse != NULL) + xmlFree(parse); + if (href != NULL) + xmlFree(href); + if (base != NULL) + xmlFree(base); + return(0); +} + +/** + * xmlXIncludeIncludeNode: + * @ctxt: an XInclude context + * @nr: the node number + * + * Inplement the infoset replacement for the given node + * + * Returns 0 if substition succeeded, -1 if some processing failed + */ +int +xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + xmlNodePtr cur, end, list; + + if (ctxt == NULL) + return(-1); + if ((nr < 0) || (nr >= ctxt->incNr)) + return(-1); + cur = ctxt->incTab[nr]; + if (cur == NULL) + return(-1); + + /* + * Change the current node as an XInclude start one, and add an + * entity end one + */ + cur->type = XML_XINCLUDE_START; + end = xmlNewNode(cur->ns, cur->name); + if (end == NULL) { + xmlGenericError(xmlGenericErrorContext, + "XInclude: failed to build node\n"); + return(-1); + } + 
end->type = XML_XINCLUDE_END; + xmlAddNextSibling(cur, end); + + /* + * Add the list of nodes + */ + list = ctxt->repTab[nr]; + ctxt->repTab[nr] = NULL; + while (list != NULL) { + cur = list; + list = list->next; + + xmlAddPrevSibling(end, cur); + } + return(0); +} + +/** + * xmlXIncludeTestNode: + * @doc: an XML document + * @node: an XInclude node + * + * test if the node is an XInclude node + * + * Returns 1 true, 0 otherwise + */ +int +xmlXIncludeTestNode(xmlDocPtr doc, xmlNodePtr node) { + if (node == NULL) + return(0); + if (node->ns == NULL) + return(0); + if ((xmlStrEqual(node->name, XINCLUDE_NODE)) && + (xmlStrEqual(node->ns->href, XINCLUDE_NS))) return(1); + return(0); +} + +/** + * xmlXIncludeProcess: + * @doc: an XML document + * + * Implement the XInclude substitution on the XML document @doc + * + * Returns 0 if no substition were done, -1 if some processing failed + * or the number of substitutions done. + */ +int +xmlXIncludeProcess(xmlDocPtr doc) { + xmlXIncludeCtxtPtr ctxt; + xmlNodePtr cur; + int ret = 0; + int i; + + if (doc == NULL) + return(-1); + ctxt = xmlXIncludeNewContext(doc); + if (ctxt == NULL) + return(-1); + + /* + * First phase: lookup the elements in the document + */ + cur = xmlDocGetRootElement(doc); + if (xmlXIncludeTestNode(doc, cur)) + xmlXIncludePreProcessNode(ctxt, cur); + while (cur != NULL) { + /* TODO: need to work on entities -> stack */ + if ((cur->children != NULL) && + (cur->children->type != XML_ENTITY_DECL)) { + cur = cur->children; + if (xmlXIncludeTestNode(doc, cur)) + xmlXIncludePreProcessNode(ctxt, cur); + } else if (cur->next != NULL) { + cur = cur->next; + if (xmlXIncludeTestNode(doc, cur)) + xmlXIncludePreProcessNode(ctxt, cur); + } else { + do { + cur = cur->parent; + if (cur == NULL) break; /* do */ + if (cur->next != NULL) { + cur = cur->next; + if (xmlXIncludeTestNode(doc, cur)) + xmlXIncludePreProcessNode(ctxt, cur); + break; /* do */ + } + } while (cur != NULL); + } + } + + /* + * Second Phase : collect 
the infosets fragments + */ + for (i = 0;i < ctxt->incNr; i++) { + xmlXIncludeLoadNode(ctxt, i); + } + + /* + * Third phase: extend the original document infoset. + */ + for (i = 0;i < ctxt->incNr; i++) { + xmlXIncludeIncludeNode(ctxt, i); + } + + /* + * Cleanup + */ + xmlXIncludeFreeContext(ctxt); + return(ret); +} + +#else /* !LIBXML_XINCLUDE_ENABLED */ +#endif diff --git a/xinclude.h b/xinclude.h new file mode 100644 index 00000000..eca4588c --- /dev/null +++ b/xinclude.h @@ -0,0 +1,26 @@ +/* + * xinclude.c : API to handle XInclude processing + * + * World Wide Web Consortium Working Draft 26 October 2000 + * http://www.w3.org/TR/2000/WD-xinclude-20001026 + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_XINCLUDE_H__ +#define __XML_XINCLUDE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int xmlXIncludeProcess (xmlDocPtr doc); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XINCLUDE_H__ */ diff --git a/xlink.c b/xlink.c new file mode 100644 index 00000000..68214902 --- /dev/null +++ b/xlink.c @@ -0,0 +1,187 @@ +/* + * xlink.c : implementation of the hyperlinks detection module + * This version supports both XML XLinks and HTML simple links + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include /* for memset() only */ +#ifdef HAVE_CTYPE_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_ZLIB_H +#include +#endif + +#include +#include +#include +#include +#include + +#define XLINK_NAMESPACE (BAD_CAST "http://www.w3.org/1999/xlink/namespace/") +#define XHTML_NAMESPACE (BAD_CAST "http://www.w3.org/1999/xhtml/") + +/**************************************************************** + * * + * Default setting and related functions * + * * + ****************************************************************/ + +xlinkHandlerPtr xlinkDefaultHandler = NULL; +xlinkNodeDetectFunc xlinkDefaultDetect = NULL; + +/** + * xlinkGetDefaultHandler: + * + * Get the default xlink handler. + * + * Returns the current xlinkHandlerPtr value. + */ +xlinkHandlerPtr +xlinkGetDefaultHandler(void) { + return(xlinkDefaultHandler); +} + + +/** + * xlinkSetDefaultHandler: + * @handler: the new value for the xlink handler block + * + * Set the default xlink handlers + */ +void +xlinkSetDefaultHandler(xlinkHandlerPtr handler) { + xlinkDefaultHandler = handler; +} + +/** + * xlinkGetDefaultDetect: + * + * Get the default xlink detection routine + * + * Returns the current function or NULL; + */ +xlinkNodeDetectFunc +xlinkGetDefaultDetect (void) { + return(xlinkDefaultDetect); +} + +/** + * xlinkSetDefaultDetect: + * @func: pointer to the new detction routine. 
+ * + * Set the default xlink detection routine + */ +void +xlinkSetDefaultDetect (xlinkNodeDetectFunc func) { + xlinkDefaultDetect = func; +} + +/**************************************************************** + * * + * The detection routines * + * * + ****************************************************************/ + + +/** + * xlinkIsLink: + * @doc: the document containing the node + * @node: the node pointer itself + * + * Check whether the given node carries the attributes needed + * to be a link element (or is one of the linking elements issued + * from the (X)HTML DtDs). + * This routine don't try to do full checking of the link validity + * but tries to detect and return the appropriate link type. + * + * Returns the xlinkType of the node (XLINK_TYPE_NONE if there is no + * link detected. + */ +xlinkType +xlinkIsLink (xmlDocPtr doc, xmlNodePtr node) { + xmlChar *type = NULL, *role = NULL; + xlinkType ret = XLINK_TYPE_NONE; + + if (node == NULL) return(XLINK_TYPE_NONE); + if (doc == NULL) doc = node->doc; + if ((doc != NULL) && (doc->type == XML_HTML_DOCUMENT_NODE)) { + /* + * This is an HTML document. + */ + } else if ((node->ns != NULL) && + (xmlStrEqual(node->ns->href, XHTML_NAMESPACE))) { + /* + * !!!! We really need an IS_XHTML_ELEMENT function from HTMLtree.h @@@ + */ + /* + * This is an XHTML element within an XML document + * Check whether it's one of the element able to carry links + * and in that case if it holds the attributes. 
+ */ + } + + /* + * We don't prevent a-priori having XML Linking constructs on + * XHTML elements + */ + type = xmlGetNsProp(node, BAD_CAST"type", XLINK_NAMESPACE); + if (type != NULL) { + if (!xmlStrEqual(type, BAD_CAST "simple")) { + ret = XLINK_TYPE_SIMPLE; + } if (!xmlStrEqual(type, BAD_CAST "extended")) { + role = xmlGetNsProp(node, BAD_CAST "role", XLINK_NAMESPACE); + if (role != NULL) { + xmlNsPtr xlink; + xlink = xmlSearchNs(doc, node, XLINK_NAMESPACE); + if (xlink == NULL) { + /* Humm, fallback method */ + if (xmlStrEqual(role, BAD_CAST"xlink:external-linkset")) + ret = XLINK_TYPE_EXTENDED_SET; + } else { + xmlChar buf[200]; +#ifdef HAVE_SNPRINTF + snprintf((char *) buf, sizeof(buf), "%s:external-linkset", + (char *) xlink->prefix); +#else + sprintf((char *) buf, "%s:external-linkset", + (char *) xlink->prefix); +#endif + buf[sizeof(buf) - 1] = 0; + if (xmlStrEqual(role, buf)) + ret = XLINK_TYPE_EXTENDED_SET; + + } + + } + ret = XLINK_TYPE_EXTENDED; + } + } + + if (type != NULL) xmlFree(type); + if (role != NULL) xmlFree(role); + return(ret); +} diff --git a/xlink.h b/xlink.h new file mode 100644 index 00000000..37a54151 --- /dev/null +++ b/xlink.h @@ -0,0 +1,182 @@ +/* + * xlink.h : interfaces to the hyperlinks detection module + * + * See Copyright for the status of this software. + * + * Related specification: http://www.w3.org/TR/xlink + * http://www.w3.org/HTML/ + * and XBase + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_XLINK_H__ +#define __XML_XLINK_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif +/** + * Various defines for the various Link properties. + * + * NOTE: the link detection layer will try to resolve QName expansion + * of namespaces, if "foo" is the prefix for "http://foo.com/" + * then the link detection layer will expand role="foo:myrole" + * to "http://foo.com/:myrole" + * NOTE: the link detection layer will expand URI-Refences found on + * href attributes by using the base mechanism if found. 
+ */ +typedef xmlChar *xlinkHRef; +typedef xmlChar *xlinkRole; +typedef xmlChar *xlinkTitle; + +typedef enum { + XLINK_TYPE_NONE = 0, + XLINK_TYPE_SIMPLE, + XLINK_TYPE_EXTENDED, + XLINK_TYPE_EXTENDED_SET +} xlinkType; + +typedef enum { + XLINK_SHOW_NONE = 0, + XLINK_SHOW_NEW, + XLINK_SHOW_EMBED, + XLINK_SHOW_REPLACE +} xlinkShow; + +typedef enum { + XLINK_ACTUATE_NONE = 0, + XLINK_ACTUATE_AUTO, + XLINK_ACTUATE_ONREQUEST +} xlinkActuate; + +/** + * xlinkNodeDetectFunc: + * @ctx: user data pointer + * @node: the node to check + * + * This is the prototype for the link detection routine + * It calls the default link detection callbacks upon link detection. + */ +typedef void +(*xlinkNodeDetectFunc) (void *ctx, + xmlNodePtr node); + +/** + * The link detection module interract with the upper layers using + * a set of callback registered at parsing time. + */ + +/** + * xlinkSimpleLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @href: the target of the link + * @role: the role string + * @title: the link title + * + * This is the prototype for a simple link detection callback. 
+ */ +typedef void +(*xlinkSimpleLinkFunk) (void *ctx, + xmlNodePtr node, + const xlinkHRef href, + const xlinkRole role, + const xlinkTitle title); + +/** + * xlinkExtendedLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbArcs: the number of arcs detected on the link + * @from: pointer to the array of source roles found on the arcs + * @to: pointer to the array of target roles found on the arcs + * @show: array of values for the show attributes found on the arcs + * @actuate: array of values for the actuate attributes found on the arcs + * @nbTitles: the number of titles detected on the link + * @title: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for a extended link detection callback. + */ +typedef void +(*xlinkExtendedLinkFunk)(void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbArcs, + const xlinkRole *from, + const xlinkRole *to, + xlinkShow *show, + xlinkActuate *actuate, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * xlinkExtendedLinkSetFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbTitles: the number of titles detected on the link + * @title: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for a extended link set detection callback. 
+ */ +typedef void +(*xlinkExtendedLinkSetFunk) (void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * This is the structure containing a set of Links detection callbacks + * + * There is no default xlink callbacks, if one want to get link + * recognition activated, those call backs must be provided before parsing. + */ +typedef struct _xlinkHandler xlinkHandler; +typedef xlinkHandler *xlinkHandlerPtr; +struct _xlinkHandler { + xlinkSimpleLinkFunk simple; + xlinkExtendedLinkFunk extended; + xlinkExtendedLinkSetFunk set; +}; + +/** + * the default detection routine, can be overriden, they call the default + * detection callbacks. + */ + +xlinkNodeDetectFunc xlinkGetDefaultDetect (void); +void xlinkSetDefaultDetect (xlinkNodeDetectFunc func); + +/** + * Routines to set/get the default handlers. + */ +xlinkHandlerPtr xlinkGetDefaultHandler (void); +void xlinkSetDefaultHandler (xlinkHandlerPtr handler); + +/* + * Link detection module itself. + */ +xlinkType xlinkIsLink (xmlDocPtr doc, + xmlNodePtr node); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XLINK_H__ */ diff --git a/xml2-config.1 b/xml2-config.1 new file mode 100644 index 00000000..8a259620 --- /dev/null +++ b/xml2-config.1 @@ -0,0 +1,31 @@ +.TH GNOME-XML 1 "3 July 1999" Version 1.1.0 +.SH NAME +xml-config - script to get information about the installed version of GNOME-XML +.SH SYNOPSIS +.B xml-config +[\-\-prefix\fI[=DIR]\fP] [\-\-libs] [\-\-cflags] [\-\-version] [\-\-help] +.SH DESCRIPTION +\fIxml-config\fP is a tool that is used to determine the compile and +linker flags that should be used to compile and link programs that use +\fIGNOME-XML\fP. +.SH OPTIONS +.l +\fIxml-config\fP accepts the following options: +.TP 8 +.B \-\-version +Print the currently installed version of \fIGNOME-XML\fP on the standard output. 
+.TP 8 +.B \-\-libs +Print the linker flags that are necessary to link a \fIGNOME-XML\fP program. +.TP 8 +.B \-\-cflags +Print the compiler flags that are necessary to compile a \fIGNOME-XML\fP program. +.TP 8 +.B \-\-prefix=PREFIX +If specified, use PREFIX instead of the installation prefix that +\fIGNOME-XML\fP was built with when computing the output for the +\-\-cflags and \-\-libs options. This option must be specified before +any \-\-libs or \-\-cflags options. +.SH AUTHOR +This manual page was written by Fredrik Hallenberg , +for the Debian GNU/linux system (but may be used by others). diff --git a/xml2-config.in b/xml2-config.in new file mode 100644 index 00000000..768e336d --- /dev/null +++ b/xml2-config.in @@ -0,0 +1,72 @@ +#! /bin/sh + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +includedir=@includedir@ +libdir=@libdir@ + +usage() +{ + cat < +#include +#include + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_ZLIB_H +#include +#endif + +/* Figure a portable way to know if a file is a directory. 
*/ +#ifndef HAVE_STAT +# ifdef HAVE__STAT +# define stat(x,y) _stat(x,y) +# define HAVE_STAT +# endif +#endif +#ifdef HAVE_STAT +# ifndef S_ISDIR +# ifdef _S_ISDIR +# define S_ISDIR(x) _S_ISDIR(x) +# else +# ifdef S_IFDIR +# ifndef S_IFMT +# ifdef _S_IFMT +# define S_IFMT _S_IFMT +# endif +# endif +# ifdef S_IFMT +# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +# endif +# endif +# endif +# endif +#endif + +#include +#include +#include +#include +#include +#include +#include + +#ifdef VMS +#define xmlRegisterDefaultInputCallbacks xmlRegisterDefInputCallbacks +#define xmlRegisterDefaultOutputCallbacks xmlRegisterDefOutputCallbacks +#endif + +/* #define VERBOSE_FAILURE */ +/* #define DEBUG_EXTERNAL_ENTITIES */ +/* #define DEBUG_INPUT */ + +#ifdef DEBUG_INPUT +#define MINLEN 40 +#else +#define MINLEN 4000 +#endif + +/* + * Input I/O callback sets + */ +typedef struct _xmlInputCallback { + xmlInputMatchCallback matchcallback; + xmlInputOpenCallback opencallback; + xmlInputReadCallback readcallback; + xmlInputCloseCallback closecallback; +} xmlInputCallback; + +#define MAX_INPUT_CALLBACK 15 + +xmlInputCallback xmlInputCallbackTable[MAX_INPUT_CALLBACK]; +int xmlInputCallbackNr = 0; +int xmlInputCallbackInitialized = 0; + +/* + * Output I/O callback sets + */ +typedef struct _xmlOutputCallback { + xmlOutputMatchCallback matchcallback; + xmlOutputOpenCallback opencallback; + xmlOutputWriteCallback writecallback; + xmlOutputCloseCallback closecallback; +} xmlOutputCallback; + +#define MAX_OUTPUT_CALLBACK 15 + +xmlOutputCallback xmlOutputCallbackTable[MAX_OUTPUT_CALLBACK]; +int xmlOutputCallbackNr = 0; +int xmlOutputCallbackInitialized = 0; + +/************************************************************************ + * * + * Standard I/O for file accesses * + * * + ************************************************************************/ + +/** + * xmlCheckFilename + * @path: the path to check + * + * function checks to see if @path is a valid source + * (file, socket...) 
/**
 * xmlFdOpen:
 * @filename:  the URI for matching
 *
 * input from file descriptor
 * if @filename is "-" then the standard input is used
 *
 * Only "file://localhost/...", "file:///..." and absolute paths are
 * accepted; anything else is rejected.
 *
 * Returns an I/O context or NULL in case of error
 */
void *
xmlFdOpen (const char *filename) {
    const char *path = NULL;
    int fd;

    if (!strcmp(filename, "-")) {
        /*
         * NOTE(review): descriptor 0 converts to a NULL context, which
         * callers cannot tell apart from an error - kept as is because
         * the read/close side converts the context straight back.
         */
        fd = 0;
        return((void *) (long) fd);
    }

    if (!strncmp(filename, "file://localhost", 16))
        path = &filename[16];
    else if (!strncmp(filename, "file:///", 8))
#ifdef WIN32
        path = &filename[8];
#else
        /* keep the third slash: it is the root of the absolute path */
        path = &filename[7];
#endif
    else if (filename[0] == '/')
        path = filename;
    if (path == NULL)
        return(NULL);

#ifdef WIN32
    fd = _open (path, O_RDONLY | _O_BINARY);
#else
    fd = open (path, O_RDONLY);
#endif
    /* open() reports failure with -1, the callers expect NULL */
    if (fd < 0)
        return(NULL);

    return((void *) (long) fd);
}
/**
 * xmlFileOpen:
 * @filename:  the URI for matching
 *
 * input from FILE *
 * if @filename is "-" then the standard input is used
 *
 * A "file://localhost" or "file://" prefix is removed before the file
 * is opened; any other name is used unchanged.  Names rejected by
 * xmlCheckFilename() are not opened.
 *
 * Returns an I/O context or NULL in case of error
 */
void *
xmlFileOpen (const char *filename) {
    const char *path;
    FILE *fd;

    if (!strcmp(filename, "-")) {
        fd = stdin;
        return((void *) fd);
    }

    if (!strncmp(filename, "file://localhost", 16))
        path = &filename[16];
    else if (!strncmp(filename, "file:///", 8))
#ifdef WIN32
        path = &filename[8];
#else
        /* keep the third slash: it is the root of the absolute path */
        path = &filename[7];
#endif
    else
        path = filename;

    if (!xmlCheckFilename(path))
        return(NULL);

#ifdef WIN32
    fd = fopen(path, "rb");
#else
    fd = fopen(path, "r");
#endif /* WIN32 */
    return((void *) fd);
}
+ * + * Returns the number of bytes written + */ +int +xmlFileWrite (void * context, const char * buffer, int len) { + return(fwrite(&buffer[0], 1, len, (FILE *) context)); +} + +/** + * xmlFileClose: + * @context: the I/O context + * + * Close an I/O channel + */ +void +xmlFileClose (void * context) { + fclose((FILE *) context); +} + +/** + * xmlFileFlush: + * @context: the I/O context + * + * Flush an I/O channel + */ +void +xmlFileFlush (void * context) { + fflush((FILE *) context); +} + +#ifdef HAVE_ZLIB_H +/************************************************************************ + * * + * I/O for compressed file accesses * + * * + ************************************************************************/ +/** + * xmlGzfileMatch: + * @filename: the URI for matching + * + * input from compressed file test + * + * Returns 1 if matches, 0 otherwise + */ +int +xmlGzfileMatch (const char *filename) { + return(1); +} + +/** + * xmlGzfileOpen: + * @filename: the URI for matching + * + * input from compressed file open + * if @filename is " " then the standard input is used + * + * Returns an I/O context or NULL in case of error + */ +void * +xmlGzfileOpen (const char *filename) { + const char *path = NULL; + gzFile fd; + + if (!strcmp(filename, "-")) { + fd = gzdopen(fileno(stdin), "rb"); + return((void *) fd); + } + + if (!strncmp(filename, "file://localhost", 16)) + path = &filename[16]; + else if (!strncmp(filename, "file:///", 8)) + path = &filename[7]; + else + path = filename; + + if (path == NULL) + return(NULL); + if (!xmlCheckFilename(path)) + return(NULL); + + fd = gzopen(path, "rb"); + return((void *) fd); +} + +/** + * xmlGzfileOpenW: + * @filename: the URI for matching + * @compression: the compression factor (0 - 9 included) + * + * input from compressed file open + * if @filename is " " then the standard input is used + * + * Returns an I/O context or NULL in case of error + */ +void * +xmlGzfileOpenW (const char *filename, int compression) { + const 
/**
 * xmlIOHTTPMatch:
 * @filename:  the URI for matching, may be NULL
 *
 * check if the URI matches an HTTP one, i.e. starts with "http://"
 *
 * Returns 1 if matches, 0 otherwise (a NULL @filename never matches)
 */
int
xmlIOHTTPMatch (const char *filename) {
    if (filename == NULL)
        return(0);
    if (!strncmp(filename, "http://", 7))
        return(1);
    return(0);
}
/**
 * xmlIOFTPMatch:
 * @filename:  the URI for matching, may be NULL
 *
 * check if the URI matches an FTP one, i.e. starts with "ftp://"
 *
 * Returns 1 if matches, 0 otherwise (a NULL @filename never matches)
 */
int
xmlIOFTPMatch (const char *filename) {
    if (filename == NULL)
        return(0);
    if (!strncmp(filename, "ftp://", 6))
        return(1);
    return(0);
}
+ * + * Returns the number of bytes written + */ +int +xmlIOFTPRead(void * context, char * buffer, int len) { + return(xmlNanoFTPRead(context, &buffer[0], len)); +} + +/** + * xmlIOFTPClose: + * @context: the I/O context + * + * Close an FTP I/O channel + */ +void +xmlIOFTPClose (void * context) { + xmlNanoFTPClose(context); +} +#endif /* LIBXML_FTP_ENABLED */ + + +/** + * xmlRegisterInputCallbacks: + * @match: the xmlInputMatchCallback + * @open: the xmlInputOpenCallback + * @read: the xmlInputReadCallback + * @close: the xmlInputCloseCallback + * + * Register a new set of I/O callback for handling parser input. + * + * Returns the registered handler number or -1 in case of error + */ +int +xmlRegisterInputCallbacks(xmlInputMatchCallback match, + xmlInputOpenCallback open, xmlInputReadCallback read, + xmlInputCloseCallback close) { + if (xmlInputCallbackNr >= MAX_INPUT_CALLBACK) { + return(-1); + } + xmlInputCallbackTable[xmlInputCallbackNr].matchcallback = match; + xmlInputCallbackTable[xmlInputCallbackNr].opencallback = open; + xmlInputCallbackTable[xmlInputCallbackNr].readcallback = read; + xmlInputCallbackTable[xmlInputCallbackNr].closecallback = close; + return(xmlInputCallbackNr++); +} + +/** + * xmlRegisterOutputCallbacks: + * @match: the xmlOutputMatchCallback + * @open: the xmlOutputOpenCallback + * @write: the xmlOutputWriteCallback + * @close: the xmlOutputCloseCallback + * + * Register a new set of I/O callback for handling output. 
+ * + * Returns the registered handler number or -1 in case of error + */ +int +xmlRegisterOutputCallbacks(xmlOutputMatchCallback match, + xmlOutputOpenCallback open, xmlOutputWriteCallback write, + xmlOutputCloseCallback close) { + if (xmlOutputCallbackNr >= MAX_INPUT_CALLBACK) { + return(-1); + } + xmlOutputCallbackTable[xmlOutputCallbackNr].matchcallback = match; + xmlOutputCallbackTable[xmlOutputCallbackNr].opencallback = open; + xmlOutputCallbackTable[xmlOutputCallbackNr].writecallback = write; + xmlOutputCallbackTable[xmlOutputCallbackNr].closecallback = close; + return(xmlOutputCallbackNr++); +} + +/** + * xmlRegisterDefaultInputCallbacks: + * + * Registers the default compiled-in I/O handlers. + */ +void +#ifdef VMS +xmlRegisterDefInputCallbacks +#else +xmlRegisterDefaultInputCallbacks +#endif +(void) { + if (xmlInputCallbackInitialized) + return; + + xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen, + xmlFileRead, xmlFileClose); +#ifdef HAVE_ZLIB_H + xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen, + xmlGzfileRead, xmlGzfileClose); +#endif /* HAVE_ZLIB_H */ + +#ifdef LIBXML_HTTP_ENABLED + xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlIOHTTPOpen, + xmlIOHTTPRead, xmlIOHTTPClose); +#endif /* LIBXML_HTTP_ENABLED */ + +#ifdef LIBXML_FTP_ENABLED + xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen, + xmlIOFTPRead, xmlIOFTPClose); +#endif /* LIBXML_FTP_ENABLED */ + xmlInputCallbackInitialized = 1; +} + +/** + * xmlRegisterDefaultOutputCallbacks: + * + * Registers the default compiled-in I/O handlers. + */ +void +#ifdef VMS +xmlRegisterDefOutputCallbacks +#else +xmlRegisterDefaultOutputCallbacks +#endif +(void) { + if (xmlOutputCallbackInitialized) + return; + + xmlRegisterOutputCallbacks(xmlFileMatch, xmlFileOpenW, + xmlFileWrite, xmlFileClose); +/********************************* + No way a-priori to distinguish between gzipped files from + uncompressed ones except opening if existing then closing + and saving with same compression ratio ... 
a pain. + +#ifdef HAVE_ZLIB_H + xmlRegisterOutputCallbacks(xmlGzfileMatch, xmlGzfileOpen, + xmlGzfileWrite, xmlGzfileClose); +#endif + No HTTP PUT support yet, patches welcome + +#ifdef LIBXML_HTTP_ENABLED + xmlRegisterOutputCallbacks(xmlIOHTTPMatch, xmlIOHTTPOpen, + xmlIOHTTPWrite, xmlIOHTTPClose); +#endif + + Nor FTP PUT .... +#ifdef LIBXML_FTP_ENABLED + xmlRegisterOutputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen, + xmlIOFTPWrite, xmlIOFTPClose); +#endif + **********************************/ + xmlOutputCallbackInitialized = 1; +} + +/** + * xmlAllocParserInputBuffer: + * @enc: the charset encoding if known + * + * Create a buffered parser input for progressive parsing + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlAllocParserInputBuffer(xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAllocParserInputBuffer : out of memory!\n"); + return(NULL); + } + memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer)); + ret->buffer = xmlBufferCreate(); + if (ret->buffer == NULL) { + xmlFree(ret); + return(NULL); + } + ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT; + ret->encoder = xmlGetCharEncodingHandler(enc); + if (ret->encoder != NULL) + ret->raw = xmlBufferCreate(); + else + ret->raw = NULL; + ret->readcallback = NULL; + ret->closecallback = NULL; + ret->context = NULL; + + return(ret); +} + +/** + * xmlAllocOutputBuffer: + * @encoder: the encoding converter or NULL + * + * Create a buffered parser output + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlAllocOutputBuffer(xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + ret = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlAllocOutputBuffer : out of memory!\n"); + return(NULL); + } + memset(ret, 0, (size_t) 
sizeof(xmlOutputBuffer)); + ret->buffer = xmlBufferCreate(); + if (ret->buffer == NULL) { + xmlFree(ret); + return(NULL); + } + ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT; + ret->encoder = encoder; + if (encoder != NULL) { + ret->conv = xmlBufferCreateSize(4000); + /* + * This call is designed to initiate the encoder state + */ + xmlCharEncOutFunc(encoder, ret->conv, NULL); + } else + ret->conv = NULL; + ret->writecallback = NULL; + ret->closecallback = NULL; + ret->context = NULL; + ret->written = 0; + + return(ret); +} + +/** + * xmlFreeParserInputBuffer: + * @in: a buffered parser input + * + * Free up the memory used by a buffered parser input + */ +void +xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) { + if (in->raw) { + xmlBufferFree(in->raw); + in->raw = NULL; + } + if (in->encoder != NULL) { + xmlCharEncCloseFunc(in->encoder); + } + if (in->closecallback != NULL) { + in->closecallback(in->context); + } + if (in->buffer != NULL) { + xmlBufferFree(in->buffer); + in->buffer = NULL; + } + + memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer)); + xmlFree(in); +} + +/** + * xmlOutputBufferClose: + * @out: a buffered output + * + * flushes and close the output I/O channel + * and free up all the associated resources + * + * Returns the number of byte written or -1 in case of error. 
+ */ +int +xmlOutputBufferClose(xmlOutputBufferPtr out) { + int written; + + if (out == NULL) + return(-1); + if (out->writecallback != NULL) + xmlOutputBufferFlush(out); + if (out->closecallback != NULL) { + out->closecallback(out->context); + } + written = out->written; + if (out->conv) { + xmlBufferFree(out->conv); + out->conv = NULL; + } + if (out->encoder != NULL) { + xmlCharEncCloseFunc(out->encoder); + } + if (out->buffer != NULL) { + xmlBufferFree(out->buffer); + out->buffer = NULL; + } + + memset(out, 0xbe, (size_t) sizeof(xmlOutputBuffer)); + xmlFree(out); + return(written); +} + +/** + * xmlParserInputBufferCreateFilename: + * @URI: a C string containing the URI or filename + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing of a file + * If filename is "-' then we use stdin as the input. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. + * Do an encoding check if enc == XML_CHAR_ENCODING_NONE + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +#ifdef VMS +xmlParserInputBufferCreateFname +#else +xmlParserInputBufferCreateFilename +#endif +(const char *URI, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + int i; + void *context = NULL; + + if (xmlInputCallbackInitialized == 0) + xmlRegisterDefaultInputCallbacks(); + + if (URI == NULL) return(NULL); + + /* + * Try to find one of the input accept method accepting taht scheme + * Go in reverse to give precedence to user defined handlers. + */ + for (i = xmlInputCallbackNr - 1;i >= 0;i--) { + if ((xmlInputCallbackTable[i].matchcallback != NULL) && + (xmlInputCallbackTable[i].matchcallback(URI) != 0)) { + context = xmlInputCallbackTable[i].opencallback(URI); + if (context != NULL) + break; + } + } + if (context == NULL) { + return(NULL); + } + + /* + * Allocate the Input buffer front-end. 
+ */ + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = context; + ret->readcallback = xmlInputCallbackTable[i].readcallback; + ret->closecallback = xmlInputCallbackTable[i].closecallback; + } + return(ret); +} + +/** + * xmlOutputBufferCreateFilename: + * @URI: a C string containing the URI or filename + * @encoder: the encoding converter or NULL + * @compression: the compression ration (0 none, 9 max). + * + * Create a buffered output for the progressive saving of a file + * If filename is "-' then we use stdout as the output. + * Automatic support for ZLIB/Compress compressed document is provided + * by default if found at compile-time. + * TODO: currently if compression is set, the library only support + * writing to a local file. + * + * Returns the new output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateFilename(const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression) { + xmlOutputBufferPtr ret; + int i; + void *context = NULL; + + if (xmlOutputCallbackInitialized == 0) + xmlRegisterDefaultOutputCallbacks(); + + if (URI == NULL) return(NULL); + +#ifdef HAVE_ZLIB_H + if ((compression > 0) && (compression <= 9)) { + context = xmlGzfileOpenW(URI, compression); + if (context != NULL) { + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = context; + ret->writecallback = xmlGzfileWrite; + ret->closecallback = xmlGzfileClose; + } + return(ret); + } + } +#endif + + /* + * Try to find one of the output accept method accepting taht scheme + * Go in reverse to give precedence to user defined handlers. + */ + for (i = xmlOutputCallbackNr - 1;i >= 0;i--) { + if ((xmlOutputCallbackTable[i].matchcallback != NULL) && + (xmlOutputCallbackTable[i].matchcallback(URI) != 0)) { + context = xmlOutputCallbackTable[i].opencallback(URI); + if (context != NULL) + break; + } + } + if (context == NULL) { + return(NULL); + } + + /* + * Allocate the Output buffer front-end. 
+ */ + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = context; + ret->writecallback = xmlOutputCallbackTable[i].writecallback; + ret->closecallback = xmlOutputCallbackTable[i].closecallback; + } + return(ret); +} + +/** + * xmlParserInputBufferCreateFile: + * @file: a FILE* + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing of a FILE * + * buffered C I/O + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (xmlInputCallbackInitialized == 0) + xmlRegisterDefaultInputCallbacks(); + + if (file == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = file; + ret->readcallback = xmlFileRead; + ret->closecallback = xmlFileFlush; + } + + return(ret); +} + +/** + * xmlOutputBufferCreateFile: + * @file: a FILE* + * @encoder: the encoding converter or NULL + * + * Create a buffered output for the progressive saving to a FILE * + * buffered C I/O + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateFile(FILE *file, xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + if (xmlOutputCallbackInitialized == 0) + xmlRegisterDefaultOutputCallbacks(); + + if (file == NULL) return(NULL); + + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = file; + ret->writecallback = xmlFileWrite; + ret->closecallback = xmlFileFlush; + } + + return(ret); +} + +/** + * xmlParserInputBufferCreateFd: + * @fd: a file descriptor number + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from a file descriptor + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (fd < 0) return(NULL); 
+ + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) fd; + ret->readcallback = xmlFdRead; + ret->closecallback = xmlFdClose; + } + + return(ret); +} + +/** + * xmlParserInputBufferCreateMem: + * @mem: the memory input + * @size: the length of the memory block + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from a memory area. + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (size <= 0) return(NULL); + if (mem == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) mem; + ret->readcallback = (xmlInputReadCallback) xmlNop; + ret->closecallback = NULL; + xmlBufferAdd(ret->buffer, (const xmlChar *) mem, size); + } + + return(ret); +} + +/** + * xmlOutputBufferCreateFd: + * @fd: a file descriptor number + * @encoder: the encoding converter or NULL + * + * Create a buffered output for the progressive saving + * to a file descriptor + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateFd(int fd, xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + if (fd < 0) return(NULL); + + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = (void *) fd; + ret->writecallback = xmlFdWrite; + ret->closecallback = xmlFdClose; + } + + return(ret); +} + +/** + * xmlParserInputBufferCreateIO: + * @ioread: an I/O read function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from an I/O handler + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateIO(xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, void *ioctx, 
xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (ioread == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) ioctx; + ret->readcallback = ioread; + ret->closecallback = ioclose; + } + + return(ret); +} + +/** + * xmlOutputBufferCreateIO: + * @iowrite: an I/O write function + * @ioclose: an I/O close function + * @ioctx: an I/O handler + * @enc: the charset encoding if known + * + * Create a buffered output for the progressive saving + * to an I/O handler + * + * Returns the new parser output or NULL + */ +xmlOutputBufferPtr +xmlOutputBufferCreateIO(xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, void *ioctx, + xmlCharEncodingHandlerPtr encoder) { + xmlOutputBufferPtr ret; + + if (iowrite == NULL) return(NULL); + + ret = xmlAllocOutputBuffer(encoder); + if (ret != NULL) { + ret->context = (void *) ioctx; + ret->writecallback = iowrite; + ret->closecallback = ioclose; + } + + return(ret); +} + +/** + * xmlParserInputBufferPush: + * @in: a buffered parser input + * @len: the size in bytes of the array. + * @buf: an char array + * + * Push the content of the arry in the input buffer + * This routine handle the I18N transcoding to internal UTF-8 + * This is used when operating the parser in progressive (push) mode. + * + * Returns the number of chars read and stored in the buffer, or -1 + * in case of error. + */ +int +xmlParserInputBufferPush(xmlParserInputBufferPtr in, + int len, const char *buf) { + int nbchars = 0; + + if (len < 0) return(0); + if (in->encoder != NULL) { + /* + * Store the data in the incoming raw buffer + */ + if (in->raw == NULL) { + in->raw = xmlBufferCreate(); + } + xmlBufferAdd(in->raw, (const xmlChar *) buf, len); + + /* + * convert as much as possible to the parser reading buffer. 
+ */ + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferPush: encoder error\n"); + return(-1); + } + } else { + nbchars = len; + xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars); + } +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: pushed %d chars, buffer %d/%d\n", + nbchars, in->buffer->use, in->buffer->size); +#endif + return(nbchars); +} + +/** + * xmlParserInputBufferGrow: + * @in: a buffered parser input + * @len: indicative value of the amount of chars to read + * + * Grow up the content of the input buffer, the old data are preserved + * This routine handle the I18N transcoding to internal UTF-8 + * This routine is used when operating the parser in normal (pull) mode + * + * TODO: one should be able to remove one extra copy by copying directy + * onto in->buffer or in->raw + * + * Returns the number of chars read and stored in the buffer, or -1 + * in case of error. + */ +int +xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) { + char *buffer = NULL; + int res = 0; + int nbchars = 0; + int buffree; + + if ((len <= MINLEN) && (len != 4)) + len = MINLEN; + buffree = in->buffer->size - in->buffer->use; + if (buffree <= 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow : buffer full !\n"); + return(0); + } + if (len > buffree) + len = buffree; + + buffer = (char *) xmlMalloc((len + 1) * sizeof(char)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow : out of memory !\n"); + return(-1); + } + + /* + * Call the read method for this I/O type. 
+ */ + if (in->readcallback != NULL) { + res = in->readcallback(in->context, &buffer[0], len); + } else { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow : no input !\n"); + xmlFree(buffer); + return(-1); + } + if (res < 0) { + perror ("read error"); + xmlFree(buffer); + return(-1); + } + len = res; + if (in->encoder != NULL) { + /* + * Store the data in the incoming raw buffer + */ + if (in->raw == NULL) { + in->raw = xmlBufferCreate(); + } + xmlBufferAdd(in->raw, (const xmlChar *) buffer, len); + + /* + * convert as much as possible to the parser reading buffer. + */ + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlParserInputBufferGrow: encoder error\n"); + return(-1); + } + } else { + nbchars = len; + buffer[nbchars] = 0; + xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars); + } +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: read %d chars, buffer %d/%d\n", + nbchars, in->buffer->use, in->buffer->size); +#endif + xmlFree(buffer); + return(nbchars); +} + +/** + * xmlParserInputBufferRead: + * @in: a buffered parser input + * @len: indicative value of the amount of chars to read + * + * Refresh the content of the input buffer, the old data are considered + * consumed + * This routine handle the I18N transcoding to internal UTF-8 + * + * Returns the number of chars read and stored in the buffer, or -1 + * in case of error. + */ +int +xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) { + /* xmlBufferEmpty(in->buffer); */ + if (in->readcallback != NULL) + return(xmlParserInputBufferGrow(in, len)); + else + return(-1); +} + +/** + * xmlOutputBufferWrite: + * @out: a buffered parser output + * @len: the size in bytes of the array. + * @buf: an char array + * + * Write the content of the array in the output I/O buffer + * This routine handle the I18N transcoding from internal UTF-8 + * The buffer is lossless, i.e. 
will store in case of partial + * or delayed writes. + * + * Returns the number of chars immediately written, or -1 + * in case of error. + */ +int +xmlOutputBufferWrite(xmlOutputBufferPtr out, int len, const char *buf) { + int nbchars = 0; /* number of chars to output to I/O */ + int ret; /* return from function call */ + int written = 0; /* number of char written to I/O so far */ + int chunk; /* number of byte curreent processed from buf */ + + if (len < 0) return(0); + + do { + chunk = len; + if (chunk > 4 * MINLEN) + chunk = 4 * MINLEN; + + /* + * first handle encoding stuff. + */ + if (out->encoder != NULL) { + /* + * Store the data in the incoming raw buffer + */ + if (out->conv == NULL) { + out->conv = xmlBufferCreate(); + } + xmlBufferAdd(out->buffer, (const xmlChar *) buf, chunk); + + if ((out->buffer->use < MINLEN) && (chunk == len)) + goto done; + + /* + * convert as much as possible to the parser reading buffer. + */ + ret = xmlCharEncOutFunc(out->encoder, out->conv, out->buffer); + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlOutputBufferWrite: encoder error\n"); + return(-1); + } + nbchars = out->conv->use; + } else { + xmlBufferAdd(out->buffer, (const xmlChar *) buf, chunk); + nbchars = out->buffer->use; + } + buf += chunk; + len -= chunk; + + if ((nbchars < MINLEN) && (len <= 0)) + goto done; + + if (out->writecallback) { + /* + * second write the stuff to the I/O channel + */ + if (out->encoder != NULL) { + ret = out->writecallback(out->context, + (const char *)out->conv->content, nbchars); + if (ret >= 0) + xmlBufferShrink(out->conv, nbchars); + } else { + ret = out->writecallback(out->context, + (const char *)out->buffer->content, nbchars); + if (ret >= 0) + xmlBufferShrink(out->buffer, nbchars); + } + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "I/O: error %d writing %d bytes\n", ret, nbchars); + return(ret); + } + out->written += ret; + } + written += nbchars; + } while (len > 0); + +done: +#ifdef DEBUG_INPUT + 
xmlGenericError(xmlGenericErrorContext, + "I/O: wrote %d chars\n", written); +#endif + return(written); +} + +/** + * xmlOutputBufferWriteString: + * @out: a buffered parser output + * @str: a zero terminated C string + * + * Write the content of the string in the output I/O buffer + * This routine handle the I18N transcoding from internal UTF-8 + * The buffer is lossless, i.e. will store in case of partial + * or delayed writes. + * + * Returns the number of chars immediately written, or -1 + * in case of error. + */ +int +xmlOutputBufferWriteString(xmlOutputBufferPtr out, const char *str) { + int len; + + if (str == NULL) + return(-1); + len = strlen(str); + + if (len > 0) + return(xmlOutputBufferWrite(out, len, str)); + return(len); +} + +/** + * xmlOutputBufferFlush: + * @out: a buffered output + * + * flushes the output I/O channel + * + * Returns the number of byte written or -1 in case of error. + */ +int +xmlOutputBufferFlush(xmlOutputBufferPtr out) { + int nbchars = 0, ret = 0; + + /* + * first handle encoding stuff. + */ + if ((out->conv != NULL) && (out->encoder != NULL)) { + /* + * convert as much as possible to the parser reading buffer. 
+ */ + nbchars = xmlCharEncOutFunc(out->encoder, out->conv, out->buffer); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlOutputBufferWrite: encoder error\n"); + return(-1); + } + } + + /* + * second flush the stuff to the I/O channel + */ + if ((out->conv != NULL) && (out->encoder != NULL) && + (out->writecallback != NULL)) { + ret = out->writecallback(out->context, + (const char *)out->conv->content, out->conv->use); + if (ret >= 0) + xmlBufferShrink(out->conv, ret); + } else if (out->writecallback != NULL) { + ret = out->writecallback(out->context, + (const char *)out->buffer->content, out->buffer->use); + if (ret >= 0) + xmlBufferShrink(out->buffer, ret); + } + if (ret < 0) { + xmlGenericError(xmlGenericErrorContext, + "I/O: error %d flushing %d bytes\n", ret, nbchars); + return(ret); + } + out->written += ret; + +#ifdef DEBUG_INPUT + xmlGenericError(xmlGenericErrorContext, + "I/O: flushed %d chars\n", ret); +#endif + return(ret); +} + +/* + * xmlParserGetDirectory: + * @filename: the path to a file + * + * lookup the directory for that file + * + * Returns a new allocated string containing the directory, or NULL. 
+ */ +char * +xmlParserGetDirectory(const char *filename) { + char *ret = NULL; + char dir[1024]; + char *cur; + char sep = '/'; + + if (xmlInputCallbackInitialized == 0) + xmlRegisterDefaultInputCallbacks(); + + if (filename == NULL) return(NULL); +#ifdef WIN32 + sep = '\\'; +#endif + + strncpy(dir, filename, 1023); + dir[1023] = 0; + cur = &dir[strlen(dir)]; + while (cur > dir) { + if (*cur == sep) break; + cur --; + } + if (*cur == sep) { + if (cur == dir) dir[1] = 0; + else *cur = 0; + ret = xmlMemStrdup(dir); + } else { + if (getcwd(dir, 1024) != NULL) { + dir[1023] = 0; + ret = xmlMemStrdup(dir); + } + } + return(ret); +} + +/**************************************************************** + * * + * External entities loading * + * * + ****************************************************************/ + +/* + * xmlDefaultExternalEntityLoader: + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @ctxt: the context in which the entity is called or NULL + * + * By default we don't load external entitites, yet. + * + * Returns a new allocated xmlParserInputPtr, or NULL. 
+ */ +static +xmlParserInputPtr +xmlDefaultExternalEntityLoader(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + xmlParserInputPtr ret = NULL; + +#ifdef DEBUG_EXTERNAL_ENTITIES + xmlGenericError(xmlGenericErrorContext, + "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL); +#endif + if (URL == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "failed to load external entity \"%s\"\n", ID); + return(NULL); + } + ret = xmlNewInputFromFile(ctxt, URL); + if (ret == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "failed to load external entity \"%s\"\n", URL); + } + return(ret); +} + +static xmlExternalEntityLoader xmlCurrentExternalEntityLoader = + xmlDefaultExternalEntityLoader; + +/* + * xmlSetExternalEntityLoader: + * @f: the new entity resolver function + * + * Changes the defaultexternal entity resolver function for the application + */ +void +xmlSetExternalEntityLoader(xmlExternalEntityLoader f) { + xmlCurrentExternalEntityLoader = f; +} + +/* + * xmlGetExternalEntityLoader: + * + * Get the default external entity resolver function for the application + * + * Returns the xmlExternalEntityLoader function pointer + */ +xmlExternalEntityLoader +xmlGetExternalEntityLoader(void) { + return(xmlCurrentExternalEntityLoader); +} + +/* + * xmlLoadExternalEntity: + * @URL: the URL for the entity to load + * @ID: the System ID for the entity to load + * @ctxt: the context in which the entity is called or NULL + * + * Load an external entity, note that the use of this function for + * unparsed entities may generate problems + * TODO: a more generic External entitiy API must be designed + * + * Returns the xmlParserInputPtr or NULL + */ +xmlParserInputPtr +xmlLoadExternalEntity(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + return(xmlCurrentExternalEntityLoader(URL, ID, ctxt)); +} + diff --git a/xmlIO.h b/xmlIO.h new file mode 100644 index 00000000..ecff73bc 
/*
 * Buffered parser input: an I/O channel (context + callbacks) feeding a
 * buffer that the parser reads from, with an optional conversion stage.
 */
typedef struct _xmlParserInputBuffer xmlParserInputBuffer;
typedef xmlParserInputBuffer *xmlParserInputBufferPtr;
struct _xmlParserInputBuffer {
    void*                  context;       /* handle passed to the callbacks */
    xmlInputReadCallback   readcallback;  /* fills a buffer from context */
    xmlInputCloseCallback  closecallback; /* releases context, may be NULL */

    xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */

    xmlBufferPtr buffer;    /* Local buffer encoded in UTF-8 */
    xmlBufferPtr raw;       /* if encoder != NULL buffer for raw input */
};
+ */ + +typedef int (*xmlOutputMatchCallback) (char const *filename); +typedef void * (*xmlOutputOpenCallback) (char const *filename); +typedef int (*xmlOutputWriteCallback) (void * context, const char * buffer, + int len); +typedef void (*xmlOutputCloseCallback) (void * context); + +typedef struct _xmlOutputBuffer xmlOutputBuffer; +typedef xmlOutputBuffer *xmlOutputBufferPtr; +struct _xmlOutputBuffer { + void* context; + xmlOutputWriteCallback writecallback; + xmlOutputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */ + + xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 or ISOLatin */ + xmlBufferPtr conv; /* if encoder != NULL buffer for output */ + int written; /* total number of byte written */ +}; + +/* + * Interfaces for input + */ + +void xmlRegisterDefaultInputCallbacks (void); +xmlParserInputBufferPtr + xmlAllocParserInputBuffer (xmlCharEncoding enc); + +#ifdef VMS +xmlParserInputBufferPtr + xmlParserInputBufferCreateFname (const char *URI, + xmlCharEncoding enc); +#define xmlParserInputBufferCreateFilename xmlParserInputBufferCreateFname +#else +xmlParserInputBufferPtr + xmlParserInputBufferCreateFilename (const char *URI, + xmlCharEncoding enc); +#endif + +xmlParserInputBufferPtr + xmlParserInputBufferCreateFile (FILE *file, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateFd (int fd, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateMem (const char *mem, int size, + xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); +int xmlParserInputBufferRead (xmlParserInputBufferPtr in, + int len); +int xmlParserInputBufferGrow (xmlParserInputBufferPtr in, + int len); +int xmlParserInputBufferPush (xmlParserInputBufferPtr in, + int len, + const char *buf); +void xmlFreeParserInputBuffer (xmlParserInputBufferPtr in); +char * 
xmlParserGetDirectory (const char *filename); + +int xmlRegisterInputCallbacks (xmlInputMatchCallback match, + xmlInputOpenCallback open, + xmlInputReadCallback read, + xmlInputCloseCallback close); +/* + * Interfaces for output + */ +void xmlRegisterDefaultOutputCallbacks(void); +xmlOutputBufferPtr + xmlAllocOutputBuffer (xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateFilename (const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression); + +xmlOutputBufferPtr + xmlOutputBufferCreateFile (FILE *file, + xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateFd (int fd, + xmlCharEncodingHandlerPtr encoder); + +xmlOutputBufferPtr + xmlOutputBufferCreateIO (xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, + void *ioctx, + xmlCharEncodingHandlerPtr encoder); + +int xmlOutputBufferWrite (xmlOutputBufferPtr out, + int len, + const char *buf); +int xmlOutputBufferWriteString (xmlOutputBufferPtr out, + const char *str); + +int xmlOutputBufferFlush (xmlOutputBufferPtr out); +int xmlOutputBufferClose (xmlOutputBufferPtr out); + +int xmlRegisterOutputCallbacks (xmlOutputMatchCallback match, + xmlOutputOpenCallback open, + xmlOutputWriteCallback write, + xmlOutputCloseCallback close); + +/* + * This save function are part of tree.h and HTMLtree.h actually + */ +int xmlSaveFileTo (xmlOutputBuffer *buf, + xmlDocPtr cur, + const char *encoding); +void xmlNodeDumpOutput (xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format, + const char *encoding); +void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_IO_H__ */ diff --git a/xmlerror.h b/xmlerror.h new file mode 100644 index 00000000..53c57518 --- /dev/null +++ b/xmlerror.h @@ -0,0 +1,180 @@ +#ifndef __XML_ERROR_H__ +#define __XML_ERROR_H__ + +#include +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + 
XML_ERR_OK = 0,
+    XML_ERR_INTERNAL_ERROR,
+    XML_ERR_NO_MEMORY,
+
+    XML_ERR_DOCUMENT_START, /* 3 */
+    XML_ERR_DOCUMENT_EMPTY,
+    XML_ERR_DOCUMENT_END,
+
+    XML_ERR_INVALID_HEX_CHARREF, /* 6 */
+    XML_ERR_INVALID_DEC_CHARREF,
+    XML_ERR_INVALID_CHARREF,
+    XML_ERR_INVALID_CHAR,
+
+    XML_ERR_CHARREF_AT_EOF, /* 10 */
+    XML_ERR_CHARREF_IN_PROLOG,
+    XML_ERR_CHARREF_IN_EPILOG,
+    XML_ERR_CHARREF_IN_DTD,
+    XML_ERR_ENTITYREF_AT_EOF,
+    XML_ERR_ENTITYREF_IN_PROLOG,
+    XML_ERR_ENTITYREF_IN_EPILOG,
+    XML_ERR_ENTITYREF_IN_DTD,
+    XML_ERR_PEREF_AT_EOF,
+    XML_ERR_PEREF_IN_PROLOG,
+    XML_ERR_PEREF_IN_EPILOG,
+    XML_ERR_PEREF_IN_INT_SUBSET,
+
+    XML_ERR_ENTITYREF_NO_NAME, /* 22 */
+    XML_ERR_ENTITYREF_SEMICOL_MISSING,
+
+    XML_ERR_PEREF_NO_NAME, /* 24 */
+    XML_ERR_PEREF_SEMICOL_MISSING,
+
+    XML_ERR_UNDECLARED_ENTITY, /* 26 */
+    XML_WAR_UNDECLARED_ENTITY,
+    XML_ERR_UNPARSED_ENTITY,
+    XML_ERR_ENTITY_IS_EXTERNAL,
+    XML_ERR_ENTITY_IS_PARAMETER,
+
+    XML_ERR_UNKNOWN_ENCODING, /* 31 */
+    XML_ERR_UNSUPPORTED_ENCODING,
+
+    XML_ERR_STRING_NOT_STARTED, /* 33 */
+    XML_ERR_STRING_NOT_CLOSED,
+    XML_ERR_NS_DECL_ERROR,
+
+    XML_ERR_ENTITY_NOT_STARTED, /* 36 */
+    XML_ERR_ENTITY_NOT_FINISHED,
+
+    XML_ERR_LT_IN_ATTRIBUTE, /* 38 */
+    XML_ERR_ATTRIBUTE_NOT_STARTED,
+    XML_ERR_ATTRIBUTE_NOT_FINISHED,
+    XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
+    XML_ERR_ATTRIBUTE_REDEFINED,
+
+    XML_ERR_LITERAL_NOT_STARTED, /* 43 */
+    XML_ERR_LITERAL_NOT_FINISHED,
+
+    XML_ERR_COMMENT_NOT_FINISHED, /* 45 */
+
+    XML_ERR_PI_NOT_STARTED, /* 46 */
+    XML_ERR_PI_NOT_FINISHED,
+
+    XML_ERR_NOTATION_NOT_STARTED, /* 48 */
+    XML_ERR_NOTATION_NOT_FINISHED,
+
+    XML_ERR_ATTLIST_NOT_STARTED, /* 50 */
+    XML_ERR_ATTLIST_NOT_FINISHED,
+
+    XML_ERR_MIXED_NOT_STARTED, /* 52 */
+    XML_ERR_MIXED_NOT_FINISHED,
+
+    XML_ERR_ELEMCONTENT_NOT_STARTED, /* 54 */
+    XML_ERR_ELEMCONTENT_NOT_FINISHED,
+
+    XML_ERR_XMLDECL_NOT_STARTED, /* 56 */
+    XML_ERR_XMLDECL_NOT_FINISHED,
+
+    XML_ERR_CONDSEC_NOT_STARTED, /* 58 */
+    XML_ERR_CONDSEC_NOT_FINISHED,
+
+    XML_ERR_EXT_SUBSET_NOT_FINISHED, /* 60 */
+
+    XML_ERR_DOCTYPE_NOT_FINISHED, /* 61 */
+
+    XML_ERR_MISPLACED_CDATA_END, /* 62 */
+    XML_ERR_CDATA_NOT_FINISHED,
+
+    XML_ERR_RESERVED_XML_NAME, /* 64 */
+
+    XML_ERR_SPACE_REQUIRED, /* 65 */
+    XML_ERR_SEPARATOR_REQUIRED,
+    XML_ERR_NMTOKEN_REQUIRED,
+    XML_ERR_NAME_REQUIRED,
+    XML_ERR_PCDATA_REQUIRED,
+    XML_ERR_URI_REQUIRED,
+    XML_ERR_PUBID_REQUIRED,
+    XML_ERR_LT_REQUIRED,
+    XML_ERR_GT_REQUIRED,
+    XML_ERR_LTSLASH_REQUIRED,
+    XML_ERR_EQUAL_REQUIRED,
+
+    XML_ERR_TAG_NAME_MISMATCH, /* 76 */
+    XML_ERR_TAG_NOT_FINISED,
+
+    XML_ERR_STANDALONE_VALUE, /* 78 */
+
+    XML_ERR_ENCODING_NAME, /* 79 */
+
+    XML_ERR_HYPHEN_IN_COMMENT, /* 80 */
+
+    XML_ERR_INVALID_ENCODING, /* 81 */
+
+    XML_ERR_EXT_ENTITY_STANDALONE, /* 82 */
+
+    XML_ERR_CONDSEC_INVALID, /* 83 */
+
+    XML_ERR_VALUE_REQUIRED, /* 84 */
+
+    XML_ERR_NOT_WELL_BALANCED, /* 85 */
+    XML_ERR_EXTRA_CONTENT, /* 86 */
+    XML_ERR_ENTITY_CHAR_ERROR, /* 87 */
+    XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
+    XML_ERR_ENTITY_LOOP, /* 89 */
+    XML_ERR_ENTITY_BOUNDARY, /* 90 */
+    XML_ERR_INVALID_URI, /* 91 */
+    XML_ERR_URI_FRAGMENT /* 92 */
+}xmlParserErrors;
+
+/*
+ * Signature of the function to use when there is an error and
+ * no parsing or validity context available
+ */
+typedef void (*xmlGenericErrorFunc) (void *ctx, const char *msg, ...);
+
+/*
+ * Those are the default error function and associated context to use
+ * when there is an error and no parsing or validity context available
+ */
+
+LIBXML_DLL_IMPORT extern xmlGenericErrorFunc xmlGenericError;
+LIBXML_DLL_IMPORT extern void *xmlGenericErrorContext;
+
+/*
+ * Use the following function to reset the two previous global variables.
+ */ +void xmlSetGenericErrorFunc (void *ctx, + xmlGenericErrorFunc handler); + +/* + * Default message routines used by SAX and Valid context for error + * and warning reporting + */ +void xmlParserError (void *ctx, + const char *msg, + ...); +void xmlParserWarning (void *ctx, + const char *msg, + ...); +void xmlParserValidityError (void *ctx, + const char *msg, + ...); +void xmlParserValidityWarning(void *ctx, + const char *msg, + ...); +void xmlParserPrintFileInfo (xmlParserInputPtr input); +void xmlParserPrintFileContext(xmlParserInputPtr input); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_ERROR_H__ */ diff --git a/xmlmemory.c b/xmlmemory.c new file mode 100644 index 00000000..4cf16031 --- /dev/null +++ b/xmlmemory.c @@ -0,0 +1,707 @@ +/* + * memory.c: libxml memory allocator wrapper. + * + * Daniel.Veillard@w3.org + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#include + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_TIME_H +#include +#endif +#ifdef HAVE_MALLOC_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_CTYPE_H +#include +#endif + + +#include +#include + +#ifdef xmlMalloc +#undef xmlMalloc +#endif +#ifdef xmlRealloc +#undef xmlRealloc +#endif +#ifdef xmlMemStrdup +#undef xmlMemStrdup +#endif + + +/* + * Each of the blocks allocated begin with a header containing informations + */ + +#define MEMTAG 0x5aa5 + +#define MALLOC_TYPE 1 +#define REALLOC_TYPE 2 +#define STRDUP_TYPE 3 + +typedef struct memnod { + unsigned int mh_tag; + unsigned int mh_type; + unsigned long mh_number; + size_t mh_size; +#ifdef MEM_LIST + struct memnod *mh_next; + struct memnod *mh_prev; +#endif + const char *mh_file; + unsigned int mh_line; +} MEMHDR; + + +#ifdef SUN4 +#define ALIGN_SIZE 16 +#else +#define ALIGN_SIZE sizeof(double) +#endif +#define HDR_SIZE sizeof(MEMHDR) +#define RESERVE_SIZE (((HDR_SIZE + (ALIGN_SIZE-1)) \ + / ALIGN_SIZE ) * ALIGN_SIZE) + + +#define CLIENT_2_HDR(a) ((MEMHDR *) 
(((char *) (a)) - RESERVE_SIZE))
+/* Inverse of CLIENT_2_HDR: skip RESERVE_SIZE bytes past a header address */
+#define HDR_2_CLIENT(a) ((void *) (((char *) (a)) + RESERVE_SIZE))
+
+
+/* Sum of the mh_size of all blocks currently tracked */
+static unsigned long debugMemSize = 0;
+/* Largest value debugMemSize has reached */
+static unsigned long debugMaxMemSize = 0;
+/* Sequence counter; incremented for each new block and stored in mh_number */
+static int block=0;
+/* Block number that triggers xmlMallocBreakpoint() when it is allocated */
+int xmlMemStopAtBlock = 0;
+/* Tested by the allocation routines before they call xmlInitMemory() */
+int xmlMemInitialized = 0;
+#ifdef MEM_LIST
+/* Head of the doubly linked list of tracked block headers */
+static MEMHDR *memlist = NULL;
+#endif
+
+void debugmem_tag_error(void *addr);
+#ifdef MEM_LIST
+void  debugmem_list_add(MEMHDR *);
+void debugmem_list_delete(MEMHDR *);
+#endif
+/* NOTE: the expansion already ends with ';' */
+#define Mem_Tag_Err(a) debugmem_tag_error(a);
+
+/* Hook expanded at the entry/exit of the allocation routines; empty unless
+ * the build defines it */
+#ifndef TEST_POINT
+#define TEST_POINT
+#endif
+
+/**
+ * xmlMallocBreakpoint:
+ *
+ * Breakpoint to use in conjunction with xmlMemStopAtBlock. When the block
+ * number reaches the specified value this function is called. One needs to
+ * add a breakpoint to it to get the context in which the given block is
+ * allocated.
+ */
+
+void
+xmlMallocBreakpoint(void) {
+    xmlGenericError(xmlGenericErrorContext,
+            "xmlMallocBreakpoint reached on block %d\n", xmlMemStopAtBlock);
+}
+
+/**
+ * xmlMallocLoc:
+ * @size: an int specifying the size in byte to allocate.
+ * @file: the file name or NULL
+ * @line: the line number
+ *
+ * a malloc() equivalent, with logging of the allocation info.
+ *
+ * Returns a pointer to the allocated area or NULL in case of lack of memory.
+ */
+
+void *
+xmlMallocLoc(int size, const char * file, int line)
+{
+    MEMHDR *p;
+
+    /* run the one-time setup lazily before the first tracked allocation */
+    if (!xmlMemInitialized) xmlInitMemory();
+#ifdef DEBUG_MEMORY
+    xmlGenericError(xmlGenericErrorContext,
+            "Malloc(%d)\n",size);
+#endif
+
+    TEST_POINT
+
+    /* the bookkeeping header occupies the RESERVE_SIZE bytes that precede
+     * the area handed back to the caller */
+    p = (MEMHDR *) malloc(RESERVE_SIZE+size);
+
+    if (!p) {
+        xmlGenericError(xmlGenericErrorContext,
+                "xmlMalloc : Out of free space\n");
+        xmlMemoryDump();
+        return(NULL);
+    }
+    p->mh_tag = MEMTAG;        /* validity marker checked on free/realloc */
+    p->mh_number = ++block;    /* sequence number of this allocation */
+    p->mh_size = size;
+    p->mh_type = MALLOC_TYPE;
+    p->mh_file = file;
+    p->mh_line = line;
+    debugMemSize += size;
+    /* keep the high-water mark up to date */
+    if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize;
+#ifdef MEM_LIST
+    debugmem_list_add(p);
+#endif
+
+#ifdef DEBUG_MEMORY
+    xmlGenericError(xmlGenericErrorContext,
+            "Malloc(%d) Ok\n",size);
+#endif
+
+    /* debugging aid: stop on the block number selected by the user */
+    if (xmlMemStopAtBlock == block) xmlMallocBreakpoint();
+
+    TEST_POINT
+
+    return(HDR_2_CLIENT(p));
+}
+
+/**
+ * xmlMemMalloc:
+ * @size: an int specifying the size in byte to allocate.
+ *
+ * a malloc() equivalent, with logging of the allocation info.
+ * The allocation is recorded with the file name "none" and line 0.
+ *
+ * Returns a pointer to the allocated area or NULL in case of lack of memory.
+ */
+
+void *
+xmlMemMalloc(int size)
+{
+    return(xmlMallocLoc(size, "none", 0));
+}
+
+/**
+ * xmlReallocLoc:
+ * @ptr: the initial memory block pointer
+ * @size: an int specifying the size in byte to allocate.
+ * @file: the file name or NULL
+ * @line: the line number
+ *
+ * a realloc() equivalent, with logging of the allocation info.
+ *
+ * Returns a pointer to the allocated area or NULL in case of lack of memory.
+ */
+
+void *
+xmlReallocLoc(void *ptr,int size, const char * file, int line)
+{
+    MEMHDR *p, *moved;
+    unsigned long number;
+    size_t oldsize;
+
+    /* realloc(NULL, n) is malloc(n); there is no header to inspect */
+    if (ptr == NULL)
+        return(xmlMallocLoc(size, file, line));
+
+    if (!xmlMemInitialized) xmlInitMemory();
+    TEST_POINT
+
+    p = CLIENT_2_HDR(ptr);
+    if (p->mh_tag != MEMTAG) {
+        Mem_Tag_Err(p);
+        goto error;
+    }
+    number = p->mh_number;
+    oldsize = p->mh_size;
+
+    /* the block may move: take it out of the bookkeeping first */
+    p->mh_tag = ~MEMTAG;
+    debugMemSize -= oldsize;
+#ifdef MEM_LIST
+    debugmem_list_delete(p);
+#endif
+
+    moved = (MEMHDR *) realloc(p,RESERVE_SIZE+size);
+    if (!moved) {
+        /*
+         * realloc() failed, so the original block is untouched and still
+         * owned by the caller: put it back exactly as it was tracked,
+         * otherwise a later xmlMemFree() would report a tag error.
+         */
+        p->mh_tag = MEMTAG;
+        debugMemSize += oldsize;
+#ifdef MEM_LIST
+        debugmem_list_add(p);
+#endif
+        goto error;
+    }
+    p = moved;
+    p->mh_tag = MEMTAG;
+    p->mh_number = number;     /* a resized block keeps its number */
+    p->mh_type = REALLOC_TYPE;
+    p->mh_size = size;
+    p->mh_file = file;
+    p->mh_line = line;
+    debugMemSize += size;
+    if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize;
+#ifdef MEM_LIST
+    debugmem_list_add(p);
+#endif
+
+    TEST_POINT
+
+    return(HDR_2_CLIENT(p));
+
+error:
+    return(NULL);
+}
+
+/**
+ * xmlMemRealloc:
+ * @ptr: the initial memory block pointer
+ * @size: an int specifying the size in byte to allocate.
+ *
+ * a realloc() equivalent, with logging of the allocation info.
+ * The allocation is recorded with the file name "none" and line 0.
+ *
+ * Returns a pointer to the allocated area or NULL in case of lack of memory.
+ */
+
+void *
+xmlMemRealloc(void *ptr,int size) {
+    return(xmlReallocLoc(ptr, size, "none", 0));
+}
+
+/**
+ * xmlMemFree:
+ * @ptr: the memory block pointer, NULL is accepted and ignored
+ *
+ * a free() equivalent, with error checking.
+ */
+void
+xmlMemFree(void *ptr)
+{
+    MEMHDR *p;
+
+    /* like free(): nothing to do, and no header to look at */
+    if (ptr == NULL)
+        return;
+
+    TEST_POINT
+
+    p = CLIENT_2_HDR(ptr);
+    if (p->mh_tag != MEMTAG) {
+        Mem_Tag_Err(p);
+        goto error;
+    }
+    /* invalidate the tag so that a double free is detected */
+    p->mh_tag = ~MEMTAG;
+    debugMemSize -= p->mh_size;
+
+#ifdef MEM_LIST
+    debugmem_list_delete(p);
+#endif
+    free(p);
+
+    TEST_POINT
+
+    return;
+
+error:
+    /* "%p" prints the whole pointer; an (unsigned int) cast truncates it
+     * where pointers are wider than int */
+    xmlGenericError(xmlGenericErrorContext,
+            "xmlFree(%p) error\n", ptr);
+    return;
+}
+
+/**
+ * xmlMemStrdupLoc:
+ * @str: the initial string pointer
+ * @file: the file name or NULL
+ * @line: the line number
+ *
+ * a strdup() equivalent, with logging of the allocation info.
+ *
+ * Returns a pointer to the new string or NULL if allocation error occurred.
+ */
+
+char *
+xmlMemStrdupLoc(const char *str, const char *file, int line)
+{
+    char *s;
+    size_t size;
+    MEMHDR *p;
+
+    /* strlen(NULL) is undefined: report failure instead */
+    if (str == NULL)
+        return(NULL);
+    size = strlen(str) + 1;    /* include the terminating 0 */
+
+    if (!xmlMemInitialized) xmlInitMemory();
+    TEST_POINT
+
+    p = (MEMHDR *) malloc(RESERVE_SIZE+size);
+    if (!p)
+        return(NULL);
+
+    p->mh_tag = MEMTAG;
+    p->mh_number = ++block;
+    p->mh_size = size;
+    p->mh_type = STRDUP_TYPE;
+    p->mh_file = file;
+    p->mh_line = line;
+    debugMemSize += size;
+    if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize;
+#ifdef MEM_LIST
+    debugmem_list_add(p);
+#endif
+    /* p is non-NULL here, so the client pointer cannot be NULL either */
+    s = (char *) HDR_2_CLIENT(p);
+
+    if (xmlMemStopAtBlock == block) xmlMallocBreakpoint();
+
+    /* the length is already known: copy it without rescanning */
+    memcpy(s, str, size);
+
+    TEST_POINT
+
+    return(s);
+}
+
+/**
+ * xmlMemoryStrdup:
+ * @str: the initial string pointer
+ *
+ * a strdup() equivalent, with logging of the allocation info.
+ * The allocation is recorded with the file name "none" and line 0.
+ *
+ * Returns a pointer to the new string or NULL if allocation error occurred.
+ */
+
+char *
+xmlMemoryStrdup(const char *str) {
+    return(xmlMemStrdupLoc(str, "none", 0));
+}
+
+/**
+ * xmlMemUsed:
+ *
+ * returns the amount of memory currently allocated
+ *
+ * Returns an int representing the amount of memory allocated.
+ */
+
+int
+xmlMemUsed(void) {
+     return(debugMemSize);
+}
+
+#ifdef MEM_LIST
+/**
+ * xmlMemContentShow:
+ * @fp: a FILE descriptor used as the output file
+ * @p: a memory block header
+ *
+ * tries to show some content from the memory block: a printable prefix
+ * is printed as text, otherwise the block is scanned for a value that
+ * points to another tracked block.
+ */
+
+void
+xmlMemContentShow(FILE *fp, MEMHDR *p)
+{
+    int i,j,len;
+    int terminated;
+    const char *buf;
+
+    /* check before touching the header */
+    if (p == NULL) {
+        fprintf(fp, " NULL");
+        return;
+    }
+    len = p->mh_size;
+    buf = (const char *) HDR_2_CLIENT(p);
+
+    /* i = length of the leading run of printable characters */
+    for (i = 0;i < len;i++) {
+        if (buf[i] == 0) break;
+        /* <ctype.h> functions need an unsigned char value */
+        if (!isprint((unsigned char) buf[i])) break;
+    }
+    /* never look at buf[len]: a run reaching the end is unterminated */
+    terminated = ((i < len) && (buf[i] == 0));
+
+    if ((i < 4) && ((!terminated) || (i == 0))) {
+        if (len >= (int) sizeof(void *)) {
+            MEMHDR *q;
+            void *cur;
+
+            /* step by pointer size and stop while a whole pointer still
+             * fits, so no read goes past the end of the block */
+            for (j = 0;j + (int) sizeof(void *) <= len;
+                 j += (int) sizeof(void *)) {
+                memcpy(&cur, &buf[j], sizeof(cur));
+                q = CLIENT_2_HDR(cur);
+                p = memlist;
+                while (p != NULL) {
+                    if (p == q) break;
+                    p = p->mh_next;
+                }
+                if ((p != NULL) && (p == q)) {
+                    fprintf(fp, " pointer to #%lu at index %d",
+                            p->mh_number, j);
+                    return;
+                }
+            }
+        }
+    } else if ((i == 0) && terminated) {
+        /* NOTE(review): the test above already catches i == 0, so this
+         * branch looks unreachable - kept as in the original, confirm */
+        fprintf(fp," null");
+    } else {
+        if (terminated) fprintf(fp," \"%.25s\"", buf);
+        else {
+            fprintf(fp," [");
+            for (j = 0;j < i;j++)
+                fprintf(fp,"%c", buf[j]);
+            fprintf(fp,"]");
+        }
+    }
+}
+#endif
+
+/**
+ * xmlMemShow:
+ * @fp: a FILE descriptor used as the output file, nothing is done if NULL
+ * @nr: number of entries to dump
+ *
+ * show a short display of the memory allocated, and dump
+ * the @nr last allocated areas which were not freed
+ */
+
+void
+xmlMemShow(FILE *fp, int nr)
+{
+#ifdef MEM_LIST
+    MEMHDR *p;
+#endif
+
+    /* every line below is written to fp, not only the summary */
+    if (fp == NULL)
+        return;
+
+    fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n",
+            debugMemSize, debugMaxMemSize);
+#ifdef MEM_LIST
+    if (nr > 0) {
+        fprintf(fp,"NUMBER SIZE TYPE WHERE\n");
+        p = memlist;
+        while ((p) && nr > 0) {
+            /* mh_size is a size_t: convert it to match the format */
+            fprintf(fp,"%6lu %6lu ",p->mh_number,
+                    (unsigned long) p->mh_size);
+            switch (p->mh_type) {
+                case STRDUP_TYPE:fprintf(fp,"strdup() in ");break;
+                case MALLOC_TYPE:fprintf(fp,"malloc() in ");break;
+                case REALLOC_TYPE:fprintf(fp,"realloc() in ");break;
+                default:fprintf(fp," ??? in ");break;
+            }
+            if (p->mh_file != NULL)
+                fprintf(fp,"%s(%d)", p->mh_file, p->mh_line);
+            if (p->mh_tag != MEMTAG)
+                fprintf(fp," INVALID");
+            xmlMemContentShow(fp, p);
+            fprintf(fp,"\n");
+            nr--;
+            p = p->mh_next;
+        }
+    }
+#endif /* MEM_LIST */
+}
+
+/**
+ * xmlMemDisplay:
+ * @fp: a FILE descriptor used as the output file, if NULL, the result is
+ *      written to the file .memorylist
+ *
+ * show in-extenso the memory blocks allocated
+ */
+
+void
+xmlMemDisplay(FILE *fp)
+{
+    FILE *old_fp = fp;
+#ifdef MEM_LIST
+    MEMHDR *p;
+    int     idx;
+#if defined(HAVE_LOCALTIME) && defined(HAVE_STRFTIME)
+    time_t currentTime;
+    char buf[500];
+    struct tm * tstruct;
+#endif
+#endif
+
+    /* documented fallback: no stream given, write to .memorylist */
+    if (fp == NULL) {
+        fp = fopen(".memorylist", "w");
+        if (fp == NULL)
+            return;
+    }
+
+#ifdef MEM_LIST
+#if defined(HAVE_LOCALTIME) && defined(HAVE_STRFTIME)
+    currentTime = time(NULL);
+    tstruct = localtime(&currentTime);
+    /* both calls can fail; buf is only meaningful when strftime() > 0 */
+    if ((tstruct != NULL) &&
+        (strftime(buf, sizeof(buf) - 1, "%c", tstruct) > 0))
+        fprintf(fp," %s\n\n", buf);
+#endif
+
+    fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n",
+            debugMemSize, debugMaxMemSize);
+    fprintf(fp,"BLOCK NUMBER SIZE TYPE\n");
+    idx = 0;
+    p = memlist;
+    while (p) {
+        fprintf(fp,"%-5d %6lu %6lu ",idx++,p->mh_number,
+                (unsigned long) p->mh_size);
+        switch (p->mh_type) {
+            case STRDUP_TYPE:fprintf(fp,"strdup() in ");break;
+            case MALLOC_TYPE:fprintf(fp,"malloc() in ");break;
+            case REALLOC_TYPE:fprintf(fp,"realloc() in ");break;
+            default:fprintf(fp," ??? in ");break;
+        }
+        if (p->mh_file != NULL) fprintf(fp,"%s(%d)", p->mh_file, p->mh_line);
+        if (p->mh_tag != MEMTAG)
+            fprintf(fp," INVALID");
+        xmlMemContentShow(fp, p);
+        fprintf(fp,"\n");
+        p = p->mh_next;
+    }
+#else
+    fprintf(fp,"Memory list not compiled (MEM_LIST not defined !)\n");
+#endif
+
+    /* only close the stream this function opened itself */
+    if (old_fp == NULL)
+        fclose(fp);
+}
+
+#ifdef MEM_LIST
+
+/* Link a block header at the head of the list of tracked blocks */
+void debugmem_list_add(MEMHDR *p)
+{
+    p->mh_next = memlist;
+    p->mh_prev = NULL;
+    if (memlist) memlist->mh_prev = p;
+    memlist = p;
+#ifdef MEM_LIST_DEBUG
+    if (stderr)
+        xmlMemDisplay(stderr);
+#endif
+}
+
+/* Unlink a block header from the list of tracked blocks */
+void debugmem_list_delete(MEMHDR *p)
+{
+    if (p->mh_next)
+        p->mh_next->mh_prev = p->mh_prev;
+    if (p->mh_prev)
+        p->mh_prev->mh_next = p->mh_next;
+    else memlist = p->mh_next;
+#ifdef MEM_LIST_DEBUG
+    if (stderr)
+        xmlMemDisplay(stderr);
+#endif
+}
+
+#endif
+
+/*
+ * debugmem_tag_error : internal error function, called when a block header
+ * does not carry the expected MEMTAG.
+ */
+
+void debugmem_tag_error(void *p)
+{
+    xmlGenericError(xmlGenericErrorContext,
+            "Memory tag error occurs :%p \n\t bye\n", p);
+#ifdef MEM_LIST
+    if (stderr)
+        xmlMemDisplay(stderr);
+#endif
+}
+
+FILE *xmlMemoryDumpFile = NULL;
+
+
+/**
+ * xmlMemoryDump:
+ *
+ * Dump in-extenso the memory blocks allocated to the file .memdump, or to
+ * stdout when that file cannot be opened. Does nothing unless the memory
+ * debugging is compiled in.
+ */
+
+void
+xmlMemoryDump(void)
+{
+#if defined(DEBUG_MEMORY_LOCATION) || defined(DEBUG_MEMORY)
+    FILE *dump;
+
+    dump = fopen(".memdump", "w");
+    if (dump == NULL) xmlMemoryDumpFile = stdout;
+    else xmlMemoryDumpFile = dump;
+
+    xmlMemDisplay(xmlMemoryDumpFile);
+
+    if (dump != NULL) fclose(dump);
+#endif
+}
+
+
+/****************************************************************
+ *                                                              *
+ *              Initialization Routines                         *
+ *                                                              *
+ ****************************************************************/
+
+#if defined(DEBUG_MEMORY_LOCATION) || defined(DEBUG_MEMORY)
+xmlFreeFunc xmlFree = (xmlFreeFunc) xmlMemFree;
+xmlMallocFunc xmlMalloc = (xmlMallocFunc) xmlMemMalloc;
+xmlReallocFunc xmlRealloc = (xmlReallocFunc) xmlMemRealloc;
+xmlStrdupFunc xmlMemStrdup = (xmlStrdupFunc) xmlMemoryStrdup;
+#else
+xmlFreeFunc xmlFree = (xmlFreeFunc) free;
+xmlMallocFunc xmlMalloc = (xmlMallocFunc) malloc;
+xmlReallocFunc xmlRealloc = (xmlReallocFunc) realloc;
+xmlStrdupFunc xmlMemStrdup = (xmlStrdupFunc) strdup;
+#endif
+
+/**
+ * xmlInitMemory:
+ *
+ * Initialize the memory layer: reads the XML_MEM_BREAKPOINT environment
+ * variable into xmlMemStopAtBlock.
+ *
+ * Returns 0 on success, -1 if the layer was already initialized
+ */
+
+static int xmlInitMemoryDone = 0;
+
+int
+xmlInitMemory(void)
+{
+     int ret;
+
+#ifdef HAVE_STDLIB_H
+     char *breakpoint;
+#endif
+
+     if (xmlInitMemoryDone) return(-1);
+     /*
+      * Record that the setup ran. Without this the guard above never
+      * fires and the allocation routines, which test xmlMemInitialized,
+      * repeat the getenv()/sscanf() below on every call.
+      */
+     xmlInitMemoryDone = 1;
+     xmlMemInitialized = 1;
+
+#ifdef HAVE_STDLIB_H
+     breakpoint = getenv("XML_MEM_BREAKPOINT");
+     if (breakpoint != NULL) {
+         sscanf(breakpoint, "%d", &xmlMemStopAtBlock);
+     }
+#endif
+
+#ifdef DEBUG_MEMORY
+     xmlGenericError(xmlGenericErrorContext,
+             "xmlInitMemory() Ok\n");
+#endif
+     ret = 0;
+     return(ret);
+}
+
+/**
+ * xmlMemSetup:
+ * @freeFunc: the free() function to use
+ * @mallocFunc: the malloc() function to use
+ * @reallocFunc: the realloc() function to use
+ * @strdupFunc: the strdup() function to use
+ *
+ * Override the default memory access functions with a new set
+ * This has to be called before any other libxml routines !
+ *
+ * Should this be blocked if there was already some allocations
+ * done ?
+ *
+ * Returns 0 on success, -1 if any of the four functions is NULL (in which
+ * case nothing is changed)
+ */
+int
+xmlMemSetup(xmlFreeFunc freeFunc, xmlMallocFunc mallocFunc,
+            xmlReallocFunc reallocFunc, xmlStrdupFunc strdupFunc) {
+    /* validate the whole set first so that it is replaced all or nothing */
+    if (freeFunc == NULL)
+        return(-1);
+    if (mallocFunc == NULL)
+        return(-1);
+    if (reallocFunc == NULL)
+        return(-1);
+    if (strdupFunc == NULL)
+        return(-1);
+    xmlFree = freeFunc;
+    xmlMalloc = mallocFunc;
+    xmlRealloc = reallocFunc;
+    xmlMemStrdup = strdupFunc;
+    return(0);
+}
+
+/**
+ * xmlMemGet:
+ * @freeFunc: the free() function in use
+ * @mallocFunc: the malloc() function in use
+ * @reallocFunc: the realloc() function in use
+ * @strdupFunc: the strdup() function in use
+ *
+ * Return the memory access functions set currently in use.
+ * Any of the four output pointers may be NULL, it is then skipped.
+ *
+ * Returns 0 on success
+ */
+int
+xmlMemGet(xmlFreeFunc *freeFunc, xmlMallocFunc *mallocFunc,
+          xmlReallocFunc *reallocFunc, xmlStrdupFunc *strdupFunc) {
+    if (freeFunc != NULL) *freeFunc = xmlFree;
+    if (mallocFunc != NULL) *mallocFunc = xmlMalloc;
+    if (reallocFunc != NULL) *reallocFunc = xmlRealloc;
+    if (strdupFunc != NULL) *strdupFunc = xmlMemStrdup;
+    return(0);
+}
+
diff --git a/xmlmemory.h b/xmlmemory.h
new file mode 100644
index 00000000..1e533d15
--- /dev/null
+++ b/xmlmemory.h
@@ -0,0 +1,91 @@
+/*
+ * xmlmemory.h: interface for the memory allocation debug.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+
+#ifndef _DEBUG_MEMORY_ALLOC_
+#define _DEBUG_MEMORY_ALLOC_
+
+#include
+#include
+
+/*
+ * DEBUG_MEMORY_LOCATION should be activated only when debugging
+ * libxml.
+ */ +/* #define DEBUG_MEMORY_LOCATION */ + +#ifdef DEBUG +#ifndef DEBUG_MEMORY +#define DEBUG_MEMORY +#endif +#endif + +#ifdef DEBUG_MEMORY_LOCATION +#define MEM_LIST /* keep a list of all the allocated memory blocks */ +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The XML memory wrapper support 4 basic overloadable functions + */ +typedef void (*xmlFreeFunc)(void *); +typedef void *(*xmlMallocFunc)(int); +typedef void *(*xmlReallocFunc)(void *, int); +typedef char *(*xmlStrdupFunc)(const char *); + +/* + * The 4 interfaces used for all memory handling within libxml + */ +LIBXML_DLL_IMPORT extern xmlFreeFunc xmlFree; +LIBXML_DLL_IMPORT extern xmlMallocFunc xmlMalloc; +LIBXML_DLL_IMPORT extern xmlReallocFunc xmlRealloc; +LIBXML_DLL_IMPORT extern xmlStrdupFunc xmlMemStrdup; + +/* + * The way to overload the existing functions + */ +int xmlMemSetup (xmlFreeFunc freeFunc, + xmlMallocFunc mallocFunc, + xmlReallocFunc reallocFunc, + xmlStrdupFunc strdupFunc); +int xmlMemGet (xmlFreeFunc *freeFunc, + xmlMallocFunc *mallocFunc, + xmlReallocFunc *reallocFunc, + xmlStrdupFunc *strdupFunc); + +/* + * Initialization of the memory layer + */ +int xmlInitMemory (void); + +/* + * Those are specific to the XML debug memory wrapper + */ +int xmlMemUsed (void); +void xmlMemDisplay (FILE *fp); +void xmlMemShow (FILE *fp, int nr); +void xmlMemoryDump (void); +int xmlInitMemory (void); + +#ifdef DEBUG_MEMORY_LOCATION +#define xmlMalloc(x) xmlMallocLoc((x), __FILE__, __LINE__) +#define xmlRealloc(p, x) xmlReallocLoc((p), (x), __FILE__, __LINE__) +#define xmlMemStrdup(x) xmlMemStrdupLoc((x), __FILE__, __LINE__) + +void * xmlMallocLoc(int size, const char *file, int line); +void * xmlReallocLoc(void *ptr,int size, const char *file, int line); +char * xmlMemStrdupLoc(const char *str, const char *file, int line); +#endif /* DEBUG_MEMORY_LOCATION */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _DEBUG_MEMORY_ALLOC_ */ + diff --git a/xmlversion.h.in 
b/xmlversion.h.in new file mode 100644 index 00000000..71ab184c --- /dev/null +++ b/xmlversion.h.in @@ -0,0 +1,129 @@ +/* + * xmlversion.h : compile-time version informations for the XML parser. + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +extern void xmlCheckVersion(int version); +#define LIBXML_DOTTED_VERSION "@VERSION@" +#define LIBXML_VERSION @LIBXML_VERSION_NUMBER@ +#define LIBXML_VERSION_STRING "@LIBXML_VERSION_NUMBER@" +#define LIBXML_TEST_VERSION xmlCheckVersion(@LIBXML_VERSION_NUMBER@); + +/* + * Whether the FTP support is configured in + */ +#if @WITH_FTP@ +#define LIBXML_FTP_ENABLED +#else +#define LIBXML_FTP_DISABLED +#endif + +/* + * Whether the HTTP support is configured in + */ +#if @WITH_HTTP@ +#define LIBXML_HTTP_ENABLED +#else +#define LIBXML_HTTP_DISABLED +#endif + +/* + * Whether the HTML support is configured in + */ +#if @WITH_HTML@ +#define LIBXML_HTML_ENABLED +#else +#define LIBXML_HTML_DISABLED +#endif + +/* + * Whether the Docbook support is configured in +#if @WITH_SGML@ +#define LIBXML_SGML_ENABLED +#else +#define LIBXML_SGML_DISABLED +#endif + */ + +/* + * Whether XPath is configured in + */ +#if @WITH_XPATH@ +#define LIBXML_XPATH_ENABLED +#else +#define LIBXML_XPATH_DISABLED +#endif + +/* + * Whether XPointer is configured in + */ +#if @WITH_XPTR@ +#define LIBXML_XPTR_ENABLED +#else +#define LIBXML_XPTR_DISABLED +#endif + +/* + * Whether XInclude is configured in + */ +#if @WITH_XINCLUDE@ +#define LIBXML_XINCLUDE_ENABLED +#else +#define LIBXML_XINCLUDE_DISABLED +#endif + +/* + * Whether iconv support is available + */ +#ifndef WIN32 +#if @WITH_ICONV@ +#define LIBXML_ICONV_ENABLED +#else +#define LIBXML_ICONV_DISABLED +#endif +#endif + +/* + * Whether Debugging module is configured in + */ 
+#if @WITH_DEBUG@ +#define LIBXML_DEBUG_ENABLED +#else +#define LIBXML_DEBUG_DISABLED +#endif + +/* + * Whether the memory debugging is configured in + */ +#if @WITH_MEM_DEBUG@ +#define DEBUG_MEMORY_LOCATION +#endif + +#ifndef LIBXML_DLL_IMPORT +#if defined(WIN32) && !defined(STATIC) +#define LIBXML_DLL_IMPORT __declspec(dllimport) +#else +#define LIBXML_DLL_IMPORT +#endif +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/xpath.c b/xpath.c new file mode 100644 index 00000000..669e4cd3 --- /dev/null +++ b/xpath.c @@ -0,0 +1,6498 @@ +/* + * xpath.c: XML Path Language implementation + * XPath is a language for addressing parts of an XML document, + * designed to be used by both XSLT and XPointer + * + * Reference: W3C Recommendation 16 November 1999 + * http://www.w3.org/TR/1999/REC-xpath-19991116 + * Public reference: + * http://www.w3.org/TR/xpath + * + * See COPYRIGHT for the status of this software + * + * Author: Daniel.Veillard@w3.org + * + * 14 Nov 2000 ht - truncated declaration of xmlXPathEvalRelativeLocationPath + * for VMS + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +#include +#ifdef LIBXML_XPATH_ENABLED + +#include +#include + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_MATH_H +#include +#endif +#ifdef HAVE_FLOAT_H +#include +#endif +#ifdef HAVE_IEEEFP_H +#include +#endif +#ifdef HAVE_NAN_H +#include +#endif +#ifdef HAVE_CTYPE_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#ifdef LIBXML_XPTR_ENABLED +#include +#endif +#ifdef LIBXML_DEBUG_ENABLED +#include +#endif +#include + +/* #define DEBUG */ +/* #define DEBUG_STEP */ +/* #define DEBUG_EXPR */ + +void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +double xmlXPathStringEvalNumber(const xmlChar *str); + +/* + * Setup stuff for floating point + * The lack of portability of this section of the libc is annoying ! 
+ */
+/* Placeholder values; xmlXPathInit() replaces them with NaN, +Inf, -Inf */
+double xmlXPathNAN = 0;
+double xmlXPathPINF = 1;
+double xmlXPathNINF = -1;
+
+/*
+ * Fallback isinf() for platforms that provide neither the macro nor the
+ * function; the first classification primitive found by configure is used.
+ */
+#ifndef isinf
+#ifndef HAVE_ISINF
+
+#if HAVE_FPCLASS
+
+/* fpclass() variant: returns -1 for -Inf, 1 for +Inf, 0 otherwise */
+int isinf(double d) {
+    fpclass_t type = fpclass(d);
+    switch (type) {
+        case FP_NINF:
+            return(-1);
+        case FP_PINF:
+            return(1);
+    }
+    return(0);
+}
+
+#elif defined(HAVE_FP_CLASS) || defined(HAVE_FP_CLASS_D)
+
+#if HAVE_FP_CLASS_H
+#include
+#endif
+
+/* fp_class()/fp_class_d() variant: returns -1 for -Inf, 1 for +Inf,
+ * 0 otherwise */
+int isinf(double d) {
+#if HAVE_FP_CLASS
+    int fpclass = fp_class(d);
+#else
+    int fpclass = fp_class_d(d);
+#endif
+    if (fpclass == FP_POS_INF)
+        return(1);
+    if (fpclass == FP_NEG_INF)
+        return(-1);
+    return(0);
+}
+
+#elif defined(HAVE_CLASS)
+
+/* class() variant: returns -1 for -Inf, 1 for +Inf, 0 otherwise */
+int isinf(double d) {
+    int fpclass = class(d);
+    if (fpclass == FP_PLUS_INF)
+        return(1);
+    if (fpclass == FP_MINUS_INF)
+        return(-1);
+    return(0);
+}
+#elif defined(finite) || defined(HAVE_FINITE)
+/* finite() variant: x==x rules out NaN. Unlike the other variants the
+ * result carries no sign: it is 1 for both infinities */
+int isinf(double x) { return !finite(x) && x==x; }
+#elif defined(HUGE_VAL)
+/* last resort: compare with HUGE_VAL; returns -1, 1 or 0 */
+int isinf(double x)
+{
+    if (x == HUGE_VAL)
+        return(1);
+    if (x == -HUGE_VAL)
+        return(-1);
+    return(0);
+}
+#endif
+
+#endif /* ! HAVE_ISINF */
+#endif /* ! defined(isinf) */
+
+/* Map isnan() onto isnand() where only the latter exists */
+#ifndef isnan
+#ifndef HAVE_ISNAN
+
+#ifdef HAVE_ISNAND
+#define isnan(f) isnand(f)
+#endif /* HAVE_ISNAND */
+
+#endif /* ! HAVE_ISNAN */
+#endif /* !
defined(isnan) */
+
+/**
+ * xmlXPathInit:
+ *
+ * Initialize the XPath environment: computes the xmlXPathNAN,
+ * xmlXPathPINF and xmlXPathNINF values. Only the first call does the work.
+ */
+void
+xmlXPathInit(void) {
+    static int initialized = 0;
+
+    if (initialized) return;
+
+    /* NOTE(review): these divisions by zero presumably rely on IEEE 754
+     * arithmetic to yield NaN and the infinities - confirm for the
+     * supported platforms */
+    xmlXPathNAN = 0;
+    xmlXPathNAN /= 0;
+
+    xmlXPathPINF = 1;
+    xmlXPathPINF /= 0;
+
+    xmlXPathNINF = -1;
+    xmlXPathNINF /= 0;
+
+    initialized = 1;
+}
+
+/************************************************************************
+ *                                                                      *
+ *              Debugging related functions                             *
+ *                                                                      *
+ ************************************************************************/
+
+#define TODO                                                            \
+    xmlGenericError(xmlGenericErrorContext,                             \
+            "Unimplemented block at %s:%d\n",                           \
+            __FILE__, __LINE__);
+
+#define STRANGE                                                         \
+    xmlGenericError(xmlGenericErrorContext,                             \
+            "Internal error at %s:%d\n",                                \
+            __FILE__, __LINE__);
+
+#ifdef LIBXML_DEBUG_ENABLED
+/**
+ * xmlXPathDebugDumpNode:
+ * @output:  the FILE * to dump the output
+ * @cur:  the node to dump, may be NULL
+ * @depth:  the indentation level
+ *
+ * Dump one node for debugging; indentation is two spaces per level,
+ * capped at 25 levels.
+ */
+void xmlXPathDebugDumpNode(FILE *output, xmlNodePtr cur, int depth) {
+    int i;
+    char shift[100];
+
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+    if (cur == NULL) {
+        /* shift is data, not a format: always print it through "%s" */
+        fprintf(output, "%s", shift);
+        fprintf(output, "Node is NULL !\n");
+        return;
+
+    }
+
+    if ((cur->type == XML_DOCUMENT_NODE) ||
+        (cur->type == XML_HTML_DOCUMENT_NODE)) {
+        fprintf(output, "%s", shift);
+        fprintf(output, " /\n");
+    } else if (cur->type == XML_ATTRIBUTE_NODE)
+        xmlDebugDumpAttr(output, (xmlAttrPtr)cur, depth);
+    else
+        xmlDebugDumpOneNode(output, cur, depth);
+}
+
+/**
+ * xmlXPathDebugDumpNodeSet:
+ * @output:  the FILE * to dump the output
+ * @cur:  the node set to dump, may be NULL
+ * @depth:  the indentation level
+ *
+ * Dump the size of a node set, then each node one level deeper.
+ */
+void xmlXPathDebugDumpNodeSet(FILE *output, xmlNodeSetPtr cur, int depth) {
+    int i;
+    char shift[100];
+
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    if (cur == NULL) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "NodeSet is NULL !\n");
+        return;
+
+    }
+
+    fprintf(output, "Set contains %d nodes:\n", cur->nodeNr);
+    for (i = 0;i < cur->nodeNr;i++) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "%d", i + 1);
+        xmlXPathDebugDumpNode(output, cur->nodeTab[i], depth + 1);
+    }
+}
+
+#if defined(LIBXML_XPTR_ENABLED)
+void xmlXPathDebugDumpObject(FILE *output, xmlXPathObjectPtr cur, int depth);
+/**
+ * xmlXPathDebugDumpLocationSet:
+ * @output:  the FILE * to dump the output
+ * @cur:  the location set to dump, may be NULL
+ * @depth:  the indentation level
+ *
+ * Dump each location of the set as an XPath object, one level deeper.
+ */
+void xmlXPathDebugDumpLocationSet(FILE *output, xmlLocationSetPtr cur, int depth) {
+    int i;
+    char shift[100];
+
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    if (cur == NULL) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "LocationSet is NULL !\n");
+        return;
+
+    }
+
+    for (i = 0;i < cur->locNr;i++) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "%d : ", i + 1);
+        xmlXPathDebugDumpObject(output, cur->locTab[i], depth + 1);
+    }
+}
+#endif
+
+/**
+ * xmlXPathDebugDumpObject:
+ * @output:  the FILE * to dump the output
+ * @cur:  the object to inspect, may be NULL
+ * @depth:  the indentation level
+ *
+ * Dump the content of an XPath object for debugging purposes.
+ */
+void xmlXPathDebugDumpObject(FILE *output, xmlXPathObjectPtr cur, int depth) {
+    int i;
+    char shift[100];
+
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    fprintf(output, "%s", shift);
+
+    if (cur == NULL) {
+        fprintf(output, "Object is empty (NULL)\n");
+        return;
+    }
+    switch(cur->type) {
+        case XPATH_UNDEFINED:
+            fprintf(output, "Object is uninitialized\n");
+            break;
+        case XPATH_NODESET:
+            fprintf(output, "Object is a Node Set :\n");
+            xmlXPathDebugDumpNodeSet(output, cur->nodesetval, depth);
+            break;
+        case XPATH_XSLT_TREE:
+            fprintf(output, "Object is an XSLT value tree :\n");
+            xmlXPathDebugDumpNode(output, cur->user, depth);
+            break;
+        case XPATH_BOOLEAN:
+            fprintf(output, "Object is a Boolean : ");
+            if (cur->boolval) fprintf(output, "true\n");
+            else fprintf(output, "false\n");
+            break;
+        case XPATH_NUMBER:
+            fprintf(output, "Object is a number : %0g\n", cur->floatval);
+            break;
+        case XPATH_STRING:
+            fprintf(output, "Object is a string : ");
+            xmlDebugDumpString(output, cur->stringval);
+            fprintf(output, "\n");
+            break;
+        case XPATH_POINT:
+            fprintf(output, "Object is a point : index %d in node", cur->index);
+            xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user, depth + 1);
+            fprintf(output, "\n");
+            break;
+        case XPATH_RANGE:
+            /* no end point, or an end point equal to the start point */
+            if ((cur->user2 == NULL) ||
+                ((cur->user2 == cur->user) && (cur->index == cur->index2))) {
+                fprintf(output, "Object is a collapsed range :\n");
+                fprintf(output, "%s", shift);
+                if (cur->index >= 0)
+                    fprintf(output, "index %d in ", cur->index);
+                fprintf(output, "node\n");
+                xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user,
+                                      depth + 1);
+            } else {
+                fprintf(output, "Object is a range :\n");
+                fprintf(output, "%s", shift);
+                fprintf(output, "From ");
+                if (cur->index >= 0)
+                    fprintf(output, "index %d in ", cur->index);
+                fprintf(output, "node\n");
+                xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user,
+                                      depth + 1);
+                fprintf(output, "%s", shift);
+                fprintf(output, "To ");
+                if (cur->index2 >= 0)
+                    fprintf(output, "index %d in ", cur->index2);
+                fprintf(output, "node\n");
+                xmlXPathDebugDumpNode(output, (xmlNodePtr) cur->user2,
+                                      depth + 1);
+                fprintf(output, "\n");
+            }
+            break;
+        case XPATH_LOCATIONSET:
+#if defined(LIBXML_XPTR_ENABLED)
+            fprintf(output, "Object is a Location Set:\n");
+            xmlXPathDebugDumpLocationSet(output,
+                    (xmlLocationSetPtr) cur->user, depth);
+#endif
+            break;
+        case XPATH_USERS:
+            fprintf(output, "Object is user defined\n");
+            break;
+    }
+}
+#endif
+
+/************************************************************************
+ *                                                                      *
+ *              Parser stacks related functions and macros              *
+ *                                                                      *
+ ************************************************************************/
+
+/*
+ * Generic function for accessing stacks in the Parser Context
+ */
+
+#define PUSH_AND_POP(type, name)                                        \
+extern int name##Push(xmlXPathParserContextPtr ctxt, type value) {      \
+    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
+        ctxt->name##Max *= 2;                                           \
+        ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,          \
+                     ctxt->name##Max * sizeof(ctxt->name##Tab[0]));     \
+        if (ctxt->name##Tab == NULL) {                                  \
+            xmlGenericError(xmlGenericErrorContext,                     \
+                    "realloc failed !\n");                              \
+            return(0);                                                  \
+        }                                                               \
+    }                                                                   \
+    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
+    ctxt->name = value;                                                 \
+    return(ctxt->name##Nr++);                                           \
+}                                                                       \
+extern type
name##Pop(xmlXPathParserContextPtr ctxt) { \ + type ret; \ + if (ctxt->name##Nr <= 0) return(0); \ + ctxt->name##Nr--; \ + if (ctxt->name##Nr > 0) \ + ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ + else \ + ctxt->name = NULL; \ + ret = ctxt->name##Tab[ctxt->name##Nr]; \ + ctxt->name##Tab[ctxt->name##Nr] = 0; \ + return(ret); \ +} \ + +PUSH_AND_POP(xmlXPathObjectPtr, value) + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one need to make assumption on the context to use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * CUR returns the current xmlChar value, i.e. a 8 bit value + * in ISO-Latin or UTF-8. + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. + * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * CURRENT Returns the current char value, with the full decoding of + * UTF-8 if we are using this mode. It returns an int. + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. + * It returns the pointer to the current xmlChar. + */ + +#define CUR (*ctxt->cur) +#define SKIP(val) ctxt->cur += (val) +#define NXT(val) ctxt->cur[(val)] +#define CUR_PTR ctxt->cur + +#define SKIP_BLANKS \ + while (IS_BLANK(*(ctxt->cur))) NEXT + +#define CURRENT (*ctxt->cur) +#define NEXT ((*ctxt->cur) ? 
ctxt->cur++: ctxt->cur) + +/************************************************************************ + * * + * Error handling routines * + * * + ************************************************************************/ + + +const char *xmlXPathErrorMessages[] = { + "Ok", + "Number encoding", + "Unfinished litteral", + "Start of litteral", + "Expected $ for variable reference", + "Undefined variable", + "Invalid predicate", + "Invalid expression", + "Missing closing curly brace", + "Unregistered function", + "Invalid operand", + "Invalid type", + "Invalid number of arguments", + "Invalid context size", + "Invalid context position", + "Memory allocation error", + "Syntax error", + "Resource error", + "Sub resource error", + "Undefined namespace prefix" +}; + +/** + * xmlXPathError: + * @ctxt: the XPath Parser context + * @file: the file name + * @line: the line number + * @no: the error number + * + * Create a new xmlNodeSetPtr of type double and of value @val + * + * Returns the newly created object. 
+ */ +void +xmlXPatherror(xmlXPathParserContextPtr ctxt, const char *file, + int line, int no) { + int n; + const xmlChar *cur; + const xmlChar *base; + + xmlGenericError(xmlGenericErrorContext, + "Error %s:%d: %s\n", file, line, + xmlXPathErrorMessages[no]); + + cur = ctxt->cur; + base = ctxt->base; + while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) { + cur--; + } + n = 0; + while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r')) + cur--; + if ((*cur == '\n') || (*cur == '\r')) cur++; + base = cur; + n = 0; + while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) { + xmlGenericError(xmlGenericErrorContext, "%c", (unsigned char) *cur++); + n++; + } + xmlGenericError(xmlGenericErrorContext, "\n"); + cur = ctxt->cur; + while ((*cur == '\n') || (*cur == '\r')) + cur--; + n = 0; + while ((cur != base) && (n++ < 80)) { + xmlGenericError(xmlGenericErrorContext, " "); + base++; + } + xmlGenericError(xmlGenericErrorContext,"^\n"); +} + + +/************************************************************************ + * * + * Routines to handle NodeSets * + * * + ************************************************************************/ + +/** + * xmlXPathCmpNodes: + * @node1: the first node + * @node2: the second node + * + * Compare two nodes w.r.t document order + * + * Returns -2 in case of error 1 if first point < second point, 0 if + * that's the same node, -1 otherwise + */ +int +xmlXPathCmpNodes(xmlNodePtr node1, xmlNodePtr node2) { + int depth1, depth2; + xmlNodePtr cur, root; + + if ((node1 == NULL) || (node2 == NULL)) + return(-2); + /* + * a couple of optimizations which will avoid computations in most cases + */ + if (node1 == node2) + return(0); + if (node1 == node2->prev) + return(1); + if (node1 == node2->next) + return(-1); + + /* + * compute depth to root + */ + for (depth2 = 0, cur = node2;cur->parent != NULL;cur = cur->parent) { + if (cur == node1) + return(1); + depth2++; + } + root = cur; + for (depth1 = 0, cur = 
node1;cur->parent != NULL;cur = cur->parent) { + if (cur == node2) + return(-1); + depth1++; + } + /* + * Distinct document (or distinct entities :-( ) case. + */ + if (root != cur) { + return(-2); + } + /* + * get the nearest common ancestor. + */ + while (depth1 > depth2) { + depth1--; + node1 = node1->parent; + } + while (depth2 > depth1) { + depth2--; + node2 = node2->parent; + } + while (node1->parent != node2->parent) { + node1 = node1->parent; + node2 = node2->parent; + /* should not happen but just in case ... */ + if ((node1 == NULL) || (node2 == NULL)) + return(-2); + } + /* + * Find who's first. + */ + if (node1 == node2->next) + return(-1); + for (cur = node1->next;cur != NULL;cur = cur->next) + if (cur == node2) + return(1); + return(-1); /* assume there is no sibling list corruption */ +} + +/** + * xmlXPathNodeSetSort: + * @set: the node set + * + * Sort the node set in document order + */ +void +xmlXPathNodeSetSort(xmlNodeSetPtr set) { + int i, j, incr, len, rc; + xmlNodePtr tmp; + + if (set == NULL) + return; + + /* Use Shell's sort to sort the node-set */ + len = set->nodeNr; + for (incr = len / 2; incr > 0; incr /= 2) { + for (i = incr; i < len; i++) { + j = i - incr; + while (j >= 0) { + rc = xmlXPathCmpNodes(set->nodeTab[j], set->nodeTab[j + incr]); + if (rc != 1 && rc != -2) { + tmp = set->nodeTab[j]; + set->nodeTab[j] = set->nodeTab[j + incr]; + set->nodeTab[j + incr] = tmp; + j -= incr; + } else + break; + } + } + } +} + +#define XML_NODESET_DEFAULT 10 +/** + * xmlXPathNodeSetCreate: + * @val: an initial xmlNodePtr, or NULL + * + * Create a new xmlNodeSetPtr of type double and of value @val + * + * Returns the newly created object. 
+ */ +xmlNodeSetPtr +xmlXPathNodeSetCreate(xmlNodePtr val) { + xmlNodeSetPtr ret; + + ret = (xmlNodeSetPtr) xmlMalloc(sizeof(xmlNodeSet)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlNodeSet)); + if (val != NULL) { + ret->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (ret->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewNodeSet: out of memory\n"); + return(NULL); + } + memset(ret->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + ret->nodeMax = XML_NODESET_DEFAULT; + ret->nodeTab[ret->nodeNr++] = val; + } + return(ret); +} + +/** + * xmlXPathNodeSetAdd: + * @cur: the initial node set + * @val: a new xmlNodePtr + * + * add a new xmlNodePtr ot an existing NodeSet + */ +void +xmlXPathNodeSetAdd(xmlNodeSetPtr cur, xmlNodePtr val) { + int i; + + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->nodeNr;i++) + if (cur->nodeTab[i] == val) return; + + /* + * grow the nodeTab if needed + */ + if (cur->nodeMax == 0) { + cur->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (cur->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAdd: out of memory\n"); + return; + } + memset(cur->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + cur->nodeMax = XML_NODESET_DEFAULT; + } else if (cur->nodeNr == cur->nodeMax) { + xmlNodePtr *temp; + + cur->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(cur->nodeTab, cur->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAdd: out of memory\n"); + return; + } + cur->nodeTab = temp; + } + cur->nodeTab[cur->nodeNr++] = val; +} + +/** + * xmlXPathNodeSetAddUnique: + * @cur: the initial node set + * @val: a new xmlNodePtr + * + * add a new xmlNodePtr ot an existing 
NodeSet, optimized version + * when we are sure the node is not already in the set. + */ +void +xmlXPathNodeSetAddUnique(xmlNodeSetPtr cur, xmlNodePtr val) { + if (val == NULL) return; + + /* + * grow the nodeTab if needed + */ + if (cur->nodeMax == 0) { + cur->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (cur->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAddUnique: out of memory\n"); + return; + } + memset(cur->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + cur->nodeMax = XML_NODESET_DEFAULT; + } else if (cur->nodeNr == cur->nodeMax) { + xmlNodePtr *temp; + + cur->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(cur->nodeTab, cur->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetAddUnique: out of memory\n"); + return; + } + cur->nodeTab = temp; + } + cur->nodeTab[cur->nodeNr++] = val; +} + +/** + * xmlXPathNodeSetMerge: + * @val1: the first NodeSet or NULL + * @val2: the second NodeSet + * + * Merges two nodesets, all nodes from @val2 are added to @val1 + * if @val1 is NULL, a new set is created and copied from @val2 + * + * Returns val1 once extended or NULL in case of error. 
+ */ +xmlNodeSetPtr +xmlXPathNodeSetMerge(xmlNodeSetPtr val1, xmlNodeSetPtr val2) { + int i, j, initNr; + + if (val2 == NULL) return(val1); + if (val1 == NULL) { + val1 = xmlXPathNodeSetCreate(NULL); + } + + initNr = val1->nodeNr; + + for (i = 0;i < val2->nodeNr;i++) { + /* + * check against doublons + */ + for (j = 0; j < initNr; j++) + if (val1->nodeTab[j] == val2->nodeTab[i]) continue; + + /* + * grow the nodeTab if needed + */ + if (val1->nodeMax == 0) { + val1->nodeTab = (xmlNodePtr *) xmlMalloc(XML_NODESET_DEFAULT * + sizeof(xmlNodePtr)); + if (val1->nodeTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetMerge: out of memory\n"); + return(NULL); + } + memset(val1->nodeTab, 0 , + XML_NODESET_DEFAULT * (size_t) sizeof(xmlNodePtr)); + val1->nodeMax = XML_NODESET_DEFAULT; + } else if (val1->nodeNr == val1->nodeMax) { + xmlNodePtr *temp; + + val1->nodeMax *= 2; + temp = (xmlNodePtr *) xmlRealloc(val1->nodeTab, val1->nodeMax * + sizeof(xmlNodePtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetMerge: out of memory\n"); + return(NULL); + } + val1->nodeTab = temp; + } + val1->nodeTab[val1->nodeNr++] = val2->nodeTab[i]; + } + + return(val1); +} + +/** + * xmlXPathNodeSetDel: + * @cur: the initial node set + * @val: an xmlNodePtr + * + * Removes an xmlNodePtr from an existing NodeSet + */ +void +xmlXPathNodeSetDel(xmlNodeSetPtr cur, xmlNodePtr val) { + int i; + + if (cur == NULL) return; + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->nodeNr;i++) + if (cur->nodeTab[i] == val) break; + + if (i >= cur->nodeNr) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNodeSetDel: Node %s wasn't found in NodeList\n", + val->name); +#endif + return; + } + cur->nodeNr--; + for (;i < cur->nodeNr;i++) + cur->nodeTab[i] = cur->nodeTab[i + 1]; + cur->nodeTab[cur->nodeNr] = NULL; +} + +/** + * xmlXPathNodeSetRemove: + * @cur: the initial node set + * @val: the index to 
remove + * + * Removes an entry from an existing NodeSet list. + */ +void +xmlXPathNodeSetRemove(xmlNodeSetPtr cur, int val) { + if (cur == NULL) return; + if (val >= cur->nodeNr) return; + cur->nodeNr--; + for (;val < cur->nodeNr;val++) + cur->nodeTab[val] = cur->nodeTab[val + 1]; + cur->nodeTab[cur->nodeNr] = NULL; +} + +/** + * xmlXPathFreeNodeSet: + * @obj: the xmlNodeSetPtr to free + * + * Free the NodeSet compound (not the actual nodes !). + */ +void +xmlXPathFreeNodeSet(xmlNodeSetPtr obj) { + if (obj == NULL) return; + if (obj->nodeTab != NULL) { +#ifdef DEBUG + memset(obj->nodeTab, 0xB , (size_t) sizeof(xmlNodePtr) * obj->nodeMax); +#endif + xmlFree(obj->nodeTab); + } +#ifdef DEBUG + memset(obj, 0xB , (size_t) sizeof(xmlNodeSet)); +#endif + xmlFree(obj); +} + +/** + * xmlXPathFreeValueTree: + * @obj: the xmlNodeSetPtr to free + * + * Free the NodeSet compound and the actual tree, this is different + * from xmlXPathFreeNodeSet() + */ +void +xmlXPathFreeValueTree(xmlNodeSetPtr obj) { + int i; + + if (obj == NULL) return; + for (i = 0;i < obj->nodeNr;i++) + if (obj->nodeTab[i] != NULL) + xmlFreeNode(obj->nodeTab[i]); + + if (obj->nodeTab != NULL) { +#ifdef DEBUG + memset(obj->nodeTab, 0xB , (size_t) sizeof(xmlNodePtr) * obj->nodeMax); +#endif + xmlFree(obj->nodeTab); + } +#ifdef DEBUG + memset(obj, 0xB , (size_t) sizeof(xmlNodeSet)); +#endif + xmlFree(obj); +} + +#if defined(DEBUG) || defined(DEBUG_STEP) +/** + * xmlGenericErrorContextNodeSet: + * @output: a FILE * for the output + * @obj: the xmlNodeSetPtr to free + * + * Quick display of a NodeSet + */ +void +xmlGenericErrorContextNodeSet(FILE *output, xmlNodeSetPtr obj) { + int i; + + if (output == NULL) output = xmlGenericErrorContext; + if (obj == NULL) { + fprintf(output, "NodeSet == NULL !\n"); + return; + } + if (obj->nodeNr == 0) { + fprintf(output, "NodeSet is empty\n"); + return; + } + if (obj->nodeTab == NULL) { + fprintf(output, " nodeTab == NULL !\n"); + return; + } + for (i = 0; i < obj->nodeNr; 
i++) { + if (obj->nodeTab[i] == NULL) { + fprintf(output, " NULL !\n"); + return; + } + if ((obj->nodeTab[i]->type == XML_DOCUMENT_NODE) || + (obj->nodeTab[i]->type == XML_HTML_DOCUMENT_NODE)) + fprintf(output, " /"); + else if (obj->nodeTab[i]->name == NULL) + fprintf(output, " noname!"); + else fprintf(output, " %s", obj->nodeTab[i]->name); + } + fprintf(output, "\n"); +} +#endif + +/** + * xmlXPathNewNodeSet: + * @val: the NodePtr value + * + * Create a new xmlXPathObjectPtr of type NodeSet and initialize + * it with the single Node @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewNodeSet(xmlNodePtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_NODESET; + ret->nodesetval = xmlXPathNodeSetCreate(val); + return(ret); +} + +/** + * xmlXPathNewValueTree: + * @val: the NodePtr value + * + * Create a new xmlXPathObjectPtr of type Value Tree (XSLT) and initialize + * it with the tree root @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewValueTree(xmlNodePtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_XSLT_TREE; + ret->nodesetval = xmlXPathNodeSetCreate(val); + return(ret); +} + +/** + * xmlXPathNewNodeSetList: + * @val: an existing NodeSet + * + * Create a new xmlXPathObjectPtr of type NodeSet and initialize + * it with the Nodeset @val + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPathNewNodeSetList(xmlNodeSetPtr val) { + xmlXPathObjectPtr ret; + int i; + + if (val == NULL) + ret = NULL; + else if (val->nodeTab == NULL) + ret = xmlXPathNewNodeSet(NULL); + else + { + ret = xmlXPathNewNodeSet(val->nodeTab[0]); + for (i = 1; i < val->nodeNr; ++i) + xmlXPathNodeSetAddUnique(ret->nodesetval, val->nodeTab[i]); + } + + return(ret); +} + +/** + * xmlXPathWrapNodeSet: + * @val: the NodePtr value + * + * Wrap the Nodeset @val in a new xmlXPathObjectPtr + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathWrapNodeSet(xmlNodeSetPtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathWrapNodeSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_NODESET; + ret->nodesetval = val; + return(ret); +} + +/** + * xmlXPathFreeNodeSetList: + * @obj: an existing NodeSetList object + * + * Free up the xmlXPathObjectPtr @obj but don't deallocate the objects in + * the list contrary to xmlXPathFreeObject(). + */ +void +xmlXPathFreeNodeSetList(xmlXPathObjectPtr obj) { + if (obj == NULL) return; +#ifdef DEBUG + memset(obj, 0xB , (size_t) sizeof(xmlXPathObject)); +#endif + xmlFree(obj); +} + +/************************************************************************ + * * + * Routines to handle extra functions * + * * + ************************************************************************/ + +/** + * xmlXPathRegisterFunc: + * @ctxt: the XPath context + * @name: the function name + * @f: the function implementation or NULL + * + * Register a new function. 
If @f is NULL it unregisters the function + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterFunc(xmlXPathContextPtr ctxt, const xmlChar *name, + xmlXPathFunction f) { + return(xmlXPathRegisterFuncNS(ctxt, name, NULL, f)); +} + +/** + * xmlXPathRegisterFuncNS: + * @ctxt: the XPath context + * @name: the function name + * @ns_uri: the function namespace URI + * @f: the function implementation or NULL + * + * Register a new function. If @f is NULL it unregisters the function + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterFuncNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri, xmlXPathFunction f) { + if (ctxt == NULL) + return(-1); + if (name == NULL) + return(-1); + + if (ctxt->funcHash == NULL) + ctxt->funcHash = xmlHashCreate(0); + if (ctxt->funcHash == NULL) + return(-1); + return(xmlHashAddEntry2(ctxt->funcHash, name, ns_uri, (void *) f)); +} + +/** + * xmlXPathFunctionLookup: + * @ctxt: the XPath context + * @name: the function name + * + * Search in the Function array of the context for the given + * function. + * + * Returns the xmlXPathFunction or NULL if not found + */ +xmlXPathFunction +xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { + return(xmlXPathFunctionLookupNS(ctxt, name, NULL)); +} + +/** + * xmlXPathFunctionLookupNS: + * @ctxt: the XPath context + * @name: the function name + * @ns_uri: the function namespace URI + * + * Search in the Function array of the context for the given + * function. 
+ * + * Returns the xmlXPathFunction or NULL if not found + */ +xmlXPathFunction +xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri) { + if (ctxt == NULL) + return(NULL); + if (ctxt->funcHash == NULL) + return(NULL); + if (name == NULL) + return(NULL); + + return((xmlXPathFunction) xmlHashLookup2(ctxt->funcHash, name, ns_uri)); +} + +/** + * xmlXPathRegisteredFuncsCleanup: + * @ctxt: the XPath context + * + * Cleanup the XPath context data associated to registered functions + */ +void +xmlXPathRegisteredFuncsCleanup(xmlXPathContextPtr ctxt) { + if (ctxt == NULL) + return; + + xmlHashFree(ctxt->funcHash, NULL); + ctxt->funcHash = NULL; +} + +/************************************************************************ + * * + * Routines to handle Variable * + * * + ************************************************************************/ + +/** + * xmlXPathRegisterVariable: + * @ctxt: the XPath context + * @name: the variable name + * @value: the variable value or NULL + * + * Register a new variable value. If @value is NULL it unregisters + * the variable + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterVariable(xmlXPathContextPtr ctxt, const xmlChar *name, + xmlXPathObjectPtr value) { + return(xmlXPathRegisterVariableNS(ctxt, name, NULL, value)); +} + +/** + * xmlXPathRegisterVariableNS: + * @ctxt: the XPath context + * @name: the variable name + * @ns_uri: the variable namespace URI + * @value: the variable value or NULL + * + * Register a new variable value. 
If @value is NULL it unregisters + * the variable + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterVariableNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathObjectPtr value) { + if (ctxt == NULL) + return(-1); + if (name == NULL) + return(-1); + + if (ctxt->varHash == NULL) + ctxt->varHash = xmlHashCreate(0); + if (ctxt->varHash == NULL) + return(-1); + return(xmlHashUpdateEntry2(ctxt->varHash, name, ns_uri, + (void *) value, + (xmlHashDeallocator)xmlXPathFreeObject)); +} + +/** + * xmlXPathRegisterVariableLookup: + * @ctxt: the XPath context + * @f: the lookup function + * @data: the lookup data + * + * register an external mechanism to do variable lookup + */ +void +xmlXPathRegisterVariableLookup(xmlXPathContextPtr ctxt, + xmlXPathVariableLookupFunc f, void *data) { + if (ctxt == NULL) + return; + ctxt->varLookupFunc = (void *) f; + ctxt->varLookupData = data; +} + +/** + * xmlXPathVariableLookup: + * @ctxt: the XPath context + * @name: the variable name + * + * Search in the Variable array of the context for the given + * variable value. + * + * Returns the value or NULL if not found + */ +xmlXPathObjectPtr +xmlXPathVariableLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { + if (ctxt == NULL) + return(NULL); + + if (ctxt->varLookupFunc != NULL) { + xmlXPathObjectPtr ret; + + ret = ((xmlXPathVariableLookupFunc)ctxt->varLookupFunc) + (ctxt->varLookupData, name, NULL); + if (ret != NULL) return(ret); + } + return(xmlXPathVariableLookupNS(ctxt, name, NULL)); +} + +/** + * xmlXPathVariableLookupNS: + * @ctxt: the XPath context + * @name: the variable name + * @ns_uri: the variable namespace URI + * + * Search in the Variable array of the context for the given + * variable value. 
+ * + * Returns the value or NULL if not found + */ +xmlXPathObjectPtr +xmlXPathVariableLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, + const xmlChar *ns_uri) { + if (ctxt == NULL) + return(NULL); + + if (ctxt->varLookupFunc != NULL) { + xmlXPathObjectPtr ret; + + ret = ((xmlXPathVariableLookupFunc)ctxt->varLookupFunc) + (ctxt->varLookupData, name, ns_uri); + if (ret != NULL) return(ret); + } + + if (ctxt->varHash == NULL) + return(NULL); + if (name == NULL) + return(NULL); + + return((xmlXPathObjectPtr) xmlHashLookup2(ctxt->varHash, name, ns_uri)); +} + +/** + * xmlXPathRegisteredVariablesCleanup: + * @ctxt: the XPath context + * + * Cleanup the XPath context data associated to registered variables + */ +void +xmlXPathRegisteredVariablesCleanup(xmlXPathContextPtr ctxt) { + if (ctxt == NULL) + return; + + xmlHashFree(ctxt->varHash, NULL); + ctxt->varHash = NULL; +} + +/** + * xmlXPathRegisterNs: + * @ctxt: the XPath context + * @prefix: the namespace prefix + * @ns_uri: the namespace name + * + * Register a new namespace. 
If @ns_uri is NULL it unregisters + * the namespace + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlXPathRegisterNs(xmlXPathContextPtr ctxt, const xmlChar *prefix, + const xmlChar *ns_uri) { + if (ctxt == NULL) + return(-1); + if (prefix == NULL) + return(-1); + + if (ctxt->nsHash == NULL) + ctxt->nsHash = xmlHashCreate(10); + if (ctxt->nsHash == NULL) + return(-1); + return(xmlHashUpdateEntry(ctxt->nsHash, prefix, (void *) ns_uri, + (xmlHashDeallocator)xmlFree)); +} + +/** + * xmlXPathNsLookup: + * @ctxt: the XPath context + * @prefix: the namespace prefix value + * + * Search in the namespace declaration array of the context for the given + * namespace name associated to the given prefix + * + * Returns the value or NULL if not found + */ +const xmlChar * +xmlXPathNsLookup(xmlXPathContextPtr ctxt, const xmlChar *prefix) { + if (ctxt == NULL) + return(NULL); + if (prefix == NULL) + return(NULL); + +#ifdef XML_XML_NAMESPACE + if (xmlStrEqual(prefix, (const xmlChar *) "xml")) + return(XML_XML_NAMESPACE); +#endif + + if (ctxt->nsHash == NULL) + return(NULL); + + return((const xmlChar *) xmlHashLookup(ctxt->nsHash, prefix)); +} + +/** + * xmlXPathRegisteredVariablesCleanup: + * @ctxt: the XPath context + * + * Cleanup the XPath context data associated to registered variables + */ +void +xmlXPathRegisteredNsCleanup(xmlXPathContextPtr ctxt) { + if (ctxt == NULL) + return; + + xmlHashFree(ctxt->nsHash, NULL); + ctxt->nsHash = NULL; +} + +/************************************************************************ + * * + * Routines to handle Values * + * * + ************************************************************************/ + +/* Allocations are terrible, one need to optimize all this !!! */ + +/** + * xmlXPathNewFloat: + * @val: the double value + * + * Create a new xmlXPathObjectPtr of type double and of value @val + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPathNewFloat(double val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewFloat: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_NUMBER; + ret->floatval = val; + return(ret); +} + +/** + * xmlXPathNewBoolean: + * @val: the boolean value + * + * Create a new xmlXPathObjectPtr of type boolean and of value @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewBoolean(int val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewBoolean: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_BOOLEAN; + ret->boolval = (val != 0); + return(ret); +} + +/** + * xmlXPathNewString: + * @val: the xmlChar * value + * + * Create a new xmlXPathObjectPtr of type string and of value @val + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathNewString(const xmlChar *val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewString: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_STRING; + if (val != NULL) + ret->stringval = xmlStrdup(val); + else + ret->stringval = xmlStrdup((const xmlChar *)""); + return(ret); +} + +/** + * xmlXPathNewCString: + * @val: the char * value + * + * Create a new xmlXPathObjectPtr of type string and of value @val + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPathNewCString(const char *val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewCString: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_STRING; + ret->stringval = xmlStrdup(BAD_CAST val); + return(ret); +} + +/** + * xmlXPathObjectCopy: + * @val: the original object + * + * allocate a new copy of a given object + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPathObjectCopy(xmlXPathObjectPtr val) { + xmlXPathObjectPtr ret; + + if (val == NULL) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathObjectCopy: out of memory\n"); + return(NULL); + } + memcpy(ret, val , (size_t) sizeof(xmlXPathObject)); + switch (val->type) { + case XPATH_BOOLEAN: + case XPATH_NUMBER: + case XPATH_POINT: + case XPATH_RANGE: + break; + case XPATH_STRING: + ret->stringval = xmlStrdup(val->stringval); + break; + case XPATH_XSLT_TREE: + if ((val->nodesetval != NULL) && + (val->nodesetval->nodeTab != NULL)) + ret->nodesetval = xmlXPathNodeSetCreate( + xmlCopyNode(val->nodesetval->nodeTab[0], 1)); + else + ret->nodesetval = xmlXPathNodeSetCreate(NULL); + break; + case XPATH_NODESET: + ret->nodesetval = xmlXPathNodeSetMerge(NULL, val->nodesetval); + break; + case XPATH_LOCATIONSET: +#ifdef LIBXML_XPTR_ENABLED + { + xmlLocationSetPtr loc = val->user; + ret->user = (void *) xmlXPtrLocationSetMerge(NULL, loc); + break; + } +#endif + case XPATH_UNDEFINED: + case XPATH_USERS: + xmlGenericError(xmlGenericErrorContext, + "xmlXPathObjectCopy: unsupported type %d\n", + val->type); + break; + } + return(ret); +} + +/** + * xmlXPathFreeObject: + * @obj: the object to free + * + * Free up an xmlXPathObjectPtr object. 
+ */ +void +xmlXPathFreeObject(xmlXPathObjectPtr obj) { + if (obj == NULL) return; + if (obj->type == XPATH_NODESET) { + if (obj->nodesetval != NULL) + xmlXPathFreeNodeSet(obj->nodesetval); +#ifdef LIBXML_XPTR_ENABLED + } else if (obj->type == XPATH_LOCATIONSET) { + if (obj->user != NULL) + xmlXPtrFreeLocationSet(obj->user); +#endif + } else if (obj->type == XPATH_STRING) { + if (obj->stringval != NULL) + xmlFree(obj->stringval); + } else if (obj->type == XPATH_XSLT_TREE) { + if (obj->nodesetval != NULL) + xmlXPathFreeValueTree(obj->nodesetval); + } + +#ifdef DEBUG + memset(obj, 0xB , (size_t) sizeof(xmlXPathObject)); +#endif + xmlFree(obj); +} + +/************************************************************************ + * * + * Routines to handle XPath contexts * + * * + ************************************************************************/ + +/** + * xmlXPathNewContext: + * @doc: the XML document + * + * Create a new xmlXPathContext + * + * Returns the xmlXPathContext just allocated. 
+ */ +xmlXPathContextPtr +xmlXPathNewContext(xmlDocPtr doc) { + xmlXPathContextPtr ret; + + ret = (xmlXPathContextPtr) xmlMalloc(sizeof(xmlXPathContext)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewContext: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathContext)); + ret->doc = doc; + ret->node = NULL; + + ret->varHash = NULL; + + ret->nb_types = 0; + ret->max_types = 0; + ret->types = NULL; + + ret->funcHash = xmlHashCreate(0); + + ret->nb_axis = 0; + ret->max_axis = 0; + ret->axis = NULL; + + ret->nsHash = NULL; + ret->user = NULL; + + ret->contextSize = -1; + ret->proximityPosition = -1; + + xmlXPathRegisterAllFunctions(ret); + + return(ret); +} + +/** + * xmlXPathFreeContext: + * @ctxt: the context to free + * + * Free up an xmlXPathContext + */ +void +xmlXPathFreeContext(xmlXPathContextPtr ctxt) { + xmlXPathRegisteredNsCleanup(ctxt); + xmlXPathRegisteredFuncsCleanup(ctxt); + xmlXPathRegisteredVariablesCleanup(ctxt); +#ifdef DEBUG + memset(ctxt, 0xB , (size_t) sizeof(xmlXPathContext)); +#endif + xmlFree(ctxt); +} + +/************************************************************************ + * * + * Routines to handle XPath parser contexts * + * * + ************************************************************************/ + +#define CHECK_CTXT(ctxt) \ + if (ctxt == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: ctxt == NULL\n", \ + __FILE__, __LINE__); \ + } \ + + +#define CHECK_CONTEXT(ctxt) \ + if (ctxt == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: no context\n", \ + __FILE__, __LINE__); \ + } \ + else if (ctxt->doc == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: no document\n", \ + __FILE__, __LINE__); \ + } \ + else if (ctxt->doc->children == NULL) { \ + xmlGenericError(xmlGenericErrorContext, \ + "%s:%d Internal error: document without root\n", \ + __FILE__, __LINE__); \ + } \ + + +/** + * 
xmlXPathNewParserContext: + * @str: the XPath expression + * @ctxt: the XPath context + * + * Create a new xmlXPathParserContext + * + * Returns the xmlXPathParserContext just allocated. + */ +xmlXPathParserContextPtr +xmlXPathNewParserContext(const xmlChar *str, xmlXPathContextPtr ctxt) { + xmlXPathParserContextPtr ret; + + ret = (xmlXPathParserContextPtr) xmlMalloc(sizeof(xmlXPathParserContext)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathNewParserContext: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathParserContext)); + ret->cur = ret->base = str; + ret->context = ctxt; + + /* Allocate the value stack */ + ret->valueTab = (xmlXPathObjectPtr *) + xmlMalloc(10 * sizeof(xmlXPathObjectPtr)); + ret->valueNr = 0; + ret->valueMax = 10; + ret->value = NULL; + return(ret); +} + +/** + * xmlXPathFreeParserContext: + * @ctxt: the context to free + * + * Free up an xmlXPathParserContext + */ +void +xmlXPathFreeParserContext(xmlXPathParserContextPtr ctxt) { + if (ctxt->valueTab != NULL) { +#ifdef DEBUG + memset(ctxt->valueTab, 0xB , 10 * (size_t) sizeof(xmlXPathObjectPtr)); +#endif + xmlFree(ctxt->valueTab); + } +#ifdef DEBUG + memset(ctxt, 0xB , (size_t) sizeof(xmlXPathParserContext)); +#endif + xmlFree(ctxt); +} + +/************************************************************************ + * * + * The implicit core function library * + * * + ************************************************************************/ + +/* + * Auto-pop and cast to a number + */ +void xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs); + + +#define POP_FLOAT \ + arg = valuePop(ctxt); \ + if (arg == NULL) { \ + XP_ERROR(XPATH_INVALID_OPERAND); \ + } \ + if (arg->type != XPATH_NUMBER) { \ + valuePush(ctxt, arg); \ + xmlXPathNumberFunction(ctxt, 1); \ + arg = valuePop(ctxt); \ + } + +/** + * xmlXPathCompareNodeSetFloat: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the 
comparison strict + * @arg: the node set + * @f: the value + * + * Implement the compare operation between a nodeset and a number + * @ns < @val (1, 1, ... + * @ns <= @val (1, 0, ... + * @ns > @val (0, 1, ... + * @ns >= @val (0, 0, ... + * + * If one object to be compared is a node-set and the other is a number, + * then the comparison will be true if and only if there is a node in the + * node-set such that the result of performing the comparison on the number + * to be compared and on the result of converting the string-value of that + * node to a number using the number function is true. + * + * Returns 0 or 1 depending on the results of the test. + */ +int +xmlXPathCompareNodeSetFloat(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg, xmlXPathObjectPtr f) { + int i, ret = 0; + xmlNodeSetPtr ns; + xmlChar *str2; + + if ((f == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) { + xmlXPathFreeObject(arg); + xmlXPathFreeObject(f); + return(0); + } + ns = arg->nodesetval; + for (i = 0;i < ns->nodeNr;i++) { + str2 = xmlNodeGetContent(ns->nodeTab[i]); + if (str2 != NULL) { + valuePush(ctxt, + xmlXPathNewString(str2)); + xmlFree(str2); + xmlXPathNumberFunction(ctxt, 1); + valuePush(ctxt, xmlXPathObjectCopy(f)); + ret = xmlXPathCompareValues(ctxt, inf, strict); + if (ret) + break; + } + } + xmlXPathFreeObject(arg); + xmlXPathFreeObject(f); + return(ret); +} + +/** + * xmlXPathCompareNodeSetString: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * @arg: the node set + * @s: the value + * + * Implement the compare operation between a nodeset and a string + * @ns < @val (1, 1, ... + * @ns <= @val (1, 0, ... + * @ns > @val (0, 1, ... + * @ns >= @val (0, 0, ... 
+ * + * If one object to be compared is a node-set and the other is a string, + * then the comparison will be true if and only if there is a node in + * the node-set such that the result of performing the comparison on the + * string-value of the node and the other string is true. + * + * Returns 0 or 1 depending on the results of the test. + */ +int +xmlXPathCompareNodeSetString(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg, xmlXPathObjectPtr s) { + int i, ret = 0; + xmlNodeSetPtr ns; + xmlChar *str2; + + if ((s == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) { + xmlXPathFreeObject(arg); + xmlXPathFreeObject(s); + return(0); + } + ns = arg->nodesetval; + for (i = 0;i < ns->nodeNr;i++) { + str2 = xmlNodeGetContent(ns->nodeTab[i]); + if (str2 != NULL) { + valuePush(ctxt, + xmlXPathNewString(str2)); + xmlFree(str2); + valuePush(ctxt, xmlXPathObjectCopy(s)); + ret = xmlXPathCompareValues(ctxt, inf, strict); + if (ret) + break; + } + } + xmlXPathFreeObject(arg); + xmlXPathFreeObject(s); + return(ret); +} + +/** + * xmlXPathCompareNodeSets: + * @ctxt: the XPath Parser context + * @op: less than (-1), equal (0) or greater than (1) + * @strict: is the comparison strict + * @arg1: the fist node set object + * @arg2: the second node set object + * + * Implement the compare operation on nodesets: + * + * If both objects to be compared are node-sets, then the comparison + * will be true if and only if there is a node in the first node-set + * and a node in the second node-set such that the result of performing + * the comparison on the string-values of the two nodes is true. + * .... + * When neither object to be compared is a node-set and the operator + * is <=, <, >= or >, then the objects are compared by converting both + * objects to numbers and comparing the numbers according to IEEE 754. + * .... 
+ * The number function converts its argument to a number as follows: + * - a string that consists of optional whitespace followed by an + * optional minus sign followed by a Number followed by whitespace + * is converted to the IEEE 754 number that is nearest (according + * to the IEEE 754 round-to-nearest rule) to the mathematical value + * represented by the string; any other string is converted to NaN + * + * Conclusion all nodes need to be converted first to their string value + * and then the comparison must be done when possible + */ +int +xmlXPathCompareNodeSets(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg1, xmlXPathObjectPtr arg2) { + int i, j, init = 0; + double val1; + double *values2; + int ret = 0; + xmlChar *str; + xmlNodeSetPtr ns1; + xmlNodeSetPtr ns2; + + if ((arg1 == NULL) || + ((arg1->type != XPATH_NODESET) && (arg1->type != XPATH_XSLT_TREE))) + return(0); + if ((arg2 == NULL) || + ((arg2->type != XPATH_NODESET) && (arg2->type != XPATH_XSLT_TREE))) + return(0); + + ns1 = arg1->nodesetval; + ns2 = arg2->nodesetval; + + if (ns1->nodeNr <= 0) + return(0); + if (ns2->nodeNr <= 0) + return(0); + + values2 = (double *) xmlMalloc(ns2->nodeNr * sizeof(double)); + if (values2 == NULL) { + return(0); + } + for (i = 0;i < ns1->nodeNr;i++) { + str = xmlNodeGetContent(ns1->nodeTab[i]); + if (str == NULL) + continue; + val1 = xmlXPathStringEvalNumber(str); + xmlFree(str); + if (isnan(val1)) + continue; + for (j = 0;j < ns2->nodeNr;j++) { + if (init == 0) { + str = xmlNodeGetContent(ns2->nodeTab[j]); + if (str == NULL) { + values2[j] = xmlXPathNAN; + } else { + values2[j] = xmlXPathStringEvalNumber(str); + xmlFree(str); + } + } + if (isnan(values2[j])) + continue; + if (inf && strict) + ret = (val1 < values2[j]); + else if (inf && !strict) + ret = (val1 <= values2[j]); + else if (!inf && strict) + ret = (val1 > values2[j]); + else if (!inf && !strict) + ret = (val1 >= values2[j]); + if (ret) + break; + } + if (ret) + break; + init 
= 1; + } + xmlFree(values2); + return(ret); + return(0); +} + +/** + * xmlXPathCompareNodeSetValue: + * @ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * @arg: the node set + * @val: the value + * + * Implement the compare operation between a nodeset and a value + * @ns < @val (1, 1, ... + * @ns <= @val (1, 0, ... + * @ns > @val (0, 1, ... + * @ns >= @val (0, 0, ... + * + * If one object to be compared is a node-set and the other is a boolean, + * then the comparison will be true if and only if the result of performing + * the comparison on the boolean and on the result of converting + * the node-set to a boolean using the boolean function is true. + * + * Returns 0 or 1 depending on the results of the test. + */ +int +xmlXPathCompareNodeSetValue(xmlXPathParserContextPtr ctxt, int inf, int strict, + xmlXPathObjectPtr arg, xmlXPathObjectPtr val) { + if ((val == NULL) || (arg == NULL) || + ((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE))) + return(0); + + switch(val->type) { + case XPATH_NUMBER: + return(xmlXPathCompareNodeSetFloat(ctxt, inf, strict, arg, val)); + case XPATH_NODESET: + case XPATH_XSLT_TREE: + return(xmlXPathCompareNodeSets(ctxt, inf, strict, arg, val)); + case XPATH_STRING: + return(xmlXPathCompareNodeSetString(ctxt, inf, strict, arg, val)); + case XPATH_BOOLEAN: + valuePush(ctxt, arg); + xmlXPathBooleanFunction(ctxt, 1); + valuePush(ctxt, val); + return(xmlXPathCompareValues(ctxt, inf, strict)); + default: + TODO + return(0); + } + return(0); +} + +/** + * xmlXPathEqualNodeSetString + * @arg: the nodeset object argument + * @str: the string to compare to. 
+ *
 * Implement the equal operation on XPath objects content: @arg1 == @arg2
 * If one object to be compared is a node-set and the other is a string,
 * then the comparison will be true if and only if there is a node in
 * the node-set such that the result of performing the comparison on the
 * string-value of the node and the other string is true.
 *
 * Neither @arg nor @str is freed here. A node-set object without a set
 * is treated as empty.
 *
 * Returns 0 or 1 depending on the results of the test.
 */
int
xmlXPathEqualNodeSetString(xmlXPathObjectPtr arg, const xmlChar *str) {
    int i;
    xmlNodeSetPtr ns;
    xmlChar *str2;

    if ((str == NULL) || (arg == NULL) ||
	((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE)))
        return(0);
    ns = arg->nodesetval;
    if ((ns == NULL) || (ns->nodeNr <= 0))
	return(0);
    for (i = 0;i < ns->nodeNr;i++) {
         str2 = xmlNodeGetContent(ns->nodeTab[i]);
	 if (str2 == NULL)
	     continue;
	 if (xmlStrEqual(str, str2)) {
	     xmlFree(str2);
	     return(1);
	 }
	 xmlFree(str2);
    }
    return(0);
}

/**
 * xmlXPathEqualNodeSetFloat
 * @arg:  the nodeset object argument
 * @f:  the float to compare to
 *
 * Implement the equal operation on XPath objects content: @arg1 == @arg2
 * If one object to be compared is a node-set and the other is a number,
 * then the comparison will be true if and only if there is a node in
 * the node-set such that the result of performing the comparison on the
 * number to be compared and on the result of converting the string-value
 * of that node to a number using the number function is true.
 *
 * Returns 0 or 1 depending on the results of the test.
+ */
/*
 * Implementation notes: every node's string value is converted with
 * xmlXPathStringEvalNumber() and compared numerically with @f, so that
 * "1.0" or " 1 " match the number 1. Comparing a textual rendering of @f
 * against the node text would not follow the rule quoted above. As a
 * consequence a NaN never compares equal. @arg is not freed here.
 */
int
xmlXPathEqualNodeSetFloat(xmlXPathObjectPtr arg, double f) {
    int i;
    xmlNodeSetPtr ns;
    xmlChar *str;
    double val;

    if ((arg == NULL) ||
	((arg->type != XPATH_NODESET) && (arg->type != XPATH_XSLT_TREE)))
        return(0);

    ns = arg->nodesetval;
    if ((ns == NULL) || (ns->nodeNr <= 0))
	return(0);

    for (i = 0;i < ns->nodeNr;i++) {
	str = xmlNodeGetContent(ns->nodeTab[i]);
	if (str == NULL)
	    continue;
	val = xmlXPathStringEvalNumber(str);
	xmlFree(str);
	/* IEEE 754 equality: false whenever either side is NaN */
	if (val == f)
	    return(1);
    }
    return(0);
}


/**
 * xmlXPathEqualNodeSets
 * @arg1:  first nodeset object argument
 * @arg2:  second nodeset object argument
 *
 * Implement the equal operation on XPath nodesets: @arg1 == @arg2
 * If both objects to be compared are node-sets, then the comparison
 * will be true if and only if there is a node in the first node-set and
 * a node in the second node-set such that the result of performing the
 * comparison on the string-values of the two nodes is true.
 *
 * (needless to say, this is a costly operation)
 *
 * Returns 0 or 1 depending on the results of the test.
+ */
/*
 * Implementation notes: neither operand is freed here. A node-set object
 * without a set is treated as empty and yields 0.
 */
int
xmlXPathEqualNodeSets(xmlXPathObjectPtr arg1, xmlXPathObjectPtr arg2) {
    int i, j;
    xmlChar **values1;
    xmlChar **values2;
    int ret = 0;
    xmlNodeSetPtr ns1;
    xmlNodeSetPtr ns2;

    if ((arg1 == NULL) ||
	((arg1->type != XPATH_NODESET) && (arg1->type != XPATH_XSLT_TREE)))
        return(0);
    if ((arg2 == NULL) ||
	((arg2->type != XPATH_NODESET) && (arg2->type != XPATH_XSLT_TREE)))
        return(0);

    ns1 = arg1->nodesetval;
    ns2 = arg2->nodesetval;

    if ((ns1 == NULL) || (ns1->nodeNr <= 0))
	return(0);
    if ((ns2 == NULL) || (ns2->nodeNr <= 0))
	return(0);

    /*
     * check if there is a node pertaining to both sets
     */
    for (i = 0;i < ns1->nodeNr;i++)
	for (j = 0;j < ns2->nodeNr;j++)
	    if (ns1->nodeTab[i] == ns2->nodeTab[j])
		return(1);

    values1 = (xmlChar **) xmlMalloc(ns1->nodeNr * sizeof(xmlChar *));
    if (values1 == NULL)
	return(0);
    memset(values1, 0, ns1->nodeNr * sizeof(xmlChar *));
    values2 = (xmlChar **) xmlMalloc(ns2->nodeNr * sizeof(xmlChar *));
    if (values2 == NULL) {
	xmlFree(values1);
	return(0);
    }
    memset(values2, 0, ns2->nodeNr * sizeof(xmlChar *));
    for (i = 0;i < ns1->nodeNr;i++) {
	values1[i] = xmlNodeGetContent(ns1->nodeTab[i]);
	for (j = 0;j < ns2->nodeNr;j++) {
	    /* string values of the second set are fetched on the first pass */
	    if (i == 0)
		values2[j] = xmlNodeGetContent(ns2->nodeTab[j]);
	    ret = xmlStrEqual(values1[i], values2[j]);
	    if (ret)
		break;
	}
	if (ret)
	    break;
    }
    /* both tables were zeroed, so unfetched slots are NULL and skipped */
    for (i = 0;i < ns1->nodeNr;i++)
	if (values1[i] != NULL)
	    xmlFree(values1[i]);
    for (j = 0;j < ns2->nodeNr;j++)
	if (values2[j] != NULL)
	    xmlFree(values2[j]);
    xmlFree(values1);
    xmlFree(values2);
    return(ret);
}

/**
 * xmlXPathEqualValues:
 * @ctxt:  the XPath Parser context
 *
 * Implement the equal operation on XPath objects content: @arg1 == @arg2
 *
 * Returns 0 or 1 depending on the results of the test.
+ */ +int +xmlXPathEqualValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg1, arg2; + int ret = 0; + + arg1 = valuePop(ctxt); + if (arg1 == NULL) + XP_ERROR0(XPATH_INVALID_OPERAND); + + arg2 = valuePop(ctxt); + if (arg2 == NULL) { + xmlXPathFreeObject(arg1); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + + if (arg1 == arg2) { +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: by pointer\n"); +#endif + return(1); + } + + switch (arg1->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_XSLT_TREE: + case XPATH_NODESET: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_XSLT_TREE: + case XPATH_NODESET: + ret = xmlXPathEqualNodeSets(arg1, arg2); + break; + case XPATH_BOOLEAN: + if ((arg1->nodesetval == NULL) || + (arg1->nodesetval->nodeNr == 0)) ret = 0; + else + ret = 1; + ret = (ret == arg2->boolval); + break; + case XPATH_NUMBER: + ret = xmlXPathEqualNodeSetFloat(arg1, arg2->floatval); + break; + case XPATH_STRING: + ret = xmlXPathEqualNodeSetString(arg1, arg2->stringval); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + break; + case XPATH_BOOLEAN: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + if ((arg2->nodesetval == NULL) || + (arg2->nodesetval->nodeNr == 0)) ret = 0; + else + ret = 1; + break; + case XPATH_BOOLEAN: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: %d boolean %d \n", + arg1->boolval, arg2->boolval); +#endif + ret = (arg1->boolval == arg2->boolval); + break; + case XPATH_NUMBER: + if (arg2->floatval) ret = 1; + else ret = 0; + ret = (arg1->boolval == ret); + break; + case 
XPATH_STRING: + if ((arg2->stringval == NULL) || + (arg2->stringval[0] == 0)) ret = 0; + else + ret = 1; + ret = (arg1->boolval == ret); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + break; + case XPATH_NUMBER: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathEqualNodeSetFloat(arg2, arg1->floatval); + break; + case XPATH_BOOLEAN: + if (arg1->floatval) ret = 1; + else ret = 0; + ret = (arg2->boolval == ret); + break; + case XPATH_STRING: + valuePush(ctxt, arg2); + xmlXPathNumberFunction(ctxt, 1); + arg2 = valuePop(ctxt); + /* no break on purpose */ + case XPATH_NUMBER: + ret = (arg1->floatval == arg2->floatval); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + break; + case XPATH_STRING: + switch (arg2->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "Equal: undefined\n"); +#endif + break; + case XPATH_NODESET: + case XPATH_XSLT_TREE: + ret = xmlXPathEqualNodeSetString(arg2, arg1->stringval); + break; + case XPATH_BOOLEAN: + if ((arg1->stringval == NULL) || + (arg1->stringval[0] == 0)) ret = 0; + else + ret = 1; + ret = (arg2->boolval == ret); + break; + case XPATH_STRING: + ret = xmlStrEqual(arg1->stringval, arg2->stringval); + break; + case XPATH_NUMBER: + valuePush(ctxt, arg1); + xmlXPathNumberFunction(ctxt, 1); + arg1 = valuePop(ctxt); + ret = (arg1->floatval == arg2->floatval); + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + break; + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + break; + } + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); +} + + +/** + * xmlXPathCompareValues: + * 
@ctxt: the XPath Parser context + * @inf: less than (1) or greater than (0) + * @strict: is the comparison strict + * + * Implement the compare operation on XPath objects: + * @arg1 < @arg2 (1, 1, ... + * @arg1 <= @arg2 (1, 0, ... + * @arg1 > @arg2 (0, 1, ... + * @arg1 >= @arg2 (0, 0, ... + * + * When neither object to be compared is a node-set and the operator is + * <=, <, >=, >, then the objects are compared by converted both objects + * to numbers and comparing the numbers according to IEEE 754. The < + * comparison will be true if and only if the first number is less than the + * second number. The <= comparison will be true if and only if the first + * number is less than or equal to the second number. The > comparison + * will be true if and only if the first number is greater than the second + * number. The >= comparison will be true if and only if the first number + * is greater than or equal to the second number. + * + * Returns 1 if the comparaison succeeded, 0 if it failed + */ +int +xmlXPathCompareValues(xmlXPathParserContextPtr ctxt, int inf, int strict) { + int ret = 0; + xmlXPathObjectPtr arg1, arg2; + + arg2 = valuePop(ctxt); + if (arg2 == NULL) { + XP_ERROR0(XPATH_INVALID_OPERAND); + } + + arg1 = valuePop(ctxt); + if (arg1 == NULL) { + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + + if ((arg2->type == XPATH_NODESET) || (arg1->type == XPATH_NODESET)) { + if ((arg2->type == XPATH_NODESET) && (arg1->type == XPATH_NODESET)) { + ret = xmlXPathCompareNodeSets(ctxt, inf, strict, arg1, arg2); + } else { + if (arg1->type == XPATH_NODESET) { + ret = xmlXPathCompareNodeSetValue(ctxt, inf, strict, arg1, arg2); + } else { + ret = xmlXPathCompareNodeSetValue(ctxt, !inf, !strict, arg2, arg2); + } + } + return(ret); + } + + if (arg1->type != XPATH_NUMBER) { + valuePush(ctxt, arg1); + xmlXPathNumberFunction(ctxt, 1); + arg1 = valuePop(ctxt); + } + if (arg1->type != XPATH_NUMBER) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + 
XP_ERROR0(XPATH_INVALID_OPERAND); + } + if (arg2->type != XPATH_NUMBER) { + valuePush(ctxt, arg2); + xmlXPathNumberFunction(ctxt, 1); + arg2 = valuePop(ctxt); + } + if (arg2->type != XPATH_NUMBER) { + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + XP_ERROR0(XPATH_INVALID_OPERAND); + } + /* + * Add tests for infinity and nan + * => feedback on 3.4 for Inf and NaN + */ + if (inf && strict) + ret = (arg1->floatval < arg2->floatval); + else if (inf && !strict) + ret = (arg1->floatval <= arg2->floatval); + else if (!inf && strict) + ret = (arg1->floatval > arg2->floatval); + else if (!inf && !strict) + ret = (arg1->floatval >= arg2->floatval); + xmlXPathFreeObject(arg1); + xmlXPathFreeObject(arg2); + return(ret); +} + +/** + * xmlXPathValueFlipSign: + * @ctxt: the XPath Parser context + * + * Implement the unary - operation on an XPath object + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathValueFlipSign(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + + POP_FLOAT + arg->floatval = -arg->floatval; + valuePush(ctxt, arg); +} + +/** + * xmlXPathAddValues: + * @ctxt: the XPath Parser context + * + * Implement the add operation on XPath objects: + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathAddValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + POP_FLOAT + val = arg->floatval; + xmlXPathFreeObject(arg); + + POP_FLOAT + arg->floatval += val; + valuePush(ctxt, arg); +} + +/** + * xmlXPathSubValues: + * @ctxt: the XPath Parser context + * + * Implement the substraction operation on XPath objects: + * The numeric operators convert their operands to numbers as if + * by calling the number function. 
+ */ +void +xmlXPathSubValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + POP_FLOAT + val = arg->floatval; + xmlXPathFreeObject(arg); + + POP_FLOAT + arg->floatval -= val; + valuePush(ctxt, arg); +} + +/** + * xmlXPathMultValues: + * @ctxt: the XPath Parser context + * + * Implement the multiply operation on XPath objects: + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathMultValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + POP_FLOAT + val = arg->floatval; + xmlXPathFreeObject(arg); + + POP_FLOAT + arg->floatval *= val; + valuePush(ctxt, arg); +} + +/** + * xmlXPathDivValues: + * @ctxt: the XPath Parser context + * + * Implement the div operation on XPath objects @arg1 / @arg2: + * The numeric operators convert their operands to numbers as if + * by calling the number function. + */ +void +xmlXPathDivValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + double val; + + POP_FLOAT + val = arg->floatval; + xmlXPathFreeObject(arg); + + POP_FLOAT + arg->floatval /= val; + valuePush(ctxt, arg); +} + +/** + * xmlXPathModValues: + * @ctxt: the XPath Parser context + * + * Implement the mod operation on XPath objects: @arg1 / @arg2 + * The numeric operators convert their operands to numbers as if + * by calling the number function. 
+ */ +void +xmlXPathModValues(xmlXPathParserContextPtr ctxt) { + xmlXPathObjectPtr arg; + int arg1, arg2; + + POP_FLOAT + arg2 = (int) arg->floatval; + xmlXPathFreeObject(arg); + + POP_FLOAT + arg1 = (int) arg->floatval; + arg->floatval = arg1 % arg2; + valuePush(ctxt, arg); +} + +/************************************************************************ + * * + * The traversal functions * + * * + ************************************************************************/ + +typedef enum { + AXIS_ANCESTOR = 1, + AXIS_ANCESTOR_OR_SELF, + AXIS_ATTRIBUTE, + AXIS_CHILD, + AXIS_DESCENDANT, + AXIS_DESCENDANT_OR_SELF, + AXIS_FOLLOWING, + AXIS_FOLLOWING_SIBLING, + AXIS_NAMESPACE, + AXIS_PARENT, + AXIS_PRECEDING, + AXIS_PRECEDING_SIBLING, + AXIS_SELF +} xmlXPathAxisVal; + +/* + * A traversal function enumerates nodes along an axis. + * Initially it must be called with NULL, and it indicates + * termination on the axis by returning NULL. + */ +typedef xmlNodePtr (*xmlXPathTraversalFunction) + (xmlXPathParserContextPtr ctxt, xmlNodePtr cur); + +/** + * xmlXPathNextSelf: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "self" direction + * The self axis contains just the context node itself + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) + return(ctxt->context->node); + return(NULL); +} + +/** + * xmlXPathNextChild: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "child" direction + * The child axis contains the children of the context node in document order. 
+ * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) { + if (ctxt->context->node == NULL) return(NULL); + switch (ctxt->context->node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + return(ctxt->context->node->children); + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif + return(((xmlDocPtr) ctxt->context->node)->children); + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_ATTRIBUTE_NODE: + case XML_NAMESPACE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(NULL); + } + return(NULL); + } + if ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) + return(NULL); + return(cur->next); +} + +/** + * xmlXPathNextDescendant: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "descendant" direction + * the descendant axis contains the descendants of the context node in document + * order; a descendant is a child or a child of a child and so on. 
+ * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) { + if (ctxt->context->node == NULL) + return(NULL); + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); + + if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc) + return(ctxt->context->doc->children); + return(ctxt->context->node->children); + } + + if (cur->children != NULL) + { + if (cur->children->type != XML_ENTITY_DECL) + return(cur->children); + } + if (cur->next != NULL) return(cur->next); + + do { + cur = cur->parent; + if (cur == NULL) return(NULL); + if (cur == ctxt->context->node) return(NULL); + if (cur->next != NULL) { + cur = cur->next; + return(cur); + } + } while (cur != NULL); + return(cur); +} + +/** + * xmlXPathNextDescendantOrSelf: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "descendant-or-self" direction + * the descendant-or-self axis contains the context node and the descendants + * of the context node in document order; thus the context node is the first + * node on the axis, and the first child of the context node is the second node + * on the axis + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextDescendantOrSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + if (cur == NULL) { + if (ctxt->context->node == NULL) + return(NULL); + if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) || + (ctxt->context->node->type == XML_NAMESPACE_DECL)) + return(NULL); + return(ctxt->context->node); + } + + return(xmlXPathNextDescendant(ctxt, cur)); +} + +/** + * xmlXPathNextParent: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "parent" direction + * The parent axis contains the parent of the context node, if there is one. 
+ * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + /* + * the parent of an attribute or namespace node is the element + * to which the attribute or namespace node is attached + * Namespace handling !!! + */ + if (cur == NULL) { + if (ctxt->context->node == NULL) return(NULL); + switch (ctxt->context->node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + case XML_ENTITY_DECL: + if (ctxt->context->node->parent == NULL) + return((xmlNodePtr) ctxt->context->doc); + return(ctxt->context->node->parent); + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node; + + return(att->parent); + } + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif + return(NULL); + case XML_NAMESPACE_DECL: + /* + * TODO !!! may require extending struct _xmlNs with + * parent field + * C.f. Infoset case... 
+ */ + return(NULL); + } + } + return(NULL); +} + +/** + * xmlXPathNextAncestor: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "ancestor" direction + * the ancestor axis contains the ancestors of the context node; the ancestors + * of the context node consist of the parent of context node and the parent's + * parent and so on; the nodes are ordered in reverse document order; thus the + * parent is the first node on the axis, and the parent's parent is the second + * node on the axis + * + * Returns the next element following that axis + */ +xmlNodePtr +xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { + /* + * the parent of an attribute or namespace node is the element + * to which the attribute or namespace node is attached + * !!!!!!!!!!!!! + */ + if (cur == NULL) { + if (ctxt->context->node == NULL) return(NULL); + switch (ctxt->context->node->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NOTATION_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + if (ctxt->context->node->parent == NULL) + return((xmlNodePtr) ctxt->context->doc); + return(ctxt->context->node->parent); + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr cur = (xmlAttrPtr) ctxt->context->node; + + return(cur->parent); + } + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif + return(NULL); + case XML_NAMESPACE_DECL: + /* + * TODO !!! may require extending struct _xmlNs with + * parent field + * C.f. Infoset case... 
+ */ + return(NULL); + } + return(NULL); + } + if (cur == ctxt->context->doc->children) + return((xmlNodePtr) ctxt->context->doc); + if (cur == (xmlNodePtr) ctxt->context->doc) + return(NULL); + switch (cur->type) { + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_NOTATION_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + return(cur->parent); + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node; + + return(att->parent); + } + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_HTML_DOCUMENT_NODE: +#ifdef LIBXML_SGML_ENABLED + case XML_SGML_DOCUMENT_NODE: +#endif + return(NULL); + case XML_NAMESPACE_DECL: + /* + * TODO !!! may require extending struct _xmlNs with + * parent field + * C.f. Infoset case... + */ + return(NULL); + } + return(NULL); +} + +/** + * xmlXPathNextAncestorOrSelf: + * @ctxt: the XPath Parser context + * @cur: the current node in the traversal + * + * Traversal function for the "ancestor-or-self" direction + * he ancestor-or-self axis contains the context node and ancestors of + * the context node in reverse document order; thus the context node is + * the first node on the axis, and the context node's parent the second; + * parent here is defined the same as with the parent axis. 
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextAncestorOrSelf(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    /* First call yields the context node, later calls climb from @cur. */
+    if (cur == NULL)
+        return(ctxt->context->node);
+    return(xmlXPathNextAncestor(ctxt, cur));
+}
+
+/**
+ * xmlXPathNextFollowingSibling:
+ * @ctxt: the XPath Parser context
+ * @cur: the current node in the traversal
+ *
+ * Traversal function for the "following-sibling" direction
+ * The following-sibling axis contains the following siblings of the context
+ * node in document order.
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextFollowingSibling(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    /* No context node means an empty axis, as in the other axis walkers. */
+    if (ctxt->context->node == NULL)
+        return(NULL);
+    /* Attribute and namespace nodes have no siblings on this axis. */
+    if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) ||
+        (ctxt->context->node->type == XML_NAMESPACE_DECL))
+        return(NULL);
+    if (cur == (xmlNodePtr) ctxt->context->doc)
+        return(NULL);
+    if (cur == NULL)
+        return(ctxt->context->node->next);
+    return(cur->next);
+}
+
+/**
+ * xmlXPathNextPrecedingSibling:
+ * @ctxt: the XPath Parser context
+ * @cur: the current node in the traversal
+ *
+ * Traversal function for the "preceding-sibling" direction
+ * The preceding-sibling axis contains the preceding siblings of the context
+ * node in reverse document order; the first preceding sibling is first on the
+ * axis; the sibling preceding that node is the second on the axis and so on.
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextPrecedingSibling(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    /* No context node means an empty axis, as in the other axis walkers. */
+    if (ctxt->context->node == NULL)
+        return(NULL);
+    /* Attribute and namespace nodes have no siblings on this axis. */
+    if ((ctxt->context->node->type == XML_ATTRIBUTE_NODE) ||
+        (ctxt->context->node->type == XML_NAMESPACE_DECL))
+        return(NULL);
+    if (cur == (xmlNodePtr) ctxt->context->doc)
+        return(NULL);
+    if (cur == NULL)
+        return(ctxt->context->node->prev);
+    return(cur->prev);
+}
+
+/**
+ * xmlXPathNextFollowing:
+ * @ctxt: the XPath Parser context
+ * @cur: the current node in the traversal
+ *
+ * Traversal function for the "following" direction
+ * The following axis contains all nodes in the same document as the context
+ * node that are after the context node in document order, excluding any
+ * descendants and excluding attribute nodes and namespace nodes; the nodes
+ * are ordered in document order
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    /* Once the walk has started, descend into children first. */
+    if (cur != NULL && cur->children != NULL)
+        return cur->children ;
+    /* First call: start from the context node, whose children are skipped. */
+    if (cur == NULL) cur = ctxt->context->node;
+    if (cur == NULL) return(NULL) ; /* ERROR */
+    if (cur->next != NULL) return(cur->next) ;
+    /* Climb until an ancestor has a next sibling or the document is hit. */
+    do {
+        cur = cur->parent;
+        if (cur == NULL) return(NULL);
+        if (cur == (xmlNodePtr) ctxt->context->doc) return(NULL);
+        if (cur->next != NULL) return(cur->next);
+    } while (cur != NULL);
+    return(cur);
+}
+
+/*
+ * xmlXPathIsAncestor:
+ * @ancestor: the ancestor node
+ * @node: the current node
+ *
+ * Check that @ancestor is a @node's ancestor
+ *
+ * returns 1 if @ancestor is a @node's ancestor, 0 otherwise.
+ */
+static int
+xmlXPathIsAncestor(xmlNodePtr ancestor, xmlNodePtr node) {
+    if ((ancestor == NULL) || (node == NULL)) return(0);
+    /* nodes need to be in the same document */
+    if (ancestor->doc != node->doc) return(0);
+    /* avoid searching if ancestor or node is the root node */
+    if (ancestor == (xmlNodePtr) node->doc) return(1);
+    if (node == (xmlNodePtr) ancestor->doc) return(0);
+    /* Follow the parent links of @node looking for @ancestor. */
+    while (node->parent != NULL) {
+        if (node->parent == ancestor)
+            return(1);
+        node = node->parent;
+    }
+    return(0);
+}
+
+/**
+ * xmlXPathNextPreceding:
+ * @ctxt: the XPath Parser context
+ * @cur: the current node in the traversal
+ *
+ * Traversal function for the "preceding" direction
+ * the preceding axis contains all nodes in the same document as the context
+ * node that are before the context node in document order, excluding any
+ * ancestors and excluding attribute nodes and namespace nodes; the nodes are
+ * ordered in reverse document order
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    /* First call: start from the context node (not checked for NULL). */
+    if (cur == NULL)
+        cur = ctxt->context->node ;
+    do {
+        if (cur->prev != NULL) {
+            /* Previous sibling, then down its chain of last children. */
+            for (cur = cur->prev ; cur->last != NULL ; cur = cur->last)
+                ;
+            return(cur) ;
+        }
+
+        /* No previous sibling: move up one level. */
+        cur = cur->parent;
+        if (cur == NULL) return(NULL);
+        if (cur == ctxt->context->doc->children) return(NULL);
+        /* Keep climbing while the node reached is an ancestor of the
+         * context node, since ancestors are excluded from this axis. */
+    } while (xmlXPathIsAncestor(cur, ctxt->context->node));
+    return(cur);
+}
+
+/**
+ * xmlXPathNextNamespace:
+ * @ctxt: the XPath Parser context
+ * @cur: the current attribute in the traversal
+ *
+ * Traversal function for the "namespace" direction
+ * the namespace axis contains the namespace nodes of the context node;
+ * the order of nodes on this axis is implementation-defined; the axis will
+ * be empty unless the context node is an element
+ *
+ * The list of namespaces is fetched with xmlGetNsList() and cached in
+ * ctxt->context->namespaces; ctxt->context->nsNr is the read cursor.
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextNamespace(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    if (ctxt->context->node->type != XML_ELEMENT_NODE) return(NULL);
+    /* First call, or no cached list: (re)build the list and rewind. */
+    if ((cur == NULL) || (ctxt->context->namespaces == NULL)) {
+        if (ctxt->context->namespaces != NULL)
+            xmlFree(ctxt->context->namespaces);
+        ctxt->context->namespaces =
+            xmlGetNsList(ctxt->context->doc, ctxt->context->node);
+        if (ctxt->context->namespaces == NULL) return(NULL);
+        ctxt->context->nsNr = 0;
+    }
+    /* NOTE(review): presumably the list is NULL-terminated, so the entry
+     * returned here ends the traversal - confirm against xmlGetNsList(). */
+    return((xmlNodePtr)ctxt->context->namespaces[ctxt->context->nsNr++]);
+}
+
+/**
+ * xmlXPathNextAttribute:
+ * @ctxt: the XPath Parser context
+ * @cur: the current attribute in the traversal
+ *
+ * Traversal function for the "attribute" direction
+ * TODO: support DTD inherited default attributes
+ *
+ * Returns the next element following that axis
+ */
+xmlNodePtr
+xmlXPathNextAttribute(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
+    /* Only element nodes carry attributes. */
+    if (ctxt->context->node->type != XML_ELEMENT_NODE) return(NULL);
+    if (cur == NULL) {
+        if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc)
+            return(NULL);
+        /* First call: head of the element's property list. */
+        return((xmlNodePtr)ctxt->context->node->properties);
+    }
+    return((xmlNodePtr)cur->next);
+}
+
+/************************************************************************
+ *                                                                      *
+ *                      NodeTest Functions                              *
+ *                                                                      *
+ ************************************************************************/
+
+/*
+ * Kind of node test carried by a step; xmlXPathNodeCollectAndTest()
+ * switches on this value.
+ */
+typedef enum {
+    NODE_TEST_NONE = 0,
+    NODE_TEST_TYPE = 1,
+    NODE_TEST_PI = 2,
+    NODE_TEST_ALL = 3,
+    NODE_TEST_NS = 4,
+    NODE_TEST_NAME = 5
+} xmlXPathTestVal;
+
+/*
+ * Node type requested by a NODE_TEST_TYPE test. Except for NODE_TYPE_NODE
+ * the values reuse the node type constants, so xmlXPathNodeCollectAndTest()
+ * can compare them directly with a node's type field.
+ */
+typedef enum {
+    NODE_TYPE_NODE = 0,
+    NODE_TYPE_COMMENT = XML_COMMENT_NODE,
+    NODE_TYPE_TEXT = XML_TEXT_NODE,
+    NODE_TYPE_PI = XML_PI_NODE
+} xmlXPathTypeVal;
+
+#define IS_FUNCTION 200
+
+/**
+ * xmlXPathNodeCollectAndTest:
+ * @ctxt: the XPath Parser context
+ * @axis: the XPath axis
+ * @test: the XPath test
+ * @type: the XPath type
+ * @prefix: the namespace prefix if any
+ * @name: the name used in the search if any
+ *
+ * This is the function implementing a step: based on the current list
+ * of nodes, it builds up a new list, looking at all
nodes under that + * axis and selecting them. + * + * Returns the new NodeSet resulting from the search. + */ +void +xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt, xmlXPathAxisVal axis, + xmlXPathTestVal test, xmlXPathTypeVal type, + const xmlChar *prefix, const xmlChar *name) { +#ifdef DEBUG_STEP + int n = 0, t = 0; +#endif + int i; + xmlNodeSetPtr ret; + xmlXPathTraversalFunction next = NULL; + void (*addNode)(xmlNodeSetPtr, xmlNodePtr); + xmlNodePtr cur = NULL; + xmlXPathObjectPtr obj; + xmlNodeSetPtr nodelist; + + CHECK_TYPE(XPATH_NODESET); + obj = valuePop(ctxt); + addNode = xmlXPathNodeSetAdd; + +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "new step : "); +#endif + switch (axis) { + case AXIS_ANCESTOR: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'ancestors' "); +#endif + next = xmlXPathNextAncestor; break; + case AXIS_ANCESTOR_OR_SELF: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'ancestors-or-self' "); +#endif + next = xmlXPathNextAncestorOrSelf; break; + case AXIS_ATTRIBUTE: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'attributes' "); +#endif + next = xmlXPathNextAttribute; break; + break; + case AXIS_CHILD: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'child' "); +#endif + next = xmlXPathNextChild; break; + case AXIS_DESCENDANT: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'descendant' "); +#endif + next = xmlXPathNextDescendant; break; + case AXIS_DESCENDANT_OR_SELF: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'descendant-or-self' "); +#endif + next = xmlXPathNextDescendantOrSelf; break; + case AXIS_FOLLOWING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'following' "); +#endif + next = xmlXPathNextFollowing; break; + case AXIS_FOLLOWING_SIBLING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'following-siblings' "); +#endif + next = 
xmlXPathNextFollowingSibling; break; + case AXIS_NAMESPACE: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'namespace' "); +#endif + next = (xmlXPathTraversalFunction) xmlXPathNextNamespace; break; + break; + case AXIS_PARENT: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'parent' "); +#endif + next = xmlXPathNextParent; break; + case AXIS_PRECEDING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'preceding' "); +#endif + next = xmlXPathNextPreceding; break; + case AXIS_PRECEDING_SIBLING: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'preceding-sibling' "); +#endif + next = xmlXPathNextPrecedingSibling; break; + case AXIS_SELF: +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "axis 'self' "); +#endif + next = xmlXPathNextSelf; break; + } + if (next == NULL) + return; + + nodelist = obj->nodesetval; + if ((nodelist != NULL) && + (nodelist->nodeNr <= 1)) + addNode = xmlXPathNodeSetAddUnique; + else + addNode = xmlXPathNodeSetAdd; + ret = xmlXPathNodeSetCreate(NULL); +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + " context contains %d nodes\n", + nodelist->nodeNr); + switch (test) { + case NODE_TEST_NODE: + xmlGenericError(xmlGenericErrorContext, + " searching all nodes\n"); + break; + case NODE_TEST_NONE: + xmlGenericError(xmlGenericErrorContext, + " searching for none !!!\n"); + break; + case NODE_TEST_TYPE: + xmlGenericError(xmlGenericErrorContext, + " searching for type %d\n", type); + break; + case NODE_TEST_PI: + xmlGenericError(xmlGenericErrorContext, + " searching for PI !!!\n"); + break; + case NODE_TEST_ALL: + xmlGenericError(xmlGenericErrorContext, + " searching for *\n"); + break; + case NODE_TEST_NS: + xmlGenericError(xmlGenericErrorContext, + " searching for namespace %s\n", + prefix); + break; + case NODE_TEST_NAME: + xmlGenericError(xmlGenericErrorContext, + " searching for name %s\n", name); + if (prefix != NULL) + 
xmlGenericError(xmlGenericErrorContext, + " with namespace %s\n", + prefix); + break; + } + xmlGenericError(xmlGenericErrorContext, "Testing : "); +#endif + /* + * 2.3 Node Tests + * - For the attribute axis, the principal node type is attribute. + * - For the namespace axis, the principal node type is namespace. + * - For other axes, the principal node type is element. + * + * A node test * is true for any node of the + * principal node type. For example, child::* willi + * select all element children of the context node + */ + for (i = 0;i < nodelist->nodeNr; i++) { + ctxt->context->node = nodelist->nodeTab[i]; + + cur = NULL; + do { + cur = next(ctxt, cur); + if (cur == NULL) break; +#ifdef DEBUG_STEP + t++; + xmlGenericError(xmlGenericErrorContext, " %s", cur->name); +#endif + switch (test) { + case NODE_TEST_NONE: + STRANGE + return; + case NODE_TEST_TYPE: + if ((cur->type == type) || + ((type == NODE_TYPE_NODE) && + ((cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_PI_NODE) || + (cur->type == XML_COMMENT_NODE) || + (cur->type == XML_CDATA_SECTION_NODE) || + (cur->type == XML_TEXT_NODE)))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + break; + case NODE_TEST_PI: + if (cur->type == XML_PI_NODE) { + if ((name != NULL) && + (!xmlStrEqual(name, cur->name))) + break; +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + break; + case NODE_TEST_ALL: + if (axis == AXIS_ATTRIBUTE) { + if (cur->type == XML_ATTRIBUTE_NODE) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + } else if (axis == AXIS_NAMESPACE) { + if (cur->type == XML_NAMESPACE_DECL) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + } else { + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + if (prefix == NULL) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } else if ((cur->ns != NULL) && 
+ (xmlStrEqual(prefix, + cur->ns->href))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + } + } + break; + case NODE_TEST_NS: { + TODO; + break; + } + case NODE_TEST_NAME: + switch (cur->type) { + case XML_ELEMENT_NODE: + if (xmlStrEqual(name, cur->name)) { + if (prefix == NULL) { + if ((cur->ns == NULL) || + (cur->ns->prefix == NULL)) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + } else { + if ((cur->ns != NULL) && + (xmlStrEqual(prefix, + cur->ns->href))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, cur); + } + } + } + break; + case XML_ATTRIBUTE_NODE: { + xmlAttrPtr attr = (xmlAttrPtr) cur; + if (xmlStrEqual(name, attr->name)) { + if (prefix == NULL) { + if ((attr->ns == NULL) || + (attr->ns->prefix == NULL)) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, (xmlNodePtr) attr); + } + } else { + if ((attr->ns != NULL) && + (xmlStrEqual(prefix, + attr->ns->href))) { +#ifdef DEBUG_STEP + n++; +#endif + addNode(ret, (xmlNodePtr) attr); + } + } + } + break; + } + case XML_NAMESPACE_DECL: { + TODO; + break; + } + default: + break; + } + break; + } + } while (cur != NULL); + } +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "\nExamined %d nodes, found %d nodes at that step\n", t, n); +#endif + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathWrapNodeSet(ret)); +} + + +/************************************************************************ + * * + * Implicit tree core function library * + * * + ************************************************************************/ + +/** + * xmlXPathRoot: + * @ctxt: the XPath Parser context + * + * Initialize the context to the root of the document + */ +void +xmlXPathRoot(xmlXPathParserContextPtr ctxt) { + ctxt->context->node = (xmlNodePtr) ctxt->context->doc; + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); +} + +/************************************************************************ + * * + * The explicit core function library * + 
*http://www.w3.org/Style/XSL/Group/1999/07/xpath-19990705.html#corelib * + * * + ************************************************************************/ + + +/** + * xmlXPathLastFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the last() XPath function + * number last() + * The last function returns the number of nodes in the context node list. + */ +void +xmlXPathLastFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + if (ctxt->context->contextSize >= 0) { + valuePush(ctxt, xmlXPathNewFloat((double) ctxt->context->contextSize)); +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, + "last() : %d\n", ctxt->context->contextSize); +#endif + } else { + XP_ERROR(XPATH_INVALID_CTXT_SIZE); + } +} + +/** + * xmlXPathPositionFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the position() XPath function + * number position() + * The position function returns the position of the context node in the + * context node list. The first position is 1, and so the last positionr + * will be equal to last(). 
+ */ +void +xmlXPathPositionFunction(xmlXPathParserContextPtr ctxt, int nargs) { + CHECK_ARITY(0); + if (ctxt->context->proximityPosition >= 0) { + valuePush(ctxt, + xmlXPathNewFloat((double) ctxt->context->proximityPosition)); +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, "position() : %d\n", + ctxt->context->proximityPosition); +#endif + } else { + XP_ERROR(XPATH_INVALID_CTXT_POSITION); + } +} + +/** + * xmlXPathCountFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the count() XPath function + * number count(node-set) + */ +void +xmlXPathCountFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + valuePush(ctxt, xmlXPathNewFloat((double) cur->nodesetval->nodeNr)); + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathIdFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the id() XPath function + * node-set id(object) + * The id function selects elements by their unique ID + * (see [5.2.1 Unique IDs]). When the argument to id is of type node-set, + * then the result is the union of the result of applying id to the + * string value of each of the nodes in the argument node-set. When the + * argument to id is of any other type, the argument is converted to a + * string as if by a call to the string function; the string is split + * into a whitespace-separated list of tokens (whitespace is any sequence + * of characters matching the production S); the result is a node-set + * containing the elements in the same document as the context node that + * have a unique ID equal to any of the tokens in the list. 
+ */
+void
+xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    const xmlChar *tokens;
+    const xmlChar *cur;
+    xmlChar *ID;
+    xmlAttrPtr attr;
+    xmlNodePtr elem = NULL;
+    xmlXPathObjectPtr ret, obj;
+
+    CHECK_ARITY(1);
+    obj = valuePop(ctxt);
+    if (obj == NULL) XP_ERROR(XPATH_INVALID_OPERAND);
+    if (obj->type == XPATH_NODESET) {
+        xmlXPathObjectPtr newobj;
+        int i;
+
+        ret = xmlXPathNewNodeSet(NULL);
+
+        /*
+         * Union of id(string(node)) over every node of the argument.
+         * A node-set object without a node table is an empty set.
+         */
+        if (obj->nodesetval != NULL) {
+            for (i = 0; i < obj->nodesetval->nodeNr; i++) {
+                valuePush(ctxt,
+                          xmlXPathNewNodeSet(obj->nodesetval->nodeTab[i]));
+                xmlXPathStringFunction(ctxt, 1);
+                xmlXPathIdFunction(ctxt, 1);
+                newobj = valuePop(ctxt);
+                if (newobj == NULL)
+                    continue;
+                ret->nodesetval = xmlXPathNodeSetMerge(ret->nodesetval,
+                                                       newobj->nodesetval);
+                xmlXPathFreeObject(newobj);
+            }
+        }
+
+        xmlXPathFreeObject(obj);
+        valuePush(ctxt, ret);
+        return;
+    }
+    if (obj->type != XPATH_STRING) {
+        /* Any other type goes through the string() conversion first. */
+        valuePush(ctxt, obj);
+        xmlXPathStringFunction(ctxt, 1);
+        obj = valuePop(ctxt);
+        if ((obj == NULL) || (obj->type != XPATH_STRING)) {
+            /* Report the failure instead of returning silently. */
+            xmlXPathFreeObject(obj);
+            XP_ERROR(XPATH_INVALID_TYPE);
+        }
+    }
+    tokens = obj->stringval;
+
+    /* The result is pushed right away and filled in place below. */
+    ret = xmlXPathNewNodeSet(NULL);
+    valuePush(ctxt, ret);
+    if (tokens == NULL) {
+        xmlXPathFreeObject(obj);
+        return;
+    }
+
+    cur = tokens;
+
+    while (IS_BLANK(*cur)) cur++;
+    /* The first token starts after the leading blanks, not before them. */
+    tokens = cur;
+    while (*cur != 0) {
+        while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
+               (*cur == '.') || (*cur == '-') ||
+               (*cur == '_') || (*cur == ':') ||
+               (IS_COMBINING(*cur)) ||
+               (IS_EXTENDER(*cur)))
+            cur++;
+
+        /* Stop at the first character that can be neither name nor blank. */
+        if ((!IS_BLANK(*cur)) && (*cur != 0)) break;
+
+        ID = xmlStrndup(tokens, cur - tokens);
+        attr = xmlGetID(ctxt->context->doc, ID);
+        if (attr != NULL) {
+            /* The element owning the ID attribute is the match. */
+            elem = attr->parent;
+            xmlXPathNodeSetAdd(ret->nodesetval, elem);
+        }
+        if (ID != NULL)
+            xmlFree(ID);
+
+        while (IS_BLANK(*cur)) cur++;
+        tokens = cur;
+    }
+    xmlXPathFreeObject(obj);
+    return;
+}
+
+/**
+ * xmlXPathLocalNameFunction:
+ * @ctxt: the XPath Parser context
+ * @nargs: the number of arguments
+ *
+ * Implement the local-name() XPath function
+ * string local-name(node-set?)
+ * The local-name function returns a string containing the local part + * of the name of the node in the argument node-set that is first in + * document order. If the node-set is empty or the first node has no + * name, an empty string is returned. If the argument is omitted it + * defaults to the context node. + */ +void +xmlXPathLocalNameFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + nargs = 1; + } + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if (cur->nodesetval->nodeNr == 0) { + valuePush(ctxt, xmlXPathNewCString("")); + } else { + int i = 0; /* Should be first in document order !!!!! */ + switch (cur->nodesetval->nodeTab[i]->type) { + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + case XML_PI_NODE: + valuePush(ctxt, + xmlXPathNewString(cur->nodesetval->nodeTab[i]->name)); + break; + case XML_NAMESPACE_DECL: + valuePush(ctxt, xmlXPathNewString( + ((xmlNsPtr)cur->nodesetval->nodeTab[i])->prefix)); + break; + default: + valuePush(ctxt, xmlXPathNewCString("")); + } + } + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathNamespaceURIFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the namespace-uri() XPath function + * string namespace-uri(node-set?) + * The namespace-uri function returns a string containing the + * namespace URI of the expanded name of the node in the argument + * node-set that is first in document order. If the node-set is empty, + * the first node has no name, or the expanded name has no namespace + * URI, an empty string is returned. If the argument is omitted it + * defaults to the context node. 
+ */
+void
+xmlXPathNamespaceURIFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr cur;
+
+    if (nargs == 0) {
+        /* No argument: operate on the context node. */
+        valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node));
+        nargs = 1;
+    }
+    CHECK_ARITY(1);
+    if ((ctxt->value == NULL) ||
+        ((ctxt->value->type != XPATH_NODESET) &&
+         (ctxt->value->type != XPATH_XSLT_TREE)))
+        XP_ERROR(XPATH_INVALID_TYPE);
+    cur = valuePop(ctxt);
+
+    /* A node-set object without a node table is an empty set. */
+    if ((cur->nodesetval == NULL) || (cur->nodesetval->nodeNr == 0)) {
+        valuePush(ctxt, xmlXPathNewCString(""));
+    } else {
+        int i = 0; /* Should be first in document order !!!!! */
+        switch (cur->nodesetval->nodeTab[i]->type) {
+            case XML_ELEMENT_NODE:
+            case XML_ATTRIBUTE_NODE:
+                /* Nodes without a namespace yield the empty string. */
+                if (cur->nodesetval->nodeTab[i]->ns == NULL)
+                    valuePush(ctxt, xmlXPathNewCString(""));
+                else
+                    valuePush(ctxt, xmlXPathNewString(
+                              cur->nodesetval->nodeTab[i]->ns->href));
+                break;
+            default:
+                valuePush(ctxt, xmlXPathNewCString(""));
+        }
+    }
+    xmlXPathFreeObject(cur);
+}
+
+/**
+ * xmlXPathNameFunction:
+ * @ctxt: the XPath Parser context
+ * @nargs: the number of arguments
+ *
+ * Implement the name() XPath function
+ * string name(node-set?)
+ * The name function returns a string containing a QName representing
+ * the name of the node in the argument node-set that is first in document
+ * order. The QName must represent the name with respect to the namespace
+ * declarations in effect on the node whose name is being represented.
+ * Typically, this will be the form in which the name occurred in the XML
+ * source. This need not be the case if there are namespace declarations
+ * in effect on the node that associate multiple prefixes with the same
+ * namespace. However, an implementation may include information about
+ * the original prefix in its representation of nodes; in this case, an
+ * implementation can ensure that the returned string is always the same
+ * as the QName used in the XML source. If the argument is omitted it
+ * defaults to the context node.
+ * Libxml keep the original prefix so the "real qualified name" used is + * returned. + */ +void +xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + nargs = 1; + } + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_XSLT_TREE))) + XP_ERROR(XPATH_INVALID_TYPE); + cur = valuePop(ctxt); + + if (cur->nodesetval->nodeNr == 0) { + valuePush(ctxt, xmlXPathNewCString("")); + } else { + int i = 0; /* Should be first in document order !!!!! */ + + switch (cur->nodesetval->nodeTab[i]->type) { + case XML_ELEMENT_NODE: + case XML_ATTRIBUTE_NODE: + if (cur->nodesetval->nodeTab[i]->ns == NULL) + valuePush(ctxt, xmlXPathNewString( + cur->nodesetval->nodeTab[i]->name)); + + else { + char name[2000]; +#ifdef HAVE_SNPRINTF + snprintf(name, sizeof(name), "%s:%s", + (char *) cur->nodesetval->nodeTab[i]->ns->prefix, + (char *) cur->nodesetval->nodeTab[i]->name); +#else + sprintf(name, "%s:%s", + (char *) cur->nodesetval->nodeTab[i]->ns->prefix, + (char *) cur->nodesetval->nodeTab[i]->name); +#endif + name[sizeof(name) - 1] = 0; + valuePush(ctxt, xmlXPathNewCString(name)); + } + break; + default: + valuePush(ctxt, + xmlXPathNewNodeSet(cur->nodesetval->nodeTab[i])); + xmlXPathLocalNameFunction(ctxt, 1); + } + } + xmlXPathFreeObject(cur); +} + +/** + * xmlXPathStringFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the string() XPath function + * string string(object?) + * he string function converts an object to a string as follows: + * - A node-set is converted to a string by returning the value of + * the node in the node-set that is first in document order. + * If the node-set is empty, an empty string is returned. 
+ * - A number is converted to a string as follows
+ * + NaN is converted to the string NaN
+ * + positive zero is converted to the string 0
+ * + negative zero is converted to the string 0
+ * + positive infinity is converted to the string Infinity
+ * + negative infinity is converted to the string -Infinity
+ * + if the number is an integer, the number is represented in
+ * decimal form as a Number with no decimal point and no leading
+ * zeros, preceded by a minus sign (-) if the number is negative
+ * + otherwise, the number is represented in decimal form as a
+ * Number including a decimal point with at least one digit
+ * before the decimal point and at least one digit after the
+ * decimal point, preceded by a minus sign (-) if the number
+ * is negative; there must be no leading zeros before the decimal
+ * point apart possibly from the one required digit immediately
+ * before the decimal point; beyond the one required digit
+ * after the decimal point there must be as many, but only as
+ * many, more digits as are needed to uniquely distinguish the
+ * number from all other IEEE 754 numeric values.
+ * - The boolean false value is converted to the string false.
+ * The boolean true value is converted to the string true.
+ *
+ * If the argument is omitted, it defaults to a node-set with the
+ * context node as its only member.
+ */ +void +xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr cur; + + if (nargs == 0) { + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + nargs = 1; + } + + CHECK_ARITY(1); + cur = valuePop(ctxt); + if (cur == NULL) XP_ERROR(XPATH_INVALID_OPERAND); + switch (cur->type) { + case XPATH_UNDEFINED: +#ifdef DEBUG_EXPR + xmlGenericError(xmlGenericErrorContext, "String: undefined\n"); +#endif + valuePush(ctxt, xmlXPathNewCString("")); + break; + case XPATH_XSLT_TREE: + case XPATH_NODESET: + if (cur->nodesetval == NULL) + valuePush(ctxt, xmlXPathNewCString("")); + else if (cur->nodesetval->nodeNr == 0) { + valuePush(ctxt, xmlXPathNewCString("")); + } else { + xmlChar *res; + int i = 0; /* Should be first in document order !!!!! */ + res = xmlNodeGetContent(cur->nodesetval->nodeTab[i]); + valuePush(ctxt, xmlXPathNewString(res)); + if (res != NULL) + xmlFree(res); + } + xmlXPathFreeObject(cur); + return; + case XPATH_STRING: + valuePush(ctxt, cur); + return; + case XPATH_BOOLEAN: + if (cur->boolval) valuePush(ctxt, xmlXPathNewCString("true")); + else valuePush(ctxt, xmlXPathNewCString("false")); + xmlXPathFreeObject(cur); + return; + case XPATH_NUMBER: { + char buf[100]; + + if (isnan(cur->floatval)) + sprintf(buf, "NaN"); + else if (isinf(cur->floatval) > 0) + sprintf(buf, "+Infinity"); + else if (isinf(cur->floatval) < 0) + sprintf(buf, "-Infinity"); + else + sprintf(buf, "%0g", cur->floatval); + valuePush(ctxt, xmlXPathNewCString(buf)); + xmlXPathFreeObject(cur); + return; + } + case XPATH_USERS: + case XPATH_POINT: + case XPATH_RANGE: + case XPATH_LOCATIONSET: + TODO + valuePush(ctxt, xmlXPathNewCString("")); + break; + } + STRANGE +} + +/** + * xmlXPathStringLengthFunction: + * @ctxt: the XPath Parser context + * @nargs: the number of arguments + * + * Implement the string-length() XPath function + * number string-length(string?) 
+ * The string-length returns the number of characters in the string
+ * (see [3.6 Strings]). If the argument is omitted, it defaults to
+ * the context node converted to a string, in other words the value
+ * of the context node.
+ *
+ * The result is pushed on the value stack as a number. Without an
+ * argument and without a context node the result is 0.
+ * NOTE(review): the length comes from xmlStrlen(); presumably that is
+ * a byte count, which differs from the character count for multi-byte
+ * UTF-8 content - confirm.
+ */
+void
+xmlXPathStringLengthFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr cur;
+
+    if (nargs == 0) {
+        if (ctxt->context->node == NULL) {
+            valuePush(ctxt, xmlXPathNewFloat(0));
+        } else {
+            xmlChar *content;
+
+            content = xmlNodeGetContent(ctxt->context->node);
+            valuePush(ctxt, xmlXPathNewFloat(xmlStrlen(content)));
+            /* same guard as xmlXPathStringFunction uses for node content */
+            if (content != NULL)
+                xmlFree(content);
+        }
+        return;
+    }
+    CHECK_ARITY(1);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    cur = valuePop(ctxt);
+    valuePush(ctxt, xmlXPathNewFloat(xmlStrlen(cur->stringval)));
+    xmlXPathFreeObject(cur);
+}
+
+/**
+ * xmlXPathConcatFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the concat() XPath function
+ *    string concat(string, string, string*)
+ * The concat function returns the concatenation of its arguments.
+ *
+ * Arguments are popped from the value stack last-to-first; each newly
+ * popped argument is placed in front of what has been assembled so far
+ * and the final string object is pushed back. An operand that is not a
+ * string after the cast raises XPATH_INVALID_TYPE.
+ */
+void
+xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr cur, newobj;
+    xmlChar *tmp;
+
+    if (nargs < 2) {
+        CHECK_ARITY(2);
+    }
+
+    CAST_TO_STRING;
+    cur = valuePop(ctxt);
+    if ((cur == NULL) || (cur->type != XPATH_STRING)) {
+        /*
+         * Report the problem exactly like the loop below does instead
+         * of returning silently with nothing pushed on the stack.
+         */
+        xmlXPathFreeObject(cur);
+        XP_ERROR(XPATH_INVALID_TYPE);
+    }
+    nargs--;
+
+    while (nargs > 0) {
+        CAST_TO_STRING;
+        newobj = valuePop(ctxt);
+        if ((newobj == NULL) || (newobj->type != XPATH_STRING)) {
+            xmlXPathFreeObject(newobj);
+            xmlXPathFreeObject(cur);
+            XP_ERROR(XPATH_INVALID_TYPE);
+        }
+        /*
+         * tmp = <newobj string> + <cur string>. The two string pointers
+         * are then exchanged so that cur owns the concatenation and
+         * newobj owns cur's previous string, which is released together
+         * with newobj just below.
+         */
+        tmp = xmlStrcat(newobj->stringval, cur->stringval);
+        newobj->stringval = cur->stringval;
+        cur->stringval = tmp;
+
+        xmlXPathFreeObject(newobj);
+        nargs--;
+    }
+    valuePush(ctxt, cur);
+}
+
+/**
+ * xmlXPathContainsFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the contains() XPath function
+ *    boolean contains(string, string)
+ * The contains function returns true if the first argument string
+ * contains the second argument string, and otherwise returns false.
+ *
+ * Both operands are popped (second argument first), the boolean result
+ * is pushed, and the two operands are released.
+ */
+void
+xmlXPathContainsFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr haystack;
+    xmlXPathObjectPtr pattern;
+    int found;
+
+    CHECK_ARITY(2);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    pattern = valuePop(ctxt);
+    CAST_TO_STRING;
+    haystack = valuePop(ctxt);
+    if ((haystack == NULL) || (haystack->type != XPATH_STRING)) {
+        xmlXPathFreeObject(haystack);
+        xmlXPathFreeObject(pattern);
+        XP_ERROR(XPATH_INVALID_TYPE);
+    }
+
+    found = (xmlStrstr(haystack->stringval, pattern->stringval) != NULL);
+    valuePush(ctxt, xmlXPathNewBoolean(found));
+
+    xmlXPathFreeObject(haystack);
+    xmlXPathFreeObject(pattern);
+}
+
+/**
+ * xmlXPathStartsWithFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the starts-with() XPath function
+ *    boolean starts-with(string, string)
+ * The result is true when the first argument string begins with the
+ * second argument string, false otherwise. Only as many leading
+ * characters as the second argument holds are compared.
+ */
+void
+xmlXPathStartsWithFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr haystack;
+    xmlXPathObjectPtr pattern;
+    int patlen;
+    int matches;
+
+    CHECK_ARITY(2);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    pattern = valuePop(ctxt);
+    CAST_TO_STRING;
+    haystack = valuePop(ctxt);
+    if ((haystack == NULL) || (haystack->type != XPATH_STRING)) {
+        xmlXPathFreeObject(haystack);
+        xmlXPathFreeObject(pattern);
+        XP_ERROR(XPATH_INVALID_TYPE);
+    }
+
+    patlen = xmlStrlen(pattern->stringval);
+    matches = (xmlStrncmp(haystack->stringval, pattern->stringval,
+                          patlen) == 0);
+    valuePush(ctxt, xmlXPathNewBoolean(matches));
+
+    xmlXPathFreeObject(haystack);
+    xmlXPathFreeObject(pattern);
+}
+
+/**
+ * xmlXPathSubstringFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the substring() XPath function
+ *    string substring(string, number, number?)
+ * The substring function returns the substring of the first argument
+ * starting at the position specified in the second argument with
+ * length specified in the third argument.
+ * For example,
+ * substring("12345",2,3) returns "234". If the third argument is not
+ * specified, it returns the substring starting at the position specified
+ * in the second argument and continuing to the end of the string. For
+ * example, substring("12345",2) returns "2345".  More precisely, each
+ * character in the string (see [3.6 Strings]) is considered to have a
+ * numeric position: the position of the first character is 1, the position
+ * of the second character is 2 and so on. The returned substring contains
+ * those characters for which the position of the character is greater than
+ * or equal to the second argument and, if the third argument is specified,
+ * less than the sum of the second and third arguments; the comparisons
+ * and addition used for the above follow the standard IEEE 754 rules. Thus:
+ *  - substring("12345", 1.5, 2.6) returns "234"
+ *  - substring("12345", 0, 3) returns "12"
+ *  - substring("12345", 0 div 0, 3) returns ""
+ *  - substring("12345", 1, 0 div 0) returns ""
+ *  - substring("12345", -42, 1 div 0) returns "12345"
+ *  - substring("12345", -1 div 0, 1 div 0) returns ""
+ *
+ * Implementation notes: both bounds are clamped to the range
+ * [0, string length + 1] while they are still doubles, so NaN, the
+ * infinities and huge values are never converted to int. A NaN bound
+ * yields the empty string. Fractional bounds are moved up to the next
+ * integer, as the original code did.
+ */
+void
+xmlXPathSubstringFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr str, start, len;
+    double le, in;
+    int i, l, slen;
+    xmlChar *ret = NULL;
+
+    /*
+     * Conformance needs to be checked !!!!!
+     */
+    if (nargs < 2) {
+        CHECK_ARITY(2);
+    }
+    if (nargs > 3) {
+        CHECK_ARITY(3);
+    }
+    if (nargs == 3) {
+        CAST_TO_NUMBER;
+        CHECK_TYPE(XPATH_NUMBER);
+        len = valuePop(ctxt);
+        le = len->floatval;
+        xmlXPathFreeObject(len);
+    } else {
+        le = 0;
+    }
+    CAST_TO_NUMBER;
+    CHECK_TYPE(XPATH_NUMBER);
+    start = valuePop(ctxt);
+    in = start->floatval;
+    xmlXPathFreeObject(start);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    str = valuePop(ctxt);
+    slen = xmlStrlen(str->stringval);
+
+    /* le becomes the first position that is NOT part of the result */
+    if (nargs == 3)
+        le += in;
+    else
+        le = slen + 1.0;    /* no length given: run to the end */
+
+    if (!isnan(in) && !isnan(le)) {
+        /* clamp as doubles so the int conversions below are defined */
+        if (in < 0)
+            in = 0;
+        else if (in > slen + 1)
+            in = slen + 1;
+        if (le < 0)
+            le = 0;
+        else if (le > slen + 1)
+            le = slen + 1;
+
+        /* integer index of the first char */
+        i = (int) in;
+        if (((double)i) != in) i++;
+
+        /* integer index of the last char */
+        l = (int) le;
+        if (((double)l) != le) l++;
+
+        /* back to a zero based len */
+        i--;
+        l--;
+
+        /* check against the string len */
+        if (l > slen) {
+            l = slen;
+        }
+        if (i < 0) {
+            i = 0;
+        }
+
+        /* number of chars to copy, never more than what is left */
+        l -= i;
+        if (l > 0)
+            ret = xmlStrsub(str->stringval, i, l);
+    }
+
+    if (ret == NULL)
+        valuePush(ctxt, xmlXPathNewCString(""));
+    else {
+        valuePush(ctxt, xmlXPathNewString(ret));
+        xmlFree(ret);
+    }
+    xmlXPathFreeObject(str);
+}
+
+/**
+ * xmlXPathSubstringBeforeFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the substring-before() XPath function
+ *    string substring-before(string, string)
+ * The substring-before function returns the substring of the first
+ * argument string that precedes the first occurrence of the second
+ * argument string in the first argument string, or the empty string
+ * if the first argument string does not contain the second argument
+ * string. For example, substring-before("1999/04/01","/") returns 1999.
+ *
+ * Both operands are popped and released; exactly one string result is
+ * pushed unless an error is raised.
+ */
+void
+xmlXPathSubstringBeforeFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr str;
+    xmlXPathObjectPtr find;
+    xmlBufferPtr target;
+    const xmlChar *point;
+    int offset;
+
+    CHECK_ARITY(2);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    find = valuePop(ctxt);
+    CAST_TO_STRING;
+    str = valuePop(ctxt);
+    /* same operand validation as xmlXPathContainsFunction */
+    if ((str == NULL) || (str->type != XPATH_STRING)) {
+        xmlXPathFreeObject(str);
+        xmlXPathFreeObject(find);
+        XP_ERROR(XPATH_INVALID_TYPE);
+    }
+
+    target = xmlBufferCreate();
+    if (target) {
+        point = xmlStrstr(str->stringval, find->stringval);
+        if (point) {
+            /* copy everything located in front of the match */
+            offset = (int)(point - str->stringval);
+            xmlBufferAdd(target, str->stringval, offset);
+        }
+        valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target)));
+        xmlBufferFree(target);
+    } else {
+        /* no buffer: still push a result so the stack stays balanced */
+        valuePush(ctxt, xmlXPathNewCString(""));
+    }
+
+    xmlXPathFreeObject(str);
+    xmlXPathFreeObject(find);
+}
+
+/**
+ * xmlXPathSubstringAfterFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the substring-after() XPath function
+ *    string substring-after(string, string)
+ * The substring-after function returns the substring of the first
+ * argument string that follows the first occurrence of the second
+ * argument string in the first argument string, or the empty string
+ * if the first argument string does not contain the second argument
+ * string. For example, substring-after("1999/04/01","/") returns 04/01,
+ * and substring-after("1999/04/01","19") returns 99/04/01.
+ *
+ * Both operands are popped and released; exactly one string result is
+ * pushed unless an error is raised.
+ */
+void
+xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr str;
+    xmlXPathObjectPtr find;
+    xmlBufferPtr target;
+    const xmlChar *point;
+    int offset;
+
+    CHECK_ARITY(2);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    find = valuePop(ctxt);
+    CAST_TO_STRING;
+    str = valuePop(ctxt);
+    /* same operand validation as xmlXPathContainsFunction */
+    if ((str == NULL) || (str->type != XPATH_STRING)) {
+        xmlXPathFreeObject(str);
+        xmlXPathFreeObject(find);
+        XP_ERROR(XPATH_INVALID_TYPE);
+    }
+
+    target = xmlBufferCreate();
+    if (target) {
+        point = xmlStrstr(str->stringval, find->stringval);
+        if (point) {
+            /* skip the match itself, keep whatever follows it */
+            offset = (int)(point - str->stringval) + xmlStrlen(find->stringval);
+            xmlBufferAdd(target, &str->stringval[offset],
+                         xmlStrlen(str->stringval) - offset);
+        }
+        valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target)));
+        xmlBufferFree(target);
+    } else {
+        /* no buffer: still push a result so the stack stays balanced */
+        valuePush(ctxt, xmlXPathNewCString(""));
+    }
+
+    xmlXPathFreeObject(str);
+    xmlXPathFreeObject(find);
+}
+
+/**
+ * xmlXPathNormalizeFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the normalize-space() XPath function
+ *    string normalize-space(string?)
+ * The normalize-space function returns the argument string with white
+ * space normalized by stripping leading and trailing whitespace
+ * and replacing sequences of whitespace characters by a single
+ * space. Whitespace characters are the same allowed by the S production
+ * in XML. If the argument is omitted, it defaults to the context
+ * node converted to a string, in other words the value of the context node.
+ *
+ * The operand is popped and released; exactly one string result is
+ * pushed, the empty string when there is nothing to normalize.
+ */
+void
+xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr obj = NULL;
+    xmlChar *source = NULL;
+    xmlBufferPtr target;
+    xmlChar blank;
+
+    if (nargs == 0) {
+        /* Use current context node */
+        valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node));
+        xmlXPathStringFunction(ctxt, 1);
+        nargs = 1;
+    }
+
+    CHECK_ARITY(1);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    obj = valuePop(ctxt);
+    source = obj->stringval;
+
+    target = xmlBufferCreate();
+    if (target && source) {
+
+        /* Skip leading whitespaces */
+        while (IS_BLANK(*source))
+            source++;
+
+        /* Collapse intermediate whitespaces, and skip trailing whitespaces */
+        blank = 0;
+        while (*source) {
+            if (IS_BLANK(*source)) {
+                /*
+                 * Whatever the whitespace character was (tab, newline,
+                 * ...) the run is replaced by one space character.
+                 */
+                blank = 0x20;
+            } else {
+                /* a pending run is only emitted once more text follows */
+                if (blank) {
+                    xmlBufferAdd(target, &blank, 1);
+                    blank = 0;
+                }
+                xmlBufferAdd(target, source, 1);
+            }
+            source++;
+        }
+
+        valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target)));
+    } else {
+        /* nothing to normalize: keep the value stack balanced */
+        valuePush(ctxt, xmlXPathNewCString(""));
+    }
+    if (target)
+        xmlBufferFree(target);
+    xmlXPathFreeObject(obj);
+}
+
+/**
+ * xmlXPathTranslateFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the translate() XPath function
+ *    string translate(string, string, string)
+ * The translate function returns the first argument string with
+ * occurrences of characters in the second argument string replaced
+ * by the character at the corresponding position in the third argument
+ * string. For example, translate("bar","abc","ABC") returns the string
+ * BAr. If there is a character in the second argument string with no
+ * character at a corresponding position in the third argument string
+ * (because the second argument string is longer than the third argument
+ * string), then occurrences of that character in the first argument
+ * string are removed. For example, translate("--aaa--","abc-","ABC")
+ * returns "AAA".
+ * If a character occurs more than once in second
+ * argument string, then the first occurrence determines the replacement
+ * character. If the third argument string is longer than the second
+ * argument string, then excess characters are ignored.
+ *
+ * The three operands are popped and released; exactly one string result
+ * is pushed unless an error is raised.
+ */
+void
+xmlXPathTranslateFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr str;
+    xmlXPathObjectPtr from;
+    xmlXPathObjectPtr to;
+    xmlBufferPtr target;
+    int i, offset, max;
+    xmlChar ch;
+    const xmlChar *point;
+
+    CHECK_ARITY(3);
+
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    to = valuePop(ctxt);
+    CAST_TO_STRING;
+    from = valuePop(ctxt);
+    CAST_TO_STRING;
+    str = valuePop(ctxt);
+    /* same operand validation as xmlXPathContainsFunction */
+    if ((from == NULL) || (from->type != XPATH_STRING) ||
+        (str == NULL) || (str->type != XPATH_STRING)) {
+        xmlXPathFreeObject(str);
+        xmlXPathFreeObject(from);
+        xmlXPathFreeObject(to);
+        XP_ERROR(XPATH_INVALID_TYPE);
+    }
+
+    target = xmlBufferCreate();
+    if (target) {
+        max = xmlStrlen(to->stringval);
+        if (str->stringval != NULL) {
+            for (i = 0; (ch = str->stringval[i]); i++) {
+                point = xmlStrchr(from->stringval, ch);
+                if (point) {
+                    /* Warning: This may not work with UTF-8 */
+                    offset = (int)(point - from->stringval);
+                    /* no counterpart in "to": the character is dropped */
+                    if (offset < max)
+                        xmlBufferAdd(target, &to->stringval[offset], 1);
+                } else
+                    xmlBufferAdd(target, &ch, 1);
+            }
+        }
+        valuePush(ctxt, xmlXPathNewString(xmlBufferContent(target)));
+        xmlBufferFree(target);
+    } else {
+        /* no buffer: still push a result so the stack stays balanced */
+        valuePush(ctxt, xmlXPathNewCString(""));
+    }
+    xmlXPathFreeObject(str);
+    xmlXPathFreeObject(from);
+    xmlXPathFreeObject(to);
+}
+
+/**
+ * xmlXPathBooleanFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the boolean() XPath function
+ *    boolean boolean(object)
+ * The boolean function converts its argument to a boolean as follows:
+ *    - a number is true if and only if it is neither positive or
+ *      negative zero nor NaN
+ *    - a node-set is true if and only if it is non-empty
+ *    - a string is true if and only if its length is non-zero
+ *
+ * A boolean operand is pushed back unchanged; any other operand is
+ * released and replaced by a new boolean object.
+ */
+void
+xmlXPathBooleanFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr cur;
+    int res = 0;
+
+    CHECK_ARITY(1);
+    cur = valuePop(ctxt);
+    if (cur == NULL) XP_ERROR(XPATH_INVALID_OPERAND);
+    switch (cur->type) {
+        case XPATH_NODESET:
+        case XPATH_XSLT_TREE:
+            if ((cur->nodesetval == NULL) ||
+                (cur->nodesetval->nodeNr == 0)) res = 0;
+            else
+                res = 1;
+            break;
+        case XPATH_STRING:
+            if ((cur->stringval == NULL) ||
+                (cur->stringval[0] == 0)) res = 0;
+            else
+                res = 1;
+            break;
+        case XPATH_BOOLEAN:
+            valuePush(ctxt, cur);
+            return;
+        case XPATH_NUMBER:
+            /* NaN is non-zero for C but has to convert to false */
+            if ((cur->floatval != 0.0) && (!isnan(cur->floatval)))
+                res = 1;
+            break;
+        default:
+            STRANGE
+    }
+    xmlXPathFreeObject(cur);
+    valuePush(ctxt, xmlXPathNewBoolean(res));
+}
+
+/**
+ * xmlXPathNotFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the not() XPath function
+ *    boolean not(boolean)
+ * The not function returns true if its argument is false,
+ * and false otherwise.
+ *
+ * The value on top of the stack is negated in place.
+ */
+void
+xmlXPathNotFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    CHECK_ARITY(1);
+    CAST_TO_BOOLEAN;
+    CHECK_TYPE(XPATH_BOOLEAN);
+    ctxt->value->boolval = ! ctxt->value->boolval;
+}
+
+/**
+ * xmlXPathTrueFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the true() XPath function
+ *    boolean true()
+ */
+void
+xmlXPathTrueFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    CHECK_ARITY(0);
+    valuePush(ctxt, xmlXPathNewBoolean(1));
+}
+
+/**
+ * xmlXPathFalseFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the false() XPath function
+ *    boolean false()
+ */
+void
+xmlXPathFalseFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    CHECK_ARITY(0);
+    valuePush(ctxt, xmlXPathNewBoolean(0));
+}
+
+/**
+ * xmlXPathLangFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the lang() XPath function
+ *    boolean lang(string)
+ * The lang function returns true or false depending on whether the
+ * language of the context node as specified by xml:lang attributes
+ * is the same as or is a sublanguage of the language specified by
+ * the argument string.
+ * The language of the context node is determined
+ * by the value of the xml:lang attribute on the context node, or, if
+ * the context node has no xml:lang attribute, by the value of the
+ * xml:lang attribute on the nearest ancestor of the context node that
+ * has an xml:lang attribute. If there is no such attribute, then lang
+ * returns false. If there is such an attribute, then lang returns
+ * true if the attribute value is equal to the argument ignoring case,
+ * or if there is some suffix starting with - such that the attribute
+ * value is equal to the argument ignoring that suffix of the attribute
+ * value and ignoring case.
+ *
+ * The operand is popped and released and a boolean result is pushed.
+ */
+void
+xmlXPathLangFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr val;
+    const xmlChar *theLang;
+    const xmlChar *lang;
+    int ret = 0;
+    int i;
+
+    CHECK_ARITY(1);
+    CAST_TO_STRING;
+    CHECK_TYPE(XPATH_STRING);
+    val = valuePop(ctxt);
+    lang = val->stringval;
+    theLang = xmlNodeGetLang(ctxt->context->node);
+    if ((theLang != NULL) && (lang != NULL)) {
+        for (i = 0;lang[i] != 0;i++)
+            if (toupper(lang[i]) != toupper(theLang[i]))
+                goto not_equal;
+        /*
+         * The argument matched a prefix of the attribute value: that
+         * only counts when the value ends here or continues with a
+         * '-' suffix ("en" matches "en" and "en-US", not "english").
+         */
+        if ((theLang[i] == 0) || (theLang[i] == '-'))
+            ret = 1;
+    }
+not_equal:
+    /*
+     * NOTE(review): xmlNodeGetLang() presumably hands back a freshly
+     * allocated copy that the caller owns - confirm against tree.c.
+     */
+    if (theLang != NULL)
+        xmlFree((void *) theLang);
+    xmlXPathFreeObject(val);
+    valuePush(ctxt, xmlXPathNewBoolean(ret));
+}
+
+/**
+ * xmlXPathNumberFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the number() XPath function
+ *    number number(object?)
+ *
+ * Without an argument the content of the context node is evaluated
+ * (0 is pushed when there is no context node). Otherwise the operand
+ * is popped and converted: node-sets go through the string conversion
+ * first, strings are evaluated with xmlXPathStringEvalNumber(),
+ * booleans become 1 or 0 and a number is pushed back unchanged.
+ * A missing string content converts to NaN.
+ */
+void
+xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr cur;
+    double res;
+
+    if (nargs == 0) {
+        if (ctxt->context->node == NULL) {
+            valuePush(ctxt, xmlXPathNewFloat(0.0));
+        } else {
+            xmlChar* content = xmlNodeGetContent(ctxt->context->node);
+
+            if (content != NULL) {
+                res = xmlXPathStringEvalNumber(content);
+                xmlFree(content);
+            } else {
+                /* no content at all: nothing that could be a number */
+                res = xmlXPathNAN;
+            }
+            valuePush(ctxt, xmlXPathNewFloat(res));
+        }
+        return;
+    }
+
+    CHECK_ARITY(1);
+    cur = valuePop(ctxt);
+    if (cur == NULL) XP_ERROR(XPATH_INVALID_OPERAND);
+    switch (cur->type) {
+        case XPATH_UNDEFINED:
+#ifdef DEBUG_EXPR
+            xmlGenericError(xmlGenericErrorContext, "NUMBER: undefined\n");
+#endif
+            valuePush(ctxt, xmlXPathNewFloat(0.0));
+            break;
+        case XPATH_XSLT_TREE:
+        case XPATH_NODESET:
+            /* convert to a string first, then handle it as a string */
+            valuePush(ctxt, cur);
+            xmlXPathStringFunction(ctxt, 1);
+            cur = valuePop(ctxt);
+            if (cur == NULL) XP_ERROR(XPATH_INVALID_OPERAND);
+            /* fall through */
+        case XPATH_STRING:
+            if (cur->stringval != NULL)
+                res = xmlXPathStringEvalNumber(cur->stringval);
+            else
+                res = xmlXPathNAN;
+            valuePush(ctxt, xmlXPathNewFloat(res));
+            xmlXPathFreeObject(cur);
+            return;
+        case XPATH_BOOLEAN:
+            if (cur->boolval) valuePush(ctxt, xmlXPathNewFloat(1.0));
+            else valuePush(ctxt, xmlXPathNewFloat(0.0));
+            xmlXPathFreeObject(cur);
+            return;
+        case XPATH_NUMBER:
+            valuePush(ctxt, cur);
+            return;
+        case XPATH_USERS:
+        case XPATH_POINT:
+        case XPATH_RANGE:
+        case XPATH_LOCATIONSET:
+            TODO
+            valuePush(ctxt, xmlXPathNewFloat(0.0));
+            break;
+    }
+    /*
+     * Only reached from the branches that "break" (or from a type not
+     * listed above): the popped operand still has to be released.
+     */
+    xmlXPathFreeObject(cur);
+    STRANGE
+}
+
+/**
+ * xmlXPathSumFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the sum() XPath function
+ *    number sum(node-set)
+ * The sum function returns the sum of the values of the nodes in
+ * the argument node-set.
+ *
+ * The operand must be a node-set (or XSLT tree), otherwise
+ * XPATH_INVALID_TYPE is raised. An empty or missing node table sums
+ * to 0.
+ */
+void
+xmlXPathSumFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    xmlXPathObjectPtr cur;
+    int i;
+
+    CHECK_ARITY(1);
+    if ((ctxt->value == NULL) ||
+        ((ctxt->value->type != XPATH_NODESET) &&
+         (ctxt->value->type != XPATH_XSLT_TREE)))
+        XP_ERROR(XPATH_INVALID_TYPE);
+    cur = valuePop(ctxt);
+
+    /* nodesetval may be NULL, see the same test in xmlXPathStringFunction */
+    if ((cur->nodesetval == NULL) || (cur->nodesetval->nodeNr == 0)) {
+        valuePush(ctxt, xmlXPathNewFloat(0.0));
+    } else {
+        /* seed the accumulator with the first node converted to a number */
+        valuePush(ctxt,
+                  xmlXPathNewNodeSet(cur->nodesetval->nodeTab[0]));
+        xmlXPathNumberFunction(ctxt, 1);
+        /* then add every remaining node, one at a time */
+        for (i = 1; i < cur->nodesetval->nodeNr; i++) {
+            valuePush(ctxt,
+                      xmlXPathNewNodeSet(cur->nodesetval->nodeTab[i]));
+            xmlXPathAddValues(ctxt);
+        }
+    }
+    xmlXPathFreeObject(cur);
+}
+
+/**
+ * xmlXPathFloorFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the floor() XPath function
+ *    number floor(number)
+ * The floor function returns the largest (closest to positive infinity)
+ * number that is not greater than the argument and that is an integer.
+ *
+ * The number on top of the stack is modified in place. NaN and the
+ * infinities are left untouched.
+ * NOTE(review): the value still goes through an int conversion, so
+ * magnitudes beyond the range of int are not handled - confirm whether
+ * the libm floor() can be used instead.
+ */
+void
+xmlXPathFloorFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    double f;
+
+    CHECK_ARITY(1);
+    CAST_TO_NUMBER;
+    CHECK_TYPE(XPATH_NUMBER);
+
+    if (isnan(ctxt->value->floatval) || isinf(ctxt->value->floatval))
+        return;
+#if 0
+    ctxt->value->floatval = floor(ctxt->value->floatval);
+#else
+    /* floor(0.999999999999) => 1.0 !!!!!!!!!!! */
+    f = (double)((int) ctxt->value->floatval);
+    /* the conversion truncates toward zero: step down for negatives */
+    if (f > ctxt->value->floatval)
+        f -= 1;
+    ctxt->value->floatval = f;
+#endif
+}
+
+/**
+ * xmlXPathCeilingFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the ceiling() XPath function
+ *    number ceiling(number)
+ * The ceiling function returns the smallest (closest to negative infinity)
+ * number that is not less than the argument and that is an integer.
+ *
+ * The number on top of the stack is modified in place. NaN and the
+ * infinities are left untouched.
+ * NOTE(review): the value still goes through an int conversion, so
+ * magnitudes beyond the range of int are not handled.
+ */
+void
+xmlXPathCeilingFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    double f;
+
+    CHECK_ARITY(1);
+    CAST_TO_NUMBER;
+    CHECK_TYPE(XPATH_NUMBER);
+
+    if (isnan(ctxt->value->floatval) || isinf(ctxt->value->floatval))
+        return;
+#if 0
+    ctxt->value->floatval = ceil(ctxt->value->floatval);
+#else
+    f = (double)((int) ctxt->value->floatval);
+    /*
+     * The conversion truncates toward zero, which already is the
+     * ceiling of a negative number: only step up when it went below.
+     */
+    if (f < ctxt->value->floatval)
+        f += 1;
+    ctxt->value->floatval = f;
+#endif
+}
+
+/**
+ * xmlXPathRoundFunction:
+ * @ctxt:  the XPath Parser context
+ * @nargs:  the number of arguments
+ *
+ * Implement the round() XPath function
+ *    number round(number)
+ * The round function returns the number that is closest to the
+ * argument and that is an integer. If there are two such numbers,
+ * then the one that is closest to positive infinity is returned.
+ *
+ * The number on top of the stack is modified in place. NaN, the
+ * infinities and zero are left untouched.
+ * NOTE(review): the value still goes through an int conversion, so
+ * magnitudes beyond the range of int are not handled.
+ */
+void
+xmlXPathRoundFunction(xmlXPathParserContextPtr ctxt, int nargs) {
+    double f;
+
+    CHECK_ARITY(1);
+    CAST_TO_NUMBER;
+    CHECK_TYPE(XPATH_NUMBER);
+
+    /* NaN never compares equal to anything: it needs isnan() */
+    if (isnan(ctxt->value->floatval) ||
+        isinf(ctxt->value->floatval) ||
+        (ctxt->value->floatval == 0.0))
+        return;
+
+#if 0
+    f = floor(ctxt->value->floatval);
+#else
+    f = (double)((int) ctxt->value->floatval);
+    /* the conversion truncates toward zero: step down for negatives */
+    if (f > ctxt->value->floatval)
+        f -= 1;
+#endif
+    if (ctxt->value->floatval < f + 0.5)
+        ctxt->value->floatval = f;
+    else
+        ctxt->value->floatval = f + 1;
+}
+
+/************************************************************************
+ *                                                                      *
+ *                      The Parser                                      *
+ *                                                                      *
+ ************************************************************************/
+
+/*
+ * a couple of forward declarations since we use a recursive call based
+ * implementation.
+ */
+void xmlXPathEvalExpr(xmlXPathParserContextPtr ctxt);
+void xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt);
+void xmlXPathEvalLocationPath(xmlXPathParserContextPtr ctxt);
+#ifdef VMS
+/* on VMS the function is declared under a shorter name and aliased */
+void xmlXPathEvalRelLocationPath(xmlXPathParserContextPtr ctxt);
+#define xmlXPathEvalRelativeLocationPath xmlXPathEvalRelLocationPath
+#else
+void xmlXPathEvalRelativeLocationPath(xmlXPathParserContextPtr ctxt);
+#endif
+
+/**
+ * xmlXPathParseNCName:
+ * @ctxt:  the XPath Parser context
+ *
+ * Parse an XML namespace non qualified name at the current position
+ * and move past it.
+ *
+ * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
+ *
+ * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
+ *                       CombiningChar | Extender
+ *
+ * Returns a copy of the name made with xmlStrndup(), or NULL when the
+ * current character cannot start an NCName.
+ */
+
+xmlChar *
+xmlXPathParseNCName(xmlXPathParserContextPtr ctxt) {
+    const xmlChar *start;
+
+    /* an NCName starts with a letter or an underscore */
+    if (!(IS_LETTER(CUR) || (CUR == '_')))
+        return(NULL);
+    start = NEXT;
+
+    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+           (CUR == '.') || (CUR == '-') ||
+           (CUR == '_') ||
+           (IS_COMBINING(CUR)) ||
+           (IS_EXTENDER(CUR))) {
+        NEXT;
+    }
+
+    return(xmlStrndup(start, CUR_PTR - start));
+}
+
+/**
+ * xmlXPathParseQName:
+ * @ctxt:  the XPath Parser context
+ * @prefix:  a xmlChar **
+ *
+ * Parse an XML qualified name.
+ *
+ * [NS 5] QName ::= (Prefix ':')? LocalPart
+ *
+ * [NS 6] Prefix ::= NCName
+ *
+ * [NS 7] LocalPart ::= NCName
+ *
+ * Returns the local part. @prefix receives the Prefix when a ':' follows
+ * the first NCName and is set to NULL otherwise.
+ */
+
+xmlChar *
+xmlXPathParseQName(xmlXPathParserContextPtr ctxt, xmlChar **prefix) {
+    xmlChar *first;
+
+    *prefix = NULL;
+    first = xmlXPathParseNCName(ctxt);
+    if (CUR != ':')
+        return(first);
+
+    /* what was read so far is the prefix, the local part comes next */
+    *prefix = first;
+    NEXT;
+    return(xmlXPathParseNCName(ctxt));
+}
+
+/**
+ * xmlXPathParseName:
+ * @ctxt:  the XPath Parser context
+ *
+ * parse an XML name
+ *
+ * [4] NameChar ::= Letter | Digit | '.'
+ *                  | '-' | '_' | ':' |
+ *                  CombiningChar | Extender
+ *
+ * [5] Name ::= (Letter | '_' | ':') (NameChar)*
+ *
+ * Returns a copy of the name made with xmlStrndup(), or NULL when the
+ * current character cannot start a name.
+ * NOTE(review): production [5] allows a leading ':' but the check
+ * below does not accept it - confirm which one is intended.
+ */
+
+xmlChar *
+xmlXPathParseName(xmlXPathParserContextPtr ctxt) {
+    const xmlChar *q;
+    xmlChar *ret = NULL;
+
+    if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
+    q = NEXT;
+
+    /* TODO Make this UTF8 compliant !!! */
+    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
+           (CUR == '.') || (CUR == '-') ||
+           (CUR == '_') || (CUR == ':') ||
+           (IS_COMBINING(CUR)) ||
+           (IS_EXTENDER(CUR)))
+        NEXT;
+
+    ret = xmlStrndup(q, CUR_PTR - q);
+
+    return(ret);
+}
+
+/**
+ * xmlXPathStringEvalNumber:
+ * @str:  A string to scan
+ *
+ *  [30]   Number ::=   Digits ('.' Digits?)?
+ *                    | '.' Digits
+ *  [31]   Digits ::=   [0-9]+
+ *
+ * Parse and evaluate a Number in the string
+ * In complement of the Number expression, this function also handles
+ * negative values : '-' Number.
+ * Blanks are accepted before and after the number.
+ *
+ * Returns the double value, or NaN when @str is NULL, holds no digit
+ * at all or contains anything else than a single number.
+ */
+double
+xmlXPathStringEvalNumber(const xmlChar *str) {
+    const xmlChar *cur = str;
+    double ret = 0.0;
+    double mult = 1;
+    int ok = 0;
+    int isneg = 0;
+
+    if (cur == NULL) return(xmlXPathNAN);
+    while (IS_BLANK(*cur)) cur++;
+    if ((*cur != '.') && ((*cur < '0') || (*cur > '9')) && (*cur != '-')) {
+        return(xmlXPathNAN);
+    }
+    if (*cur == '-') {
+        isneg = 1;
+        cur++;
+    }
+    /* integer part */
+    while ((*cur >= '0') && (*cur <= '9')) {
+        ret = ret * 10 + (*cur - '0');
+        ok = 1;
+        cur++;
+    }
+    if (*cur == '.') {
+        cur++;
+        /* a lone '.' without any digit around it is not a number */
+        if (((*cur < '0') || (*cur > '9')) && (!ok)) {
+            return(xmlXPathNAN);
+        }
+        /* fractional part */
+        while ((*cur >= '0') && (*cur <= '9')) {
+            mult /= 10;
+            ret = ret + (*cur - '0') * mult;
+            cur++;
+        }
+    } else if (!ok) {
+        /* a sign that is not followed by a number, e.g. "-" */
+        return(xmlXPathNAN);
+    }
+    while (IS_BLANK(*cur)) cur++;
+    if (*cur != 0) return(xmlXPathNAN);
+    if (isneg) ret = -ret;
+    return(ret);
+}
+
+/**
+ * xmlXPathEvalNumber:
+ * @ctxt:  the XPath Parser context
+ *
+ *  [30]   Number ::=   Digits ('.' Digits?)?
+ *                    | '.'
+ *                      Digits
+ *  [31]   Digits ::=   [0-9]+
+ *
+ * Read a Number at the current position and push its value on the
+ * stack. XPATH_NUMBER_ERROR is raised when no number starts here.
+ *
+ */
+void
+xmlXPathEvalNumber(xmlXPathParserContextPtr ctxt) {
+    double value = 0.0;
+    double scale = 1;
+    int seen_digit = 0;
+
+    CHECK_ERROR;
+    if (!((CUR == '.') || ((CUR >= '0') && (CUR <= '9')))) {
+        XP_ERROR(XPATH_NUMBER_ERROR);
+    }
+
+    /* integer part */
+    for (; (CUR >= '0') && (CUR <= '9'); NEXT) {
+        value = value * 10 + (CUR - '0');
+        seen_digit = 1;
+    }
+
+    /* optional fractional part */
+    if (CUR == '.') {
+        NEXT;
+        if ((!seen_digit) && ((CUR < '0') || (CUR > '9'))) {
+            XP_ERROR(XPATH_NUMBER_ERROR);
+        }
+        for (; (CUR >= '0') && (CUR <= '9'); NEXT) {
+            scale /= 10;
+            value = value + (CUR - '0') * scale;
+        }
+    }
+    valuePush(ctxt, xmlXPathNewFloat(value));
+}
+
+/**
+ * xmlXPathEvalLiteral:
+ * @ctxt:  the XPath Parser context
+ *
+ * Read a quoted Literal at the current position and push its content,
+ * without the quotes, on the stack as a string.
+ *
+ *  [29]   Literal ::=   '"' [^"]* '"'
+ *                    | "'" [^']* "'"
+ *
+ * TODO: xmlXPathEvalLiteral memory allocation could be improved.
+ */
+void
+xmlXPathEvalLiteral(xmlXPathParserContextPtr ctxt) {
+    const xmlChar *start;
+    xmlChar *content;
+    int quote;
+
+    /* the opening quote decides which character closes the literal */
+    quote = CUR;
+    if ((quote != '"') && (quote != '\'')) {
+        XP_ERROR(XPATH_START_LITERAL_ERROR);
+    }
+    NEXT;
+    start = CUR_PTR;
+    while ((IS_CHAR(CUR)) && (CUR != quote))
+        NEXT;
+    if (!IS_CHAR(CUR)) {
+        XP_ERROR(XPATH_UNFINISHED_LITERAL_ERROR);
+    }
+    content = xmlStrndup(start, CUR_PTR - start);
+    NEXT;
+
+    if (content == NULL)
+        return;
+    valuePush(ctxt, xmlXPathNewString(content));
+    xmlFree(content);
+}
+
+/**
+ * xmlXPathEvalVariableReference:
+ * @ctxt:  the XPath Parser context
+ *
+ * Parse a VariableReference, evaluate it and push it on the stack.
+ *
+ * The variable bindings consist of a mapping from variable names
+ * to variable values.
+ * The value of a variable is an object, which can be
+ * of any of the types that are possible for the value of an expression,
+ * and may also be of additional types not specified here.
+ *
+ * Early evaluation is possible since:
+ * The variable bindings [...] used to evaluate a subexpression are
+ * always the same as those used to evaluate the containing expression.
+ *
+ *  [36]   VariableReference ::=   '$' QName
+ *
+ * Raises XPATH_VARIABLE_REF_ERROR when no '$' QName is found here and
+ * XPATH_UNDEF_VARIABLE_ERROR when the lookup gives nothing. Prefixed
+ * names are not supported yet and are reported as undefined.
+ */
+void
+xmlXPathEvalVariableReference(xmlXPathParserContextPtr ctxt) {
+    xmlChar *name;
+    xmlChar *prefix;
+    xmlXPathObjectPtr value;
+
+    SKIP_BLANKS;
+    if (CUR != '$') {
+        XP_ERROR(XPATH_VARIABLE_REF_ERROR);
+    }
+    NEXT;
+    name = xmlXPathParseQName(ctxt, &prefix);
+    if (name == NULL) {
+        /* "prefix:" without a local part still allocated the prefix */
+        if (prefix != NULL) xmlFree(prefix);
+        XP_ERROR(XPATH_VARIABLE_REF_ERROR);
+    }
+    if (prefix == NULL) {
+        value = xmlXPathVariableLookup(ctxt->context, name);
+    } else {
+        TODO;
+        value = NULL;
+    }
+    xmlFree(name);
+    if (prefix != NULL) xmlFree(prefix);
+    if (value == NULL) {
+        XP_ERROR(XPATH_UNDEF_VARIABLE_ERROR);
+    }
+    valuePush(ctxt, value);
+    SKIP_BLANKS;
+}
+
+/**
+ * xmlXPathIsNodeType:
+ * @name:  a name string
+ *
+ * Is the name given a NodeType one.
+ *
+ *  [38]   NodeType ::=   'comment'
+ *                    | 'text'
+ *                    | 'processing-instruction'
+ *                    | 'node'
+ *
+ * Returns 1 if true 0 otherwise (0 for a NULL @name)
+ */
+int
+xmlXPathIsNodeType(const xmlChar *name) {
+    if (name == NULL)
+        return(0);
+
+    if (xmlStrEqual(name, BAD_CAST "comment"))
+        return(1);
+    if (xmlStrEqual(name, BAD_CAST "text"))
+        return(1);
+    if (xmlStrEqual(name, BAD_CAST "processing-instruction"))
+        return(1);
+    if (xmlStrEqual(name, BAD_CAST "node"))
+        return(1);
+    return(0);
+}
+
+/**
+ * xmlXPathEvalFunctionCall:
+ * @ctxt:  the XPath Parser context
+ *
+ *  [16]   FunctionCall ::=   FunctionName '(' ( Argument ( ',' Argument)*)?
+ *                            ')'
+ *  [17]   Argument ::=   Expr
+ *
+ * Parse and evaluate a function call, the evaluation of all arguments are
+ * pushed on the stack
+ *
+ * The function is looked up before its arguments are parsed; prefixed
+ * names are not supported yet and are reported as unknown functions.
+ * The function is then called with the number of arguments that were
+ * evaluated. Evaluation stops at the first argument that fails.
+ */
+void
+xmlXPathEvalFunctionCall(xmlXPathParserContextPtr ctxt) {
+    xmlChar *name;
+    xmlChar *prefix;
+    xmlXPathFunction func;
+    int nbargs = 0;
+
+    name = xmlXPathParseQName(ctxt, &prefix);
+    if (name == NULL) {
+        /* "prefix:" without a local part still allocated the prefix */
+        if (prefix != NULL) xmlFree(prefix);
+        XP_ERROR(XPATH_EXPR_ERROR);
+    }
+    SKIP_BLANKS;
+    if (prefix == NULL) {
+        func = xmlXPathFunctionLookup(ctxt->context, name);
+    } else {
+        TODO;
+        func = NULL;
+    }
+    if (func == NULL) {
+        xmlFree(name);
+        if (prefix != NULL) xmlFree(prefix);
+        XP_ERROR(XPATH_UNKNOWN_FUNC_ERROR);
+    }
+#ifdef DEBUG_EXPR
+    if (prefix == NULL)
+        xmlGenericError(xmlGenericErrorContext, "Calling function %s\n",
+                        name);
+    else
+        xmlGenericError(xmlGenericErrorContext, "Calling function %s:%s\n",
+                        prefix, name);
+#endif
+
+    xmlFree(name);
+    if (prefix != NULL) xmlFree(prefix);
+
+    if (CUR != '(') {
+        XP_ERROR(XPATH_EXPR_ERROR);
+    }
+    NEXT;
+    SKIP_BLANKS;
+
+    while (CUR != ')') {
+        xmlXPathEvalExpr(ctxt);
+        /* do not call the function on a partially built stack */
+        CHECK_ERROR;
+        nbargs++;
+        if (CUR == ')') break;
+        if (CUR != ',') {
+            XP_ERROR(XPATH_EXPR_ERROR);
+        }
+        NEXT;
+        SKIP_BLANKS;
+    }
+    NEXT;
+    SKIP_BLANKS;
+    func(ctxt, nbargs);
+}
+
+/**
+ * xmlXPathEvalPrimaryExpr:
+ * @ctxt:  the XPath Parser context
+ *
+ *  [15]   PrimaryExpr ::=   VariableReference
+ *                | '(' Expr ')'
+ *                | Literal
+ *                | Number
+ *                | FunctionCall
+ *
+ * Parse and evaluate a primary expression, then push the result on the stack
+ *
+ * The alternative is selected from the current character: '$', '(',
+ * a digit, a quote, and a function call for anything else.
+ */
+void
+xmlXPathEvalPrimaryExpr(xmlXPathParserContextPtr ctxt) {
+    SKIP_BLANKS;
+    if (CUR == '$') xmlXPathEvalVariableReference(ctxt);
+    else if (CUR == '(') {
+        NEXT;
+        SKIP_BLANKS;
+        xmlXPathEvalExpr(ctxt);
+        if (CUR != ')') {
+            XP_ERROR(XPATH_EXPR_ERROR);
+        }
+        NEXT;
+        SKIP_BLANKS;
+    } else if (IS_DIGIT(CUR)) {
+        xmlXPathEvalNumber(ctxt);
+    } else if ((CUR == '\'') || (CUR == '"')) {
+        xmlXPathEvalLiteral(ctxt);
+    } else {
+        xmlXPathEvalFunctionCall(ctxt);
+    }
+    SKIP_BLANKS;
+}
+
+/**
+ * xmlXPathEvalFilterExpr:
+ * @ctxt:  the XPath Parser context
+ *
+ *  [20]   FilterExpr ::=   PrimaryExpr
+ *               | FilterExpr Predicate
+ *
+ * Parse and evaluate a filter expression, then push the result on the stack
+ * Square brackets are used to filter expressions in the same way that
+ * they are used in location paths. It is an error if the expression to
+ * be filtered does not evaluate to a node-set. The context node list
+ * used for evaluating the expression in square brackets is the node-set
+ * to be filtered listed in document order.
+ *
+ * A location set is accepted as well and is filtered with
+ * xmlXPtrEvalRangePredicate(). Evaluation stops at the first predicate
+ * that fails.
+ */
+
+void
+xmlXPathEvalFilterExpr(xmlXPathParserContextPtr ctxt) {
+    xmlXPathEvalPrimaryExpr(ctxt);
+    CHECK_ERROR;
+    SKIP_BLANKS;
+
+    while (CUR == '[') {
+        if ((ctxt->value == NULL) ||
+            ((ctxt->value->type != XPATH_NODESET) &&
+             (ctxt->value->type != XPATH_LOCATIONSET)))
+            XP_ERROR(XPATH_INVALID_TYPE)
+
+        if (ctxt->value->type == XPATH_NODESET)
+            xmlXPathEvalPredicate(ctxt);
+        else
+            xmlXPtrEvalRangePredicate(ctxt);
+        /* same policy as after the primary expression above */
+        CHECK_ERROR;
+        SKIP_BLANKS;
+    }
+
+
+}
+
+/**
+ * xmlXPathScanName:
+ * @ctxt:  the XPath Parser context
+ *
+ * Trickery: parse an XML name but without consuming the input flow
+ * Needed to avoid insanity in the parser state.
+ *
+ * [4] NameChar ::= Letter | Digit | '.'
| '-' | '_' | ':' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Returns the Name parsed or NULL + */ + +xmlChar * +xmlXPathScanName(xmlXPathParserContextPtr ctxt) { + xmlChar buf[XML_MAX_NAMELEN]; + int len = 0; + + SKIP_BLANKS; + if (!IS_LETTER(CUR) && (CUR != '_') && + (CUR != ':')) { + return(NULL); + } + + while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || + (NXT(len) == '.') || (NXT(len) == '-') || + (NXT(len) == '_') || (NXT(len) == ':') || + (IS_COMBINING(NXT(len))) || + (IS_EXTENDER(NXT(len)))) { + buf[len] = NXT(len); + len++; + if (len >= XML_MAX_NAMELEN) { + xmlGenericError(xmlGenericErrorContext, + "xmlScanName: reached XML_MAX_NAMELEN limit\n"); + while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || + (NXT(len) == '.') || (NXT(len) == '-') || + (NXT(len) == '_') || (NXT(len) == ':') || + (IS_COMBINING(NXT(len))) || + (IS_EXTENDER(NXT(len)))) + len++; + break; + } + } + return(xmlStrndup(buf, len)); +} + +/** + * xmlXPathEvalPathExpr: + * @ctxt: the XPath Parser context + * + * [19] PathExpr ::= LocationPath + * | FilterExpr + * | FilterExpr '/' RelativeLocationPath + * | FilterExpr '//' RelativeLocationPath + * + * Parse and evaluate a path expression, then push the result on the stack + * The / operator and // operators combine an arbitrary expression + * and a relative location path. It is an error if the expression + * does not evaluate to a node-set. + * The / operator does composition in the same way as when / is + * used in a location path. As in location paths, // is short for + * /descendant-or-self::node()/. + */ + +void +xmlXPathEvalPathExpr(xmlXPathParserContextPtr ctxt) { + int lc = 1; /* Should we branch to LocationPath ? 
*/ + xmlChar *name = NULL; /* we may have to preparse a name to find out */ + + SKIP_BLANKS; + if ((CUR == '$') || (CUR == '(') || (IS_DIGIT(CUR)) || + (CUR == '\'') || (CUR == '"')) { + lc = 0; + } else if (CUR == '*') { + /* relative or absolute location path */ + lc = 1; + } else if (CUR == '/') { + /* relative or absolute location path */ + lc = 1; + } else if (CUR == '@') { + /* relative abbreviated attribute location path */ + lc = 1; + } else if (CUR == '.') { + /* relative abbreviated attribute location path */ + lc = 1; + } else { + /* + * Problem is finding if we have a name here whether it's: + * - a nodetype + * - a function call in which case it's followed by '(' + * - an axis in which case it's followed by ':' + * - a element name + * We do an a priori analysis here rather than having to + * maintain parsed token content through the recursive function + * calls. This looks uglier but makes the code quite easier to + * read/write/debug. + */ + SKIP_BLANKS; + name = xmlXPathScanName(ctxt); + if ((name != NULL) && (xmlStrstr(name, (xmlChar *) "::") != NULL)) { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: Axis\n"); +#endif + lc = 1; + xmlFree(name); + } else if (name != NULL) { + int len =xmlStrlen(name); + int blank = 0; + + + while (NXT(len) != 0) { + if (NXT(len) == '/') { + /* element name */ +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + lc = 1; + break; + } else if (IS_BLANK(NXT(len))) { + /* skip to next */ + blank = 1; + } else if (NXT(len) == ':') { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + lc = 1; + break; + } else if ((NXT(len) == '(')) { + /* Note Type or Function */ + if (xmlXPathIsNodeType(name)) { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: Type search\n"); +#endif + lc = 1; + } else { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: 
function call\n"); +#endif + lc = 0; + } + break; + } else if ((NXT(len) == '[')) { + /* element name */ +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + lc = 1; + break; + } else if ((NXT(len) == '<') || (NXT(len) == '>') || + (NXT(len) == '=')) { + lc = 1; + break; + } else { + lc = 1; + break; + } + len++; + } + if (NXT(len) == 0) { +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "PathExpr: AbbrRelLocation\n"); +#endif + /* element name */ + lc = 1; + } + xmlFree(name); + } else { + /* make sure all cases are covered explicitely */ + XP_ERROR(XPATH_EXPR_ERROR); + } + } + + if (lc) { + if (CUR == '/') + xmlXPathRoot(ctxt); + else { + /* TAG:9999 */ + valuePush(ctxt, xmlXPathNewNodeSet(ctxt->context->node)); + } + xmlXPathEvalLocationPath(ctxt); + } else { + xmlXPathEvalFilterExpr(ctxt); + CHECK_ERROR; + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + xmlXPathNodeCollectAndTest(ctxt, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + ctxt->context->node = NULL; + xmlXPathEvalRelativeLocationPath(ctxt); + } else if (CUR == '/') { + xmlXPathEvalRelativeLocationPath(ctxt); + } + } + SKIP_BLANKS; +} + +/** + * xmlXPathEvalUnionExpr: + * @ctxt: the XPath Parser context + * + * [18] UnionExpr ::= PathExpr + * | UnionExpr '|' PathExpr + * + * Parse and evaluate an union expression, then push the result on the stack + */ + +void +xmlXPathEvalUnionExpr(xmlXPathParserContextPtr ctxt) { + int sort = 0; + xmlXPathEvalPathExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while (CUR == '|') { + xmlXPathObjectPtr obj1,obj2, tmp; + + sort = 1; + CHECK_TYPE(XPATH_NODESET); + obj1 = valuePop(ctxt); + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + + NEXT; + SKIP_BLANKS; + xmlXPathEvalPathExpr(ctxt); + + CHECK_TYPE(XPATH_NODESET); + obj2 = valuePop(ctxt); + obj1->nodesetval = xmlXPathNodeSetMerge(obj1->nodesetval, + obj2->nodesetval); + if (ctxt->value 
== tmp) { + tmp = valuePop(ctxt); + xmlXPathFreeObject(tmp); + } + valuePush(ctxt, obj1); + xmlXPathFreeObject(obj2); + SKIP_BLANKS; + } + if (sort) { + } +} + +/** + * xmlXPathEvalUnaryExpr: + * @ctxt: the XPath Parser context + * + * [27] UnaryExpr ::= UnionExpr + * | '-' UnaryExpr + * + * Parse and evaluate an unary expression, then push the result on the stack + */ + +void +xmlXPathEvalUnaryExpr(xmlXPathParserContextPtr ctxt) { + int minus = 0; + + SKIP_BLANKS; + if (CUR == '-') { + minus = 1; + NEXT; + SKIP_BLANKS; + } + xmlXPathEvalUnionExpr(ctxt); + CHECK_ERROR; + if (minus) { + xmlXPathValueFlipSign(ctxt); + } +} + +/** + * xmlXPathEvalMultiplicativeExpr: + * @ctxt: the XPath Parser context + * + * [26] MultiplicativeExpr ::= UnaryExpr + * | MultiplicativeExpr MultiplyOperator UnaryExpr + * | MultiplicativeExpr 'div' UnaryExpr + * | MultiplicativeExpr 'mod' UnaryExpr + * [34] MultiplyOperator ::= '*' + * + * Parse and evaluate an Additive expression, then push the result on the stack + */ + +void +xmlXPathEvalMultiplicativeExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathEvalUnaryExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '*') || + ((CUR == 'd') && (NXT(1) == 'i') && (NXT(2) == 'v')) || + ((CUR == 'm') && (NXT(1) == 'o') && (NXT(2) == 'd'))) { + int op = -1; + + if (CUR == '*') { + op = 0; + NEXT; + } else if (CUR == 'd') { + op = 1; + SKIP(3); + } else if (CUR == 'm') { + op = 2; + SKIP(3); + } + SKIP_BLANKS; + xmlXPathEvalUnaryExpr(ctxt); + CHECK_ERROR; + switch (op) { + case 0: + xmlXPathMultValues(ctxt); + break; + case 1: + xmlXPathDivValues(ctxt); + break; + case 2: + xmlXPathModValues(ctxt); + break; + } + SKIP_BLANKS; + } +} + +/** + * xmlXPathEvalAdditiveExpr: + * @ctxt: the XPath Parser context + * + * [25] AdditiveExpr ::= MultiplicativeExpr + * | AdditiveExpr '+' MultiplicativeExpr + * | AdditiveExpr '-' MultiplicativeExpr + * + * Parse and evaluate an Additive expression, then push the result on the stack + */ + +void 
+xmlXPathEvalAdditiveExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathEvalMultiplicativeExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '+') || (CUR == '-')) { + int plus; + + if (CUR == '+') plus = 1; + else plus = 0; + NEXT; + SKIP_BLANKS; + xmlXPathEvalMultiplicativeExpr(ctxt); + CHECK_ERROR; + if (plus) xmlXPathAddValues(ctxt); + else xmlXPathSubValues(ctxt); + SKIP_BLANKS; + } +} + +/** + * xmlXPathEvalRelationalExpr: + * @ctxt: the XPath Parser context + * + * [24] RelationalExpr ::= AdditiveExpr + * | RelationalExpr '<' AdditiveExpr + * | RelationalExpr '>' AdditiveExpr + * | RelationalExpr '<=' AdditiveExpr + * | RelationalExpr '>=' AdditiveExpr + * + * A <= B > C is allowed ? Answer from James, yes with + * (AdditiveExpr <= AdditiveExpr) > AdditiveExpr + * which is basically what got implemented. + * + * Parse and evaluate a Relational expression, then push the result + * on the stack + */ + +void +xmlXPathEvalRelationalExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathEvalAdditiveExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '<') || + (CUR == '>') || + ((CUR == '<') && (NXT(1) == '=')) || + ((CUR == '>') && (NXT(1) == '='))) { + int inf, strict, ret; + + if (CUR == '<') inf = 1; + else inf = 0; + if (NXT(1) == '=') strict = 0; + else strict = 1; + NEXT; + if (!strict) NEXT; + SKIP_BLANKS; + xmlXPathEvalAdditiveExpr(ctxt); + CHECK_ERROR; + ret = xmlXPathCompareValues(ctxt, inf, strict); + valuePush(ctxt, xmlXPathNewBoolean(ret)); + SKIP_BLANKS; + } +} + +/** + * xmlXPathEvalEqualityExpr: + * @ctxt: the XPath Parser context + * + * [23] EqualityExpr ::= RelationalExpr + * | EqualityExpr '=' RelationalExpr + * | EqualityExpr '!=' RelationalExpr + * + * A != B != C is allowed ? Answer from James, yes with + * (RelationalExpr = RelationalExpr) = RelationalExpr + * (RelationalExpr != RelationalExpr) != RelationalExpr + * which is basically what got implemented. 
+ * + * Parse and evaluate an Equality expression, then push the result on the stack + * + */ +void +xmlXPathEvalEqualityExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathEvalRelationalExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == '=') || ((CUR == '!') && (NXT(1) == '='))) { + xmlXPathObjectPtr res; + int eq, equal; + + if (CUR == '=') eq = 1; + else eq = 0; + NEXT; + if (!eq) NEXT; + SKIP_BLANKS; + xmlXPathEvalRelationalExpr(ctxt); + CHECK_ERROR; + equal = xmlXPathEqualValues(ctxt); + if (eq) res = xmlXPathNewBoolean(equal); + else res = xmlXPathNewBoolean(!equal); + valuePush(ctxt, res); + SKIP_BLANKS; + } +} + +/** + * xmlXPathEvalAndExpr: + * @ctxt: the XPath Parser context + * + * [22] AndExpr ::= EqualityExpr + * | AndExpr 'and' EqualityExpr + * + * Parse and evaluate an AND expression, then push the result on the stack + * + */ +void +xmlXPathEvalAndExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathEvalEqualityExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == 'a') && (NXT(1) == 'n') && (NXT(2) == 'd')) { + xmlXPathObjectPtr arg1, arg2; + + SKIP(3); + SKIP_BLANKS; + xmlXPathEvalEqualityExpr(ctxt); + CHECK_ERROR; + xmlXPathBooleanFunction(ctxt, 1); + arg2 = valuePop(ctxt); + xmlXPathBooleanFunction(ctxt, 1); + arg1 = valuePop(ctxt); + arg1->boolval &= arg2->boolval; + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + SKIP_BLANKS; + } +} + +/** + * xmlXPathEvalExpr: + * @ctxt: the XPath Parser context + * + * [14] Expr ::= OrExpr + * [21] OrExpr ::= AndExpr + * | OrExpr 'or' AndExpr + * + * Parse and evaluate an expression, then push the result on the stack + * + */ +void +xmlXPathEvalExpr(xmlXPathParserContextPtr ctxt) { + xmlXPathEvalAndExpr(ctxt); + CHECK_ERROR; + SKIP_BLANKS; + while ((CUR == 'o') && (NXT(1) == 'r')) { + xmlXPathObjectPtr arg1, arg2; + + SKIP(2); + SKIP_BLANKS; + xmlXPathEvalAndExpr(ctxt); + CHECK_ERROR; + xmlXPathBooleanFunction(ctxt, 1); + arg2 = valuePop(ctxt); + xmlXPathBooleanFunction(ctxt, 1); + arg1 = 
valuePop(ctxt); + arg1->boolval |= arg2->boolval; + valuePush(ctxt, arg1); + xmlXPathFreeObject(arg2); + SKIP_BLANKS; + } + if ((ctxt->value != NULL) && (ctxt->value->type == XPATH_NODESET) && + (ctxt->value->nodesetval != NULL)) + xmlXPathNodeSetSort(ctxt->value->nodesetval); +} + +/** + * xmlXPathEvaluatePredicateResult: + * @ctxt: the XPath Parser context + * @res: the Predicate Expression evaluation result + * + * Evaluate a predicate result for the current node. + * A PredicateExpr is evaluated by evaluating the Expr and converting + * the result to a boolean. If the result is a number, the result will + * be converted to true if the number is equal to the position of the + * context node in the context node list (as returned by the position + * function) and will be converted to false otherwise; if the result + * is not a number, then the result will be converted as if by a call + * to the boolean function. + * + * Return 1 if predicate is true, 0 otherwise + */ +int +xmlXPathEvaluatePredicateResult(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr res) { + if (res == NULL) return(0); + switch (res->type) { + case XPATH_BOOLEAN: + return(res->boolval); + case XPATH_NUMBER: + return(res->floatval == ctxt->context->proximityPosition); + case XPATH_NODESET: + case XPATH_XSLT_TREE: + return(res->nodesetval->nodeNr != 0); + case XPATH_STRING: + return((res->stringval != NULL) && + (xmlStrlen(res->stringval) != 0)); + default: + STRANGE + } + return(0); +} + +/** + * xmlXPathEvalPredicate: + * @ctxt: the XPath Parser context + * + * [8] Predicate ::= '[' PredicateExpr ']' + * [9] PredicateExpr ::= Expr + * + * --------------------- + * For each node in the node-set to be filtered, the PredicateExpr is + * evaluated with that node as the context node, with the number of nodes + * in the node-set as the context size, and with the proximity position + * of the node in the node-set with respect to the axis as the context + * position; if PredicateExpr evaluates to 
true for that node, the node + * is included in the new node-set; otherwise, it is not included. + * --------------------- + * + * Parse and evaluate a predicate for all the elements of the + * current node list. Then refine the list by removing all + * nodes where the predicate is false. + */ +void +xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) { + const xmlChar *cur; + xmlXPathObjectPtr res; + xmlXPathObjectPtr obj, tmp; + xmlNodeSetPtr newset = NULL; + xmlNodeSetPtr oldset; + xmlNodePtr oldnode; + int i; + + SKIP_BLANKS; + if (CUR != '[') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + NEXT; + SKIP_BLANKS; + + /* + * Extract the old set, and then evaluate the result of the + * expression for all the element in the set. use it to grow + * up a new set. + */ + CHECK_TYPE(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + oldnode = ctxt->context->node; + ctxt->context->node = NULL; + + if ((oldset == NULL) || (oldset->nodeNr == 0)) { + ctxt->context->contextSize = 0; + ctxt->context->proximityPosition = 0; + xmlXPathEvalExpr(ctxt); + res = valuePop(ctxt); + if (res != NULL) + xmlXPathFreeObject(res); + valuePush(ctxt, obj); + CHECK_ERROR; + } else { + /* + * Save the expression pointer since we will have to evaluate + * it multiple times. Initialize the new set. + */ + cur = ctxt->cur; + newset = xmlXPathNodeSetCreate(NULL); + + for (i = 0; i < oldset->nodeNr; i++) { + ctxt->cur = cur; + + /* + * Run the evaluation with a node list made of a single item + * in the nodeset. 
+ */ + ctxt->context->node = oldset->nodeTab[i]; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + ctxt->context->contextSize = oldset->nodeNr; + ctxt->context->proximityPosition = i + 1; + + xmlXPathEvalExpr(ctxt); + CHECK_ERROR; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + if (xmlXPathEvaluatePredicateResult(ctxt, res)) { + xmlXPathNodeSetAdd(newset, oldset->nodeTab[i]); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation set. + */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPathWrapNodeSet(newset)); + } + if (CUR != ']') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + + NEXT; + SKIP_BLANKS; +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "After predicate : "); + xmlGenericErrorContextNodeSet(xmlGenericErrorContext, + ctxt->value->nodesetval); +#endif + ctxt->context->node = oldnode; +} + +/** + * xmlXPathEvalNodeTest: + * @ctxt: the XPath Parser context + * @test: pointer to a xmlXPathTestVal + * @type: pointer to a xmlXPathTypeVal + * @prefix: placeholder for a possible name prefix + * + * [7] NodeTest ::= NameTest + * | NodeType '(' ')' + * | 'processing-instruction' '(' Literal ')' + * + * [37] NameTest ::= '*' + * | NCName ':' '*' + * | QName + * [38] NodeType ::= 'comment' + * | 'text' + * | 'processing-instruction' + * | 'node' + * + * Returns the name found and update @test, @type and @prefix appropriately + */ +xmlChar * +xmlXPathEvalNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test, + xmlXPathTypeVal *type, const xmlChar **prefix, xmlChar *name) { + int blanks; + + if ((test == NULL) || (type == NULL) || 
(prefix == NULL)) { + STRANGE; + return(NULL); + } + *type = 0; + *test = 0; + *prefix = NULL; + SKIP_BLANKS; + + if ((name == NULL) && (CUR == '*')) { + /* + * All elements + */ + NEXT; + *test = NODE_TEST_ALL; + return(NULL); + } + + if (name == NULL) + name = xmlXPathParseNCName(ctxt); + if (name == NULL) { + XP_ERROR0(XPATH_EXPR_ERROR); + } + + blanks = IS_BLANK(CUR); + SKIP_BLANKS; + if (CUR == '(') { + NEXT; + /* + * NodeType or PI search + */ + if (xmlStrEqual(name, BAD_CAST "comment")) + *type = NODE_TYPE_COMMENT; + else if (xmlStrEqual(name, BAD_CAST "node")) + *type = NODE_TYPE_NODE; + else if (xmlStrEqual(name, BAD_CAST "processing-instruction")) + *type = NODE_TYPE_PI; + else if (xmlStrEqual(name, BAD_CAST "text")) + *type = NODE_TYPE_TEXT; + else { + if (name != NULL) + xmlFree(name); + XP_ERROR0(XPATH_EXPR_ERROR); + } + + *test = NODE_TEST_TYPE; + + SKIP_BLANKS; + if (*type == NODE_TYPE_PI) { + /* + * Specific case: search a PI by name. + */ + xmlXPathObjectPtr cur; + + if (name != NULL) + xmlFree(name); + + xmlXPathEvalLiteral(ctxt); + CHECK_ERROR 0; + xmlXPathStringFunction(ctxt, 1); + CHECK_ERROR0; + cur = valuePop(ctxt); + name = xmlStrdup(cur->stringval); + xmlXPathFreeObject(cur); + SKIP_BLANKS; + } + if (CUR != ')') { + if (name != NULL) + xmlFree(name); + XP_ERROR0(XPATH_UNCLOSED_ERROR); + } + NEXT; + return(name); + } + *test = NODE_TEST_NAME; + if ((!blanks) && (CUR == ':')) { + NEXT; + + /* + * get the namespace name for this prefix + */ + *prefix = xmlXPathNsLookup(ctxt->context, name); + if (name != NULL) + xmlFree(name); + if (*prefix == NULL) { + XP_ERROR0(XPATH_UNDEF_PREFIX_ERROR); + } + + if (CUR == '*') { + /* + * All elements + */ + NEXT; + *test = NODE_TEST_ALL; + return(NULL); + } + + name = xmlXPathParseNCName(ctxt); + if (name == NULL) { + XP_ERROR0(XPATH_EXPR_ERROR); + } + } + return(name); +} + +/** + * xmlXPathIsAxisName: + * @name: a preparsed name token + * + * [6] AxisName ::= 'ancestor' + * | 'ancestor-or-self' + * | 
'attribute' + * | 'child' + * | 'descendant' + * | 'descendant-or-self' + * | 'following' + * | 'following-sibling' + * | 'namespace' + * | 'parent' + * | 'preceding' + * | 'preceding-sibling' + * | 'self' + * + * Returns the axis or 0 + */ +xmlXPathAxisVal +xmlXPathIsAxisName(const xmlChar *name) { + xmlXPathAxisVal ret = 0; + switch (name[0]) { + case 'a': + if (xmlStrEqual(name, BAD_CAST "ancestor")) + ret = AXIS_ANCESTOR; + if (xmlStrEqual(name, BAD_CAST "ancestor-or-self")) + ret = AXIS_ANCESTOR_OR_SELF; + if (xmlStrEqual(name, BAD_CAST "attribute")) + ret = AXIS_ATTRIBUTE; + break; + case 'c': + if (xmlStrEqual(name, BAD_CAST "child")) + ret = AXIS_CHILD; + break; + case 'd': + if (xmlStrEqual(name, BAD_CAST "descendant")) + ret = AXIS_DESCENDANT; + if (xmlStrEqual(name, BAD_CAST "descendant-or-self")) + ret = AXIS_DESCENDANT_OR_SELF; + break; + case 'f': + if (xmlStrEqual(name, BAD_CAST "following")) + ret = AXIS_FOLLOWING; + if (xmlStrEqual(name, BAD_CAST "following-sibling")) + ret = AXIS_FOLLOWING_SIBLING; + break; + case 'n': + if (xmlStrEqual(name, BAD_CAST "namespace")) + ret = AXIS_NAMESPACE; + break; + case 'p': + if (xmlStrEqual(name, BAD_CAST "parent")) + ret = AXIS_PARENT; + if (xmlStrEqual(name, BAD_CAST "preceding")) + ret = AXIS_PRECEDING; + if (xmlStrEqual(name, BAD_CAST "preceding-sibling")) + ret = AXIS_PRECEDING_SIBLING; + break; + case 's': + if (xmlStrEqual(name, BAD_CAST "self")) + ret = AXIS_SELF; + break; + } + return(ret); +} + +/** + * xmlXPathEvalAxisSpecifier: + * @ctxt: the XPath Parser context + * + * + * Returns the axis found + */ +xmlXPathAxisVal +xmlXPathEvalAxisSpecifier(xmlXPathParserContextPtr ctxt) { + xmlXPathAxisVal ret = AXIS_CHILD; + int blank = 0; + xmlChar *name; + + if (CUR == '@') { + NEXT; + return(AXIS_ATTRIBUTE); + } else { + name = xmlXPathParseNCName(ctxt); + if (name == NULL) { + XP_ERROR0(XPATH_EXPR_ERROR); + } + if (IS_BLANK(CUR)) + blank = 1; + SKIP_BLANKS; + if ((CUR == ':') && (NXT(1) == ':')) { + ret = 
xmlXPathIsAxisName(name); + } else if ((blank) && (CUR == ':')) + XP_ERROR0(XPATH_EXPR_ERROR); + + xmlFree(name); + } + return(ret); +} + +/** + * xmlXPathEvalStep: + * @ctxt: the XPath Parser context + * + * [4] Step ::= AxisSpecifier NodeTest Predicate* + * | AbbreviatedStep + * + * [12] AbbreviatedStep ::= '.' | '..' + * + * [5] AxisSpecifier ::= AxisName '::' + * | AbbreviatedAxisSpecifier + * + * [13] AbbreviatedAxisSpecifier ::= '@'? + * + * Modified for XPtr range support as: + * + * [4xptr] Step ::= AxisSpecifier NodeTest Predicate* + * | AbbreviatedStep + * | 'range-to' '(' Expr ')' Predicate* + * + * Evaluate one step in a Location Path + * A location step of . is short for self::node(). This is + * particularly useful in conjunction with //. For example, the + * location path .//para is short for + * self::node()/descendant-or-self::node()/child::para + * and so will select all para descendant elements of the context + * node. + * Similarly, a location step of .. is short for parent::node(). + * For example, ../title is short for parent::node()/child::title + * and so will select the title children of the parent of the context + * node. 
+ */ +void +xmlXPathEvalStep(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if ((CUR == '.') && (NXT(1) == '.')) { + SKIP(2); + SKIP_BLANKS; + xmlXPathNodeCollectAndTest(ctxt, AXIS_PARENT, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + } else if (CUR == '.') { + NEXT; + SKIP_BLANKS; + } else { + xmlChar *name = NULL; + const xmlChar *prefix = NULL; + xmlXPathTestVal test; + xmlXPathAxisVal axis; + xmlXPathTypeVal type; + + /* + * The modification needed for XPointer change to the production + */ +#ifdef LIBXML_XPTR_ENABLED + if (ctxt->context->xptr) { + name = xmlXPathParseNCName(ctxt); + if ((name != NULL) && (xmlStrEqual(name, BAD_CAST "range-to"))) { + xmlFree(name); + SKIP_BLANKS; + if (CUR != '(') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + SKIP_BLANKS; + + xmlXPtrRangeToFunction(ctxt, 1); + CHECK_ERROR; + + SKIP_BLANKS; + if (CUR != ')') { + XP_ERROR(XPATH_EXPR_ERROR); + } + NEXT; + goto eval_predicates; + } + } +#endif + if (name == NULL) + name = xmlXPathParseNCName(ctxt); + if (name != NULL) { + axis = xmlXPathIsAxisName(name); + if (axis != 0) { + SKIP_BLANKS; + if ((CUR == ':') && (NXT(1) == ':')) { + SKIP(2); + xmlFree(name); + name = NULL; + } else { + /* an element name can conflict with an axis one :-\ */ + axis = AXIS_CHILD; + } + } else { + axis = AXIS_CHILD; + } + } else if (CUR == '@') { + NEXT; + axis = AXIS_ATTRIBUTE; + } else { + axis = AXIS_CHILD; + } + + CHECK_ERROR; + + name = xmlXPathEvalNodeTest(ctxt, &test, &type, &prefix, name); + if (test == 0) + return; + +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, + "Basis : computing new set\n"); +#endif + xmlXPathNodeCollectAndTest(ctxt, axis, test, type, prefix, name); +#ifdef DEBUG_STEP + xmlGenericError(xmlGenericErrorContext, "Basis : "); + xmlGenericErrorContextNodeSet(stdout, ctxt->value->nodesetval); +#endif + if (name != NULL) + xmlFree(name); + +eval_predicates: + SKIP_BLANKS; + while (CUR == '[') { + xmlXPathEvalPredicate(ctxt); + } + } +#ifdef DEBUG_STEP + 
xmlGenericError(xmlGenericErrorContext, "Step : "); + xmlGenericErrorContextNodeSet(xmlGenericErrorContext, + ctxt->value->nodesetval); +#endif +} + +/** + * xmlXPathEvalRelativeLocationPath: + * @ctxt: the XPath Parser context + * + * [3] RelativeLocationPath ::= Step + * | RelativeLocationPath '/' Step + * | AbbreviatedRelativeLocationPath + * [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + * + */ +void +#ifdef VMS +xmlXPathEvalRelLocationPath +#else +xmlXPathEvalRelativeLocationPath +#endif +(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + xmlXPathNodeCollectAndTest(ctxt, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + } else if (CUR == '/') { + NEXT; + SKIP_BLANKS; + } + xmlXPathEvalStep(ctxt); + SKIP_BLANKS; + while (CUR == '/') { + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + xmlXPathNodeCollectAndTest(ctxt, AXIS_DESCENDANT_OR_SELF, + NODE_TEST_TYPE, NODE_TYPE_NODE, NULL, NULL); + xmlXPathEvalStep(ctxt); + } else if (CUR == '/') { + NEXT; + SKIP_BLANKS; + xmlXPathEvalStep(ctxt); + } + SKIP_BLANKS; + } +} + +/** + * xmlXPathEvalLocationPath: + * @ctxt: the XPath Parser context + * + * [1] LocationPath ::= RelativeLocationPath + * | AbsoluteLocationPath + * [2] AbsoluteLocationPath ::= '/' RelativeLocationPath? + * | AbbreviatedAbsoluteLocationPath + * [10] AbbreviatedAbsoluteLocationPath ::= + * '//' RelativeLocationPath + * + * // is short for /descendant-or-self::node()/. For example, + * //para is short for /descendant-or-self::node()/child::para and + * so will select any para element in the document (even a para element + * that is a document element will be selected by //para since the + * document element node is a child of the root node); div//para is + * short for div/descendant-or-self::node()/child::para and so will + * select all para descendants of div children. 
+ */ +void +xmlXPathEvalLocationPath(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if (CUR != '/') { + xmlXPathEvalRelativeLocationPath(ctxt); + } else { + while (CUR == '/') { + if ((CUR == '/') && (NXT(1) == '/')) { + SKIP(2); + SKIP_BLANKS; + xmlXPathNodeCollectAndTest(ctxt, + AXIS_DESCENDANT_OR_SELF, NODE_TEST_TYPE, + NODE_TYPE_NODE, NULL, NULL); + xmlXPathEvalRelativeLocationPath(ctxt); + } else if (CUR == '/') { + NEXT; + SKIP_BLANKS; + if (CUR != 0) + xmlXPathEvalRelativeLocationPath(ctxt); + } + } + } +} + +/** + * xmlXPathEval: + * @str: the XPath expression + * @ctx: the XPath context + * + * Evaluate the XPath Location Path in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the eveluation or NULL. + * the caller has to free the object. + */ +xmlXPathObjectPtr +xmlXPathEval(const xmlChar *str, xmlXPathContextPtr ctx) { + xmlXPathParserContextPtr ctxt; + xmlXPathObjectPtr res, tmp, init = NULL; + int stack = 0; + + xmlXPathInit(); + + CHECK_CONTEXT(ctx) + + ctxt = xmlXPathNewParserContext(str, ctx); + /**** TAG:9999 + if (ctx->node != NULL) { + init = xmlXPathNewNodeSet(ctx->node); + valuePush(ctxt, init); + } + ****/ + xmlXPathEvalExpr(ctxt); + + if (ctxt->value == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEval: evaluation failed\n"); + res = NULL; + } else if (*ctxt->cur != 0) { + xmlXPatherror(ctxt, __FILE__, __LINE__, XPATH_EXPR_ERROR); + res = NULL; + } else { + res = valuePop(ctxt); + } + + do { + tmp = valuePop(ctxt); + if (tmp != NULL) { + if (tmp != init) + stack++; + xmlXPathFreeObject(tmp); + } + } while (tmp != NULL); + if ((stack != 0) && (res != NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEval: %d object left on the stack\n", + stack); + } + if (ctxt->error != XPATH_EXPRESSION_OK) { + xmlXPathFreeObject(res); + res = NULL; + } + + xmlXPathFreeParserContext(ctxt); + return(res); +} + +/** + * xmlXPathEvalExpression: + * @str: the XPath expression + * @ctxt: the XPath context + * 
+ * Evaluate the XPath expression in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL. + * the caller has to free the object. + */ +xmlXPathObjectPtr +xmlXPathEvalExpression(const xmlChar *str, xmlXPathContextPtr ctxt) { + xmlXPathParserContextPtr pctxt; + xmlXPathObjectPtr res, tmp; + int stack = 0; + + xmlXPathInit(); + + CHECK_CONTEXT(ctxt) + + pctxt = xmlXPathNewParserContext(str, ctxt); + xmlXPathEvalExpr(pctxt); + + if (*pctxt->cur != 0) { + xmlXPatherror(pctxt, __FILE__, __LINE__, XPATH_EXPR_ERROR); + res = NULL; + } else { + res = valuePop(pctxt); + } + do { + tmp = valuePop(pctxt); + if (tmp != NULL) { + xmlXPathFreeObject(tmp); + stack++; + } + } while (tmp != NULL); + if ((stack != 0) && (res != NULL)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPathEvalExpression: %d object left on the stack\n", + stack); + } + xmlXPathFreeParserContext(pctxt); + return(res); +} + +/** + * xmlXPathRegisterAllFunctions: + * @ctxt: the XPath context + * + * Registers all default XPath functions in this context + */ +void +xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt) +{ + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean", + xmlXPathBooleanFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling", + xmlXPathCeilingFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count", + xmlXPathCountFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat", + xmlXPathConcatFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains", + xmlXPathContainsFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id", + xmlXPathIdFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false", + xmlXPathFalseFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor", + xmlXPathFloorFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last", + xmlXPathLastFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang", + xmlXPathLangFunction); + xmlXPathRegisterFunc(ctxt, (const 
xmlChar *)"local-name", + xmlXPathLocalNameFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not", + xmlXPathNotFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name", + xmlXPathNameFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri", + xmlXPathNamespaceURIFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space", + xmlXPathNormalizeFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number", + xmlXPathNumberFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position", + xmlXPathPositionFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round", + xmlXPathRoundFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string", + xmlXPathStringFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length", + xmlXPathStringLengthFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with", + xmlXPathStartsWithFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring", + xmlXPathSubstringFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before", + xmlXPathSubstringBeforeFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after", + xmlXPathSubstringAfterFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum", + xmlXPathSumFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true", + xmlXPathTrueFunction); + xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate", + xmlXPathTranslateFunction); +} + +#endif /* LIBXML_XPATH_ENABLED */ diff --git a/xpath.h b/xpath.h new file mode 100644 index 00000000..f8fd861e --- /dev/null +++ b/xpath.h @@ -0,0 +1,278 @@ +/* + * xpath.c: interface for XML Path Language implementation + * + * Reference: W3C Working Draft 5 July 1999 + * http://www.w3.org/Style/XSL/Group/1999/07/xpath-19990705.html + * + * See COPYRIGHT for the status of this software + * + * Author: Daniel.Veillard@w3.org + */ + +#ifndef __XML_XPATH_H__ +#define __XML_XPATH_H__ + +#include +#include + +#ifdef __cplusplus 
extern "C" {
#endif

/*
 * Forward declarations of the two context structures; both are defined
 * further down in this header.
 */
typedef struct _xmlXPathContext xmlXPathContext;
typedef xmlXPathContext *xmlXPathContextPtr;
typedef struct _xmlXPathParserContext xmlXPathParserContext;
typedef xmlXPathParserContext *xmlXPathParserContextPtr;

/**
 * The set of XPath error codes
 *
 * XPATH_EXPRESSION_OK is 0; every other enumerator takes the next
 * consecutive value. The XPTR_* entries share the same enumeration.
 */

typedef enum {
    XPATH_EXPRESSION_OK = 0,
    XPATH_NUMBER_ERROR,
    XPATH_UNFINISHED_LITERAL_ERROR,
    XPATH_START_LITERAL_ERROR,
    XPATH_VARIABLE_REF_ERROR,
    XPATH_UNDEF_VARIABLE_ERROR,
    XPATH_INVALID_PREDICATE_ERROR,
    XPATH_EXPR_ERROR,
    XPATH_UNCLOSED_ERROR,
    XPATH_UNKNOWN_FUNC_ERROR,
    XPATH_INVALID_OPERAND,
    XPATH_INVALID_TYPE,
    XPATH_INVALID_ARITY,
    XPATH_INVALID_CTXT_SIZE,
    XPATH_INVALID_CTXT_POSITION,
    XPATH_MEMORY_ERROR,
    XPTR_SYNTAX_ERROR,
    XPTR_RESOURCE_ERROR,
    XPTR_SUB_RESOURCE_ERROR,
    XPATH_UNDEF_PREFIX_ERROR
} xmlXPathError;

/*
 * A node-set (an unordered collection of nodes without duplicates)
 */
typedef struct _xmlNodeSet xmlNodeSet;
typedef xmlNodeSet *xmlNodeSetPtr;
struct _xmlNodeSet {
    int nodeNr;                 /* number of nodes in the set */
    int nodeMax;                /* size of the array as allocated */
    xmlNodePtr *nodeTab;        /* array of nodes in no particular order */
};

/*
 * An expression is evaluated to yield an object, which
 * has one of the following four basic types:
 *   - node-set
 *   - boolean
 *   - number
 *   - string
 *
 * @@ XPointer will add more types !
 */

typedef enum {
    XPATH_UNDEFINED = 0,
    XPATH_NODESET = 1,
    XPATH_BOOLEAN = 2,
    XPATH_NUMBER = 3,
    XPATH_STRING = 4,
    XPATH_POINT = 5,
    XPATH_RANGE = 6,
    XPATH_LOCATIONSET = 7,
    XPATH_USERS = 8,
    XPATH_XSLT_TREE = 9  /* An XSLT value tree, non modifiable */
} xmlXPathObjectType;

/*
 * The value object. `type` selects which of the other fields are
 * meaningful. The XPointer code in xpointer.c uses the generic slots
 * as follows:
 *   - XPATH_POINT:       user = node, index = index within the node
 *   - XPATH_RANGE:       user/index = start point, user2/index2 = end point
 *   - XPATH_LOCATIONSET: user = the xmlLocationSetPtr
 */
typedef struct _xmlXPathObject xmlXPathObject;
typedef xmlXPathObject *xmlXPathObjectPtr;
struct _xmlXPathObject {
    xmlXPathObjectType type;
    xmlNodeSetPtr nodesetval;   /* read for XPATH_NODESET objects */
    int boolval;                /* presumably the XPATH_BOOLEAN value - TODO confirm */
    double floatval;            /* presumably the XPATH_NUMBER value - TODO confirm */
    xmlChar *stringval;         /* presumably the XPATH_STRING value - TODO confirm */
    void *user;
    int index;
    void *user2;
    int index2;
};

/*
 * A conversion function is associated to a type and used to cast
 * the new type to primitive values.
 */
typedef int (*xmlXPathConvertFunc) (xmlXPathObjectPtr obj, int type);

/*
 * Extra type: a name and a conversion function.
 */

typedef struct _xmlXPathType xmlXPathType;
typedef xmlXPathType *xmlXPathTypePtr;
struct _xmlXPathType {
    const xmlChar *name;                /* the type name */
    xmlXPathConvertFunc func;           /* the conversion function */
};

/*
 * Extra variable: a name and a value.
 */

typedef struct _xmlXPathVariable xmlXPathVariable;
typedef xmlXPathVariable *xmlXPathVariablePtr;
struct _xmlXPathVariable {
    const xmlChar *name;                /* the variable name */
    xmlXPathObjectPtr value;            /* the value */
};

/*
 * An evaluation function; the parameters are on the context stack.
 */

typedef void (*xmlXPathEvalFunc)(xmlXPathParserContextPtr ctxt, int nargs);

/*
 * Extra function: a name and an evaluation function.
 */

typedef struct _xmlXPathFunct xmlXPathFunct;
typedef xmlXPathFunct *xmlXPathFuncPtr;
struct _xmlXPathFunct {
    const xmlChar *name;                /* the function name */
    xmlXPathEvalFunc func;              /* the evaluation function */
};

/*
 * An axis traversal function. To traverse an axis, the engine calls
 * the first time with cur == NULL and repeat until the function returns
 * NULL indicating the end of the axis traversal.
 */

typedef xmlXPathObjectPtr (*xmlXPathAxisFunc)   (xmlXPathParserContextPtr ctxt,
                                                 xmlXPathObjectPtr cur);

/*
 * Extra axis: a name and an axis function.
 */

typedef struct _xmlXPathAxis xmlXPathAxis;
typedef xmlXPathAxis *xmlXPathAxisPtr;
struct _xmlXPathAxis {
    const xmlChar      *name;           /* the axis name */
    xmlXPathAxisFunc func;              /* the search function */
};

/*
 * Expression evaluation occurs with respect to a context.
 * The context consists of:
 *    - a node (the context node)
 *    - a node list (the context node list)
 *    - a set of variable bindings
 *    - a function library
 *    - the set of namespace declarations in scope for the expression
 * Following the switch to hash tables, this needs to be trimmed up at
 * the next binary incompatible release.
 */

struct _xmlXPathContext {
    xmlDocPtr doc;                      /* The current document */
    xmlNodePtr node;                    /* The current node */

    int nb_variables_unused;            /* unused (hash table) */
    int max_variables_unused;           /* unused (hash table) */
    xmlHashTablePtr varHash;            /* Hash table of defined variables */

    int nb_types;                       /* number of defined types */
    int max_types;                      /* max number of types */
    xmlXPathTypePtr types;              /* Array of defined types */

    int nb_funcs_unused;                /* unused (hash table) */
    int max_funcs_unused;               /* unused (hash table) */
    xmlHashTablePtr funcHash;           /* Hash table of defined funcs */

    int nb_axis;                        /* number of defined axis */
    int max_axis;                       /* max number of axis */
    xmlXPathAxisPtr axis;               /* Array of defined axis */

    /* the namespace nodes of the context node */
    xmlNsPtr *namespaces;               /* Array of namespaces */
    int nsNr;                           /* number of namespace in scope */
    void *user;                         /* function to free */

    /* extra variables */
    int contextSize;                    /* the context size */
    int proximityPosition;              /* the proximity position */

    /* extra stuff for XPointer */
    int xptr;                           /* is this an XPointer context */
    xmlNodePtr here;                    /* for here() */
    xmlNodePtr origin;                  /* for origin() */

    /* the set of namespace declarations in scope for the expression */
    xmlHashTablePtr nsHash;             /* The namespaces hash table */
    void *varLookupFunc;                /* variable lookup func */
    void *varLookupData;                /* variable lookup data */

    /* Possibility to link in an extra item */
    void *extra;                        /* needed for XSLT */
};

/*
 * An XPath parser context. It contains pure parsing information,
 * an xmlXPathContext, and the stack of objects.
 */
struct _xmlXPathParserContext {
    const xmlChar *cur;                 /* the current char being parsed */
    const xmlChar *base;                /* the full expression */

    int error;                          /* error code */

    xmlXPathContextPtr  context;        /* the evaluation context */
    xmlXPathObjectPtr     value;        /* the current value */
    int                 valueNr;        /* number of values stacked */
    int                valueMax;        /* max number of values stacked */
    xmlXPathObjectPtr *valueTab;        /* stack of values */
};

/*
 * An XPath function
 * The arguments (if any) are popped out of the context stack
 * and the result is pushed on the stack.
 */

typedef void (*xmlXPathFunction) (xmlXPathParserContextPtr ctxt, int nargs);

/************************************************************************
 *                                                                      *
 *                      Public API                                      *
 *                                                                      *
 ************************************************************************/

/**
 * Evaluation functions.
+ */ +void xmlXPathInit (void); +xmlXPathContextPtr xmlXPathNewContext (xmlDocPtr doc); +void xmlXPathFreeContext (xmlXPathContextPtr ctxt); +xmlXPathObjectPtr xmlXPathEval (const xmlChar *str, + xmlXPathContextPtr ctxt); +xmlXPathObjectPtr xmlXPathEvalXPtrExpr (const xmlChar *str, + xmlXPathContextPtr ctxt); +void xmlXPathFreeObject (xmlXPathObjectPtr obj); +xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str, + xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val); +void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj); +void xmlXPathFreeNodeSet (xmlNodeSetPtr obj); +xmlXPathObjectPtr xmlXPathObjectCopy (xmlXPathObjectPtr val); +int xmlXPathCmpNodes (xmlNodePtr node1, + xmlNodePtr node2); + + +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_XPATH_H__ */ diff --git a/xpathInternals.h b/xpathInternals.h new file mode 100644 index 00000000..51f6ad55 --- /dev/null +++ b/xpathInternals.h @@ -0,0 +1,236 @@ +/* + * xpath.c: internal interfaces for XML Path Language implementation + * used to build new modules on top of XPath + * + * See COPYRIGHT for the status of this software + * + * Author: Daniel.Veillard@w3.org + */ + +#ifndef __XML_XPATH_INTERNALS_H__ +#define __XML_XPATH_INTERNALS_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/************************************************************************ + * * + * Helpers * + * * + ************************************************************************/ + +#define CHECK_ERROR \ + if (ctxt->error != XPATH_EXPRESSION_OK) return + +#define CHECK_ERROR0 \ + if (ctxt->error != XPATH_EXPRESSION_OK) return(0) + +#define XP_ERROR(X) \ + { xmlXPatherror(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return; } + +#define XP_ERROR0(X) \ + { xmlXPatherror(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return(0); } + +#define CHECK_TYPE(typeval) \ + if ((ctxt->value == NULL) || (ctxt->value->type != typeval)) \ + XP_ERROR(XPATH_INVALID_TYPE) + +#define CHECK_ARITY(x) 
\ + if (nargs != (x)) \ + XP_ERROR(XPATH_INVALID_ARITY); + +#define CAST_TO_STRING \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_STRING)) \ + xmlXPathStringFunction(ctxt, 1); + +#define CAST_TO_NUMBER \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_NUMBER)) \ + xmlXPathNumberFunction(ctxt, 1); + +#define CAST_TO_BOOLEAN \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_BOOLEAN)) \ + xmlXPathBooleanFunction(ctxt, 1); + +/* + * Varibale Lookup forwarding + */ +typedef xmlXPathObjectPtr + (*xmlXPathVariableLookupFunc) (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +void xmlXPathRegisterVariableLookup (xmlXPathContextPtr ctxt, + xmlXPathVariableLookupFunc f, + void *varCtxt); + +/* + * Error reporting + */ +void xmlXPatherror (xmlXPathParserContextPtr ctxt, + const char *file, + int line, + int no); + +void xmlXPathDebugDumpObject (FILE *output, + xmlXPathObjectPtr cur, + int depth); + +/** + * Extending a context + */ + +int xmlXPathRegisterNs (xmlXPathContextPtr ctxt, + const xmlChar *prefix, + const xmlChar *ns_uri); +const xmlChar * xmlXPathNsLookup (xmlXPathContextPtr ctxt, + const xmlChar *ns_uri); +void xmlXPathRegisteredNsCleanup (xmlXPathContextPtr ctxt); + +int xmlXPathRegisterFunc (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathFunction f); +int xmlXPathRegisterFuncNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathFunction f); +int xmlXPathRegisterVariable (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathObjectPtr value); +int xmlXPathRegisterVariableNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathObjectPtr value); +xmlXPathFunction xmlXPathFunctionLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +xmlXPathFunction xmlXPathFunctionLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +void xmlXPathRegisteredFuncsCleanup(xmlXPathContextPtr ctxt); +xmlXPathObjectPtr 
xmlXPathVariableLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +xmlXPathObjectPtr xmlXPathVariableLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +void xmlXPathRegisteredVariablesCleanup(xmlXPathContextPtr ctxt); + +/** + * Utilities to extend XPath + */ +xmlXPathParserContextPtr + xmlXPathNewParserContext (const xmlChar *str, + xmlXPathContextPtr ctxt); +void xmlXPathFreeParserContext (xmlXPathParserContextPtr ctxt); + +/* TODO: remap to xmlXPathValuePop and Push */ +xmlXPathObjectPtr valuePop (xmlXPathParserContextPtr ctxt); +int valuePush (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr value); + +xmlXPathObjectPtr xmlXPathNewString (const xmlChar *val); +xmlXPathObjectPtr xmlXPathNewCString (const char *val); +xmlXPathObjectPtr xmlXPathNewFloat (double val); +xmlXPathObjectPtr xmlXPathNewBoolean (int val); +xmlXPathObjectPtr xmlXPathNewNodeSet (xmlNodePtr val); +xmlXPathObjectPtr xmlXPathNewValueTree (xmlNodePtr val); +void xmlXPathNodeSetAdd (xmlNodeSetPtr cur, + xmlNodePtr val); + + +void xmlXPathIdFunction (xmlXPathParserContextPtr ctxt, + int nargs); +void xmlXPathRoot (xmlXPathParserContextPtr ctxt); +void xmlXPathEvalExpr (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathParseName (xmlXPathParserContextPtr ctxt); +xmlChar * xmlXPathParseNCName (xmlXPathParserContextPtr ctxt); + +/* + * Debug + */ +#ifdef LIBXML_DEBUG_ENABLED +double xmlXPathStringEvalNumber(const xmlChar *str); +void xmlXPathDebugDumpObject(FILE *output, xmlXPathObjectPtr cur, int depth); +#endif +/* + * Existing functions + */ + +int xmlXPathEvaluatePredicateResult(xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr res); +void xmlXPathInit(void); +void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt); +xmlNodeSetPtr xmlXPathNodeSetCreate(xmlNodePtr val); +void xmlXPathNodeSetAdd(xmlNodeSetPtr cur, xmlNodePtr val); +xmlNodeSetPtr 
xmlXPathNodeSetMerge(xmlNodeSetPtr val1, xmlNodeSetPtr val2); +void xmlXPathNodeSetDel(xmlNodeSetPtr cur, xmlNodePtr val); +void xmlXPathNodeSetRemove(xmlNodeSetPtr cur, int val); +void xmlXPathFreeNodeSet(xmlNodeSetPtr obj); +xmlXPathObjectPtr xmlXPathNewNodeSet(xmlNodePtr val); +xmlXPathObjectPtr xmlXPathNewNodeSetList(xmlNodeSetPtr val); +xmlXPathObjectPtr xmlXPathWrapNodeSet(xmlNodeSetPtr val); +void xmlXPathFreeNodeSetList(xmlXPathObjectPtr obj); + + +xmlXPathObjectPtr xmlXPathNewFloat(double val); +xmlXPathObjectPtr xmlXPathNewBoolean(int val); +xmlXPathObjectPtr xmlXPathNewString(const xmlChar *val); +xmlXPathObjectPtr xmlXPathNewCString(const char *val); +void xmlXPathFreeObject(xmlXPathObjectPtr obj); +xmlXPathContextPtr xmlXPathNewContext(xmlDocPtr doc); +void xmlXPathFreeContext(xmlXPathContextPtr ctxt); + +int xmlXPathEqualValues(xmlXPathParserContextPtr ctxt); +int xmlXPathCompareValues(xmlXPathParserContextPtr ctxt, int inf, int strict); +void xmlXPathValueFlipSign(xmlXPathParserContextPtr ctxt); +void xmlXPathAddValues(xmlXPathParserContextPtr ctxt); +void xmlXPathSubValues(xmlXPathParserContextPtr ctxt); +void xmlXPathMultValues(xmlXPathParserContextPtr ctxt); +void xmlXPathDivValues(xmlXPathParserContextPtr ctxt); +void xmlXPathModValues(xmlXPathParserContextPtr ctxt); + + +/* + * Some of the axis navigation routines + */ +xmlNodePtr xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur); +xmlNodePtr xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur); +xmlNodePtr xmlXPathNextPrecedingSibling(xmlXPathParserContextPtr ctxt, xmlNodePtr cur); +/* + * The official core of XPath functions + */ +void xmlXPathRoot(xmlXPathParserContextPtr ctxt); +void xmlXPathLastFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathPositionFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathCountFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs); 
+void xmlXPathLocalNameFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNamespaceURIFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStringLengthFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathContainsFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathStartsWithFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringBeforeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathTranslateFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNotFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathTrueFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathFalseFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathLangFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathSumFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathFloorFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathCeilingFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathRoundFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPathBooleanFunction(xmlXPathParserContextPtr ctxt, int nargs); +#ifdef __cplusplus +} +#endif +#endif /* ! 
__XML_XPATH_INTERNALS_H__ */ diff --git a/xpointer.c b/xpointer.c new file mode 100644 index 00000000..7d2da1b1 --- /dev/null +++ b/xpointer.c @@ -0,0 +1,2903 @@ +/* + * xpointer.c : Code to handle XML Pointer + * + * World Wide Web Consortium Working Draft 03-March-1998 + * http://www.w3.org/TR/2000/CR-xptr-20000607 + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifdef WIN32 +#include "win32config.h" +#else +#include "config.h" +#endif + +/** + * TODO: better handling of error cases, the full expression should + * be parsed beforehand instead of a progressive evaluation + * TODO: Access into entities references are not supported now ... + * need a start to be able to pop out of entities refs since + * parent is the endity declaration, not the ref. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef LIBXML_DEBUG_ENABLED +#include +#endif +#include + +#ifdef LIBXML_XPTR_ENABLED + +/* Add support of the xmlns() xpointer scheme to initialize the namespaces */ +#define XPTR_XMLNS_SCHEME + +/* #define DEBUG_RANGES */ + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define STRANGE \ + xmlGenericError(xmlGenericErrorContext, \ + "Internal error at %s:%d\n", \ + __FILE__, __LINE__); + +/************************************************************************ + * * + * A few helper functions for child sequences * + * * + ************************************************************************/ + +xmlNodePtr xmlXPtrAdvanceNode(xmlNodePtr cur); +/** + * xmlXPtrGetArity: + * @cur: the node + * + * Returns the number of child for an element, -1 in case of error + */ +int +xmlXPtrGetArity(xmlNodePtr cur) { + int i; + if (cur == NULL) + return(-1); + cur = cur->children; + for (i = 0;cur != NULL;cur = cur->next) { + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == 
XML_HTML_DOCUMENT_NODE)) { + i++; + } + } + return(i); +} + +/** + * xmlXPtrGetIndex: + * @cur: the node + * + * Returns the index of the node in its parent children list, -1 + * in case of error + */ +int +xmlXPtrGetIndex(xmlNodePtr cur) { + int i; + if (cur == NULL) + return(-1); + for (i = 1;cur != NULL;cur = cur->prev) { + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + i++; + } + } + return(i); +} + +/** + * xmlXPtrGetNthChild: + * @cur: the node + * @no: the child number + * + * Returns the @no'th element child of @cur or NULL + */ +xmlNodePtr +xmlXPtrGetNthChild(xmlNodePtr cur, int no) { + int i; + if (cur == NULL) + return(cur); + cur = cur->children; + for (i = 0;i <= no;cur = cur->next) { + if (cur == NULL) + return(cur); + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + i++; + if (i == no) + break; + } + } + return(cur); +} + +/************************************************************************ + * * + * Handling of XPointer specific types * + * * + ************************************************************************/ + +/** + * xmlXPtrCmpPoints: + * @node1: the first node + * @index1: the first index + * @node2: the second node + * @index2: the second index + * + * Compare two points w.r.t document order + * + * Returns -2 in case of error 1 if first point < second point, 0 if + * that's the same point, -1 otherwise + */ +int +xmlXPtrCmpPoints(xmlNodePtr node1, int index1, xmlNodePtr node2, int index2) { + if ((node1 == NULL) || (node2 == NULL)) + return(-2); + /* + * a couple of optimizations which will avoid computations in most cases + */ + if (node1 == node2) { + if (index1 < index2) + return(1); + if (index1 > index2) + return(-1); + return(0); + } + return(xmlXPathCmpNodes(node1, node2)); +} + +/** + * xmlXPtrNewPoint: + * @node: the xmlNodePtr + * @index: the index within the node + * + * 
Create a new xmlXPathObjectPtr of type point + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewPoint(xmlNodePtr node, int index) { + xmlXPathObjectPtr ret; + + if (node == NULL) + return(NULL); + if (index < 0) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewPoint: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_POINT; + ret->user = (void *) node; + ret->index = index; + return(ret); +} + +/** + * xmlXPtrRangeCheckOrder: + * @range: an object range + * + * Make sure the points in the range are in the right order + */ +void +xmlXPtrRangeCheckOrder(xmlXPathObjectPtr range) { + int tmp; + xmlNodePtr tmp2; + if (range == NULL) + return; + if (range->type != XPATH_RANGE) + return; + if (range->user2 == NULL) + return; + tmp = xmlXPtrCmpPoints(range->user, range->index, + range->user2, range->index2); + if (tmp == -1) { + tmp2 = range->user; + range->user = range->user2; + range->user2 = tmp2; + tmp = range->index; + range->index = range->index2; + range->index2 = tmp; + } +} + +/** + * xmlXPtrRangesEqual: + * @range1: the first range + * @range2: the second range + * + * Compare two ranges + * + * Return 1 if equal, 0 otherwise + */ +int +xmlXPtrRangesEqual(xmlXPathObjectPtr range1, xmlXPathObjectPtr range2) { + if (range1 == range2) + return(1); + if ((range1 == NULL) || (range2 == NULL)) + return(0); + if (range1->type != range2->type) + return(0); + if (range1->type != XPATH_RANGE) + return(0); + if (range1->user != range2->user) + return(0); + if (range1->index != range2->index) + return(0); + if (range1->user2 != range2->user2) + return(0); + if (range1->index2 != range2->index2) + return(0); + return(1); +} + +/** + * xmlXPtrNewRange: + * @start: the starting node + * @startindex: the start index + * @end: the ending point + * @endindex: the ending index + * + * Create 
a new xmlXPathObjectPtr of type range + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRange(xmlNodePtr start, int startindex, + xmlNodePtr end, int endindex) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (startindex < 0) + return(NULL); + if (endindex < 0) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangePoints: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = startindex; + ret->user2 = end; + ret->index2 = endindex; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangePoints: + * @start: the starting point + * @end: the ending point + * + * Create a new xmlXPathObjectPtr of type range using 2 Points + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRangePoints(xmlXPathObjectPtr start, xmlXPathObjectPtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (start->type != XPATH_POINT) + return(NULL); + if (end->type != XPATH_POINT) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangePoints: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start->user; + ret->index = start->index; + ret->user2 = end->user; + ret->index2 = end->index; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangePointNode: + * @start: the starting point + * @end: the ending node + * + * Create a new xmlXPathObjectPtr of type range from a point to a node + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrNewRangePointNode(xmlXPathObjectPtr start, xmlNodePtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (start->type != XPATH_POINT) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangePointNode: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start->user; + ret->index = start->index; + ret->user2 = end; + ret->index2 = -1; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangeNodePoint: + * @start: the starting node + * @end: the ending point + * + * Create a new xmlXPathObjectPtr of type range from a node to a point + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRangeNodePoint(xmlNodePtr start, xmlXPathObjectPtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + if (start->type != XPATH_POINT) + return(NULL); + if (end->type != XPATH_POINT) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodePoint: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + ret->user2 = end->user; + ret->index2 = end->index; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewRangeNodes: + * @start: the starting node + * @end: the ending node + * + * Create a new xmlXPathObjectPtr of type range using 2 nodes + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrNewRangeNodes(xmlNodePtr start, xmlNodePtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodes: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + ret->user2 = end; + ret->index2 = -1; + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +/** + * xmlXPtrNewCollapsedRange: + * @start: the starting and ending node + * + * Create a new xmlXPathObjectPtr of type range using a single nodes + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewCollapsedRange(xmlNodePtr start) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodes: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + ret->user2 = NULL; + ret->index2 = -1; + return(ret); +} + +/** + * xmlXPtrNewRangeNodeObject: + * @start: the starting node + * @end: the ending object + * + * Create a new xmlXPathObjectPtr of type range from a not to an object + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewRangeNodeObject(xmlNodePtr start, xmlXPathObjectPtr end) { + xmlXPathObjectPtr ret; + + if (start == NULL) + return(NULL); + if (end == NULL) + return(NULL); + switch (end->type) { + case XPATH_POINT: + break; + case XPATH_NODESET: + /* + * Empty set ... 
+ */ + if (end->nodesetval->nodeNr <= 0) + return(NULL); + break; + default: + TODO + return(NULL); + } + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewRangeNodeObject: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_RANGE; + ret->user = start; + ret->index = -1; + switch (end->type) { + case XPATH_POINT: + ret->user2 = end->user; + ret->index2 = end->index; + case XPATH_NODESET: { + ret->user2 = end->nodesetval->nodeTab[end->nodesetval->nodeNr - 1]; + ret->index2 = -1; + break; + } + default: + STRANGE + return(NULL); + } + xmlXPtrRangeCheckOrder(ret); + return(ret); +} + +#define XML_RANGESET_DEFAULT 10 + +/** + * xmlXPtrLocationSetCreate: + * @val: an initial xmlXPathObjectPtr, or NULL + * + * Create a new xmlLocationSetPtr of type double and of value @val + * + * Returns the newly created object. + */ +xmlLocationSetPtr +xmlXPtrLocationSetCreate(xmlXPathObjectPtr val) { + xmlLocationSetPtr ret; + + ret = (xmlLocationSetPtr) xmlMalloc(sizeof(xmlLocationSet)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetCreate: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlLocationSet)); + if (val != NULL) { + ret->locTab = (xmlXPathObjectPtr *) xmlMalloc(XML_RANGESET_DEFAULT * + sizeof(xmlXPathObjectPtr)); + if (ret->locTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetCreate: out of memory\n"); + return(NULL); + } + memset(ret->locTab, 0 , + XML_RANGESET_DEFAULT * (size_t) sizeof(xmlXPathObjectPtr)); + ret->locMax = XML_RANGESET_DEFAULT; + ret->locTab[ret->locNr++] = val; + } + return(ret); +} + +/** + * xmlXPtrLocationSetAdd: + * @cur: the initial range set + * @val: a new xmlXPathObjectPtr + * + * add a new xmlXPathObjectPtr ot an existing LocationSet + * If the location already exist in the set @val is freed. 
+ */ +void +xmlXPtrLocationSetAdd(xmlLocationSetPtr cur, xmlXPathObjectPtr val) { + int i; + + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->locNr;i++) { + if (xmlXPtrRangesEqual(cur->locTab[i], val)) { + xmlXPathFreeObject(val); + return; + } + } + + /* + * grow the locTab if needed + */ + if (cur->locMax == 0) { + cur->locTab = (xmlXPathObjectPtr *) xmlMalloc(XML_RANGESET_DEFAULT * + sizeof(xmlXPathObjectPtr)); + if (cur->locTab == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetAdd: out of memory\n"); + return; + } + memset(cur->locTab, 0 , + XML_RANGESET_DEFAULT * (size_t) sizeof(xmlXPathObjectPtr)); + cur->locMax = XML_RANGESET_DEFAULT; + } else if (cur->locNr == cur->locMax) { + xmlXPathObjectPtr *temp; + + cur->locMax *= 2; + temp = (xmlXPathObjectPtr *) xmlRealloc(cur->locTab, cur->locMax * + sizeof(xmlXPathObjectPtr)); + if (temp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetAdd: out of memory\n"); + return; + } + cur->locTab = temp; + } + cur->locTab[cur->locNr++] = val; +} + +/** + * xmlXPtrLocationSetMerge: + * @val1: the first LocationSet + * @val2: the second LocationSet + * + * Merges two rangesets, all ranges from @val2 are added to @val1 + * + * Returns val1 once extended or NULL in case of error. + */ +xmlLocationSetPtr +xmlXPtrLocationSetMerge(xmlLocationSetPtr val1, xmlLocationSetPtr val2) { + int i; + + if (val1 == NULL) return(NULL); + if (val2 == NULL) return(val1); + + /* + * !!!!! this can be optimized a lot, knowing that both + * val1 and val2 already have unicity of their values. 
+ */ + + for (i = 0;i < val2->locNr;i++) + xmlXPtrLocationSetAdd(val1, val2->locTab[i]); + + return(val1); +} + +/** + * xmlXPtrLocationSetDel: + * @cur: the initial range set + * @val: an xmlXPathObjectPtr + * + * Removes an xmlXPathObjectPtr from an existing LocationSet + */ +void +xmlXPtrLocationSetDel(xmlLocationSetPtr cur, xmlXPathObjectPtr val) { + int i; + + if (cur == NULL) return; + if (val == NULL) return; + + /* + * check against doublons + */ + for (i = 0;i < cur->locNr;i++) + if (cur->locTab[i] == val) break; + + if (i >= cur->locNr) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrLocationSetDel: Range %s wasn't found in RangeList\n", + val->name); +#endif + return; + } + cur->locNr--; + for (;i < cur->locNr;i++) + cur->locTab[i] = cur->locTab[i + 1]; + cur->locTab[cur->locNr] = NULL; +} + +/** + * xmlXPtrLocationSetRemove: + * @cur: the initial range set + * @val: the index to remove + * + * Removes an entry from an existing LocationSet list. + */ +void +xmlXPtrLocationSetRemove(xmlLocationSetPtr cur, int val) { + if (cur == NULL) return; + if (val >= cur->locNr) return; + cur->locNr--; + for (;val < cur->locNr;val++) + cur->locTab[val] = cur->locTab[val + 1]; + cur->locTab[cur->locNr] = NULL; +} + +/** + * xmlXPtrFreeLocationSet: + * @obj: the xmlLocationSetPtr to free + * + * Free the LocationSet compound (not the actual ranges !). 
+ */ +void +xmlXPtrFreeLocationSet(xmlLocationSetPtr obj) { + int i; + + if (obj == NULL) return; + if (obj->locTab != NULL) { + for (i = 0;i < obj->locNr; i++) { + xmlXPathFreeObject(obj->locTab[i]); + } +#ifdef DEBUG + memset(obj->locTab, 0xB , + (size_t) sizeof(xmlXPathObjectPtr) * obj->locMax); +#endif + xmlFree(obj->locTab); + } +#ifdef DEBUG + memset(obj, 0xB , (size_t) sizeof(xmlLocationSet)); +#endif + xmlFree(obj); +} + +/** + * xmlXPtrNewLocationSetNodes: + * @start: the start NodePtr value + * @end: the end NodePtr value or NULL + * + * Create a new xmlXPathObjectPtr of type LocationSet and initialize + * it with the single range made of the two nodes @start and @end + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrNewLocationSetNodes(xmlNodePtr start, xmlNodePtr end) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewLocationSetNodes: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_LOCATIONSET; + if (end == NULL) + ret->user = xmlXPtrLocationSetCreate(xmlXPtrNewCollapsedRange(start)); + else + ret->user = xmlXPtrLocationSetCreate(xmlXPtrNewRangeNodes(start,end)); + return(ret); +} + +/** + * xmlXPtrNewLocationSetNodeSet: + * @set: a node set + * + * Create a new xmlXPathObjectPtr of type LocationSet and initialize + * it with all the nodes from @set + * + * Returns the newly created object. 
+ */ +xmlXPathObjectPtr +xmlXPtrNewLocationSetNodeSet(xmlNodeSetPtr set) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrNewLocationSetNodes: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_LOCATIONSET; + if (set != NULL) { + int i; + xmlLocationSetPtr newset; + + newset = xmlXPtrLocationSetCreate(NULL); + if (newset == NULL) + return(ret); + + for (i = 0;i < set->nodeNr;i++) + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewCollapsedRange(set->nodeTab[i])); + + ret->user = (void *) newset; + } + return(ret); +} + +/** + * xmlXPtrWrapLocationSet: + * @val: the LocationSet value + * + * Wrap the LocationSet @val in a new xmlXPathObjectPtr + * + * Returns the newly created object. + */ +xmlXPathObjectPtr +xmlXPtrWrapLocationSet(xmlLocationSetPtr val) { + xmlXPathObjectPtr ret; + + ret = (xmlXPathObjectPtr) xmlMalloc(sizeof(xmlXPathObject)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrWrapLocationSet: out of memory\n"); + return(NULL); + } + memset(ret, 0 , (size_t) sizeof(xmlXPathObject)); + ret->type = XPATH_LOCATIONSET; + ret->user = (void *) val; + return(ret); +} + +/************************************************************************ + * * + * The parser * + * * + ************************************************************************/ + +/* + * Macros for accessing the content. Those should be used only by the parser, + * and not exported. + * + * Dirty macros, i.e. one need to make assumption on the context to use them + * + * CUR_PTR return the current pointer to the xmlChar to be parsed. + * CUR returns the current xmlChar value, i.e. a 8 bit value + * in ISO-Latin or UTF-8. + * This should be used internally by the parser + * only to compare to ASCII values otherwise it would break when + * running with UTF-8 encoding. 
+ * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only + * to compare on ASCII based substring. + * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined + * strings within the parser. + * CURRENT Returns the current char value, with the full decoding of + * UTF-8 if we are using this mode. It returns an int. + * NEXT Skip to the next character, this does the proper decoding + * in UTF-8 mode. It also pop-up unfinished entities on the fly. + * It returns the pointer to the current xmlChar. + */ + +#define CUR (*ctxt->cur) +#define SKIP(val) ctxt->cur += (val) +#define NXT(val) ctxt->cur[(val)] +#define CUR_PTR ctxt->cur + +#define SKIP_BLANKS \ + while (IS_BLANK(*(ctxt->cur))) NEXT + +#define CURRENT (*ctxt->cur) +#define NEXT ((*ctxt->cur) ? ctxt->cur++: ctxt->cur) + +/* + * xmlXPtrGetChildNo: + * @ctxt: the XPointer Parser context + * @index: the child number + * + * Move the current node of the nodeset on the stack to the + * given child if found + */ +void +xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt, int index) { + xmlNodePtr cur = NULL; + xmlXPathObjectPtr obj; + xmlNodeSetPtr oldset; + + CHECK_TYPE(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + if ((index <= 0) || (oldset == NULL) || (oldset->nodeNr != 1)) { + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + return; + } + cur = xmlXPtrGetNthChild(oldset->nodeTab[0], index); + if (cur == NULL) { + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPathNewNodeSet(NULL)); + return; + } + oldset->nodeTab[0] = cur; + valuePush(ctxt, obj); +} + +/** + * xmlXPtrEvalXPtrPart: + * @ctxt: the XPointer Parser context + * @name: the preparsed Scheme for the XPtrPart + * + * XPtrPart ::= 'xpointer' '(' XPtrExpr ')' + * | Scheme '(' SchemeSpecificExpr ')' + * + * Scheme ::= NCName - 'xpointer' [VC: Non-XPointer schemes] + * + * SchemeSpecificExpr ::= StringWithBalancedParens + * + * StringWithBalancedParens ::= + * [^()]* ('(' 
StringWithBalancedParens ')' [^()]*)* + * [VC: Parenthesis escaping] + * + * XPtrExpr ::= Expr [VC: Parenthesis escaping] + * + * VC: Parenthesis escaping: + * The end of an XPointer part is signaled by the right parenthesis ")" + * character that is balanced with the left parenthesis "(" character + * that began the part. Any unbalanced parenthesis character inside the + * expression, even within literals, must be escaped with a circumflex (^) + * character preceding it. If the expression contains any literal + * occurrences of the circumflex, each must be escaped with an additional + * circumflex (that is, ^^). If the unescaped parentheses in the expression + * are not balanced, a syntax error results. + * + * Parse and evaluate an XPtrPart. Basically it generates the unescaped + * string and if the scheme is 'xpointer' it will call the XPath interprter. + * + * TODO: there is no new scheme registration mechanism + */ + +void +xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt, xmlChar *name) { + xmlChar *buffer, *cur; + int len; + int level; + + if (name == NULL) + name = xmlXPathParseName(ctxt); + if (name == NULL) + XP_ERROR(XPATH_EXPR_ERROR); + + if (CUR != '(') + XP_ERROR(XPATH_EXPR_ERROR); + NEXT; + level = 1; + + len = xmlStrlen(ctxt->cur); + len++; + buffer = (xmlChar *) xmlMalloc(len * sizeof (xmlChar)); + if (buffer == NULL) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrEvalXPtrPart: out of memory\n"); + return; + } + + cur = buffer; + while (CUR != 0) { + if (CUR == ')') { + level--; + if (level == 0) { + NEXT; + break; + } + *cur++ = CUR; + } else if (CUR == '(') { + level++; + *cur++ = CUR; + } else if (CUR == '^') { + NEXT; + if ((CUR == ')') || (CUR == '(') || (CUR == '^')) { + *cur++ = CUR; + } else { + *cur++ = '^'; + *cur++ = CUR; + } + } else { + *cur++ = CUR; + } + NEXT; + } + *cur = 0; + + if ((level != 0) && (CUR == 0)) { + xmlFree(buffer); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + + if (xmlStrEqual(name, (xmlChar *) "xpointer")) { + const 
xmlChar *left = CUR_PTR; + + CUR_PTR = buffer; + xmlXPathRoot(ctxt); + xmlXPathEvalExpr(ctxt); + CUR_PTR=left; +#ifdef XPTR_XMLNS_SCHEME + } else if (xmlStrEqual(name, (xmlChar *) "xmlns")) { + const xmlChar *left = CUR_PTR; + xmlChar *prefix; + xmlChar *URI; + xmlURIPtr value; + + CUR_PTR = buffer; + prefix = xmlXPathParseNCName(ctxt); + if (prefix == NULL) { + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + SKIP_BLANKS; + if (CUR != '=') { + xmlFree(prefix); + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + NEXT; + SKIP_BLANKS; + /* @@ check escaping in the XPointer WD */ + + value = xmlParseURI((const char *)ctxt->cur); + if (value == NULL) { + xmlFree(prefix); + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + URI = xmlSaveUri(value); + xmlFreeURI(value); + if (URI == NULL) { + xmlFree(prefix); + xmlFree(buffer); + xmlFree(name); + XP_ERROR(XPATH_MEMORY_ERROR); + } + + xmlXPathRegisterNs(ctxt->context, prefix, URI); + CUR_PTR = left; +#endif /* XPTR_XMLNS_SCHEME */ + } else { + xmlGenericError(xmlGenericErrorContext, + "unsupported scheme '%s'\n", name); + } + xmlFree(buffer); + xmlFree(name); +} + +/** + * xmlXPtrEvalFullXPtr: + * @ctxt: the XPointer Parser context + * @name: the preparsed Scheme for the first XPtrPart + * + * FullXPtr ::= XPtrPart (S? XPtrPart)* + * + * As the specs says: + * ----------- + * When multiple XPtrParts are provided, they must be evaluated in + * left-to-right order. If evaluation of one part fails, the nexti + * is evaluated. The following conditions cause XPointer part failure: + * + * - An unknown scheme + * - A scheme that does not locate any sub-resource present in the resource + * - A scheme that is not applicable to the media type of the resource + * + * The XPointer application must consume a failed XPointer part and + * attempt to evaluate the next one, if any. 
The result of the first + * XPointer part whose evaluation succeeds is taken to be the fragment + * located by the XPointer as a whole. If all the parts fail, the result + * for the XPointer as a whole is a sub-resource error. + * ----------- + * + * Parse and evaluate a Full XPtr i.e. possibly a cascade of XPath based + * expressions or other shemes. + */ +void +xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt, xmlChar *name) { + if (name == NULL) + name = xmlXPathParseName(ctxt); + if (name == NULL) + XP_ERROR(XPATH_EXPR_ERROR); + while (name != NULL) { + xmlXPtrEvalXPtrPart(ctxt, name); + + /* in case of syntax error, break here */ + if (ctxt->error != XPATH_EXPRESSION_OK) + return; + + /* + * If the returned value is a non-empty nodeset + * or location set, return here. + */ + if (ctxt->value != NULL) { + xmlXPathObjectPtr obj = ctxt->value; + + switch (obj->type) { + case XPATH_LOCATIONSET: { + xmlLocationSetPtr loc = ctxt->value->user; + if ((loc != NULL) && (loc->locNr > 0)) + return; + break; + } + case XPATH_NODESET: { + xmlNodeSetPtr loc = ctxt->value->nodesetval; + if ((loc != NULL) && (loc->nodeNr > 0)) + return; + break; + } + default: + break; + } + + /* + * Evaluating to improper values is equivalent to + * a sub-resource error, clean-up the stack + */ + do { + obj = valuePop(ctxt); + if (obj != NULL) { + xmlXPathFreeObject(obj); + } + } while (obj != NULL); + } + + /* + * Is there another XPoointer part. + */ + SKIP_BLANKS; + name = xmlXPathParseName(ctxt); + } +} + +/** + * xmlXPtrEvalChildSeq: + * @ctxt: the XPointer Parser context + * @name: a possible ID name of the child sequence + * + * ChildSeq ::= '/1' ('/' [0-9]*)* + * | Name ('/' [0-9]*)+ + * + * Parse and evaluate a Child Sequence. This routine also handle the + * case of a Bare Name used to get a document ID. 
+ */ +void +xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name) { + /* + * XPointer don't allow by syntax to adress in mutirooted trees + * this might prove useful in some cases, warn about it. + */ + if ((name == NULL) && (CUR == '/') && (NXT(1) != '1')) { + xmlGenericError(xmlGenericErrorContext, + "warning: ChildSeq not starting by /1\n"); + } + + if (name != NULL) { + valuePush(ctxt, xmlXPathNewString(name)); + xmlFree(name); + xmlXPathIdFunction(ctxt, 1); + CHECK_ERROR; + } + + while (CUR == '/') { + int child = 0; + NEXT; + + while ((CUR >= '0') && (CUR <= '9')) { + child = child * 10 + (CUR - '0'); + NEXT; + } + xmlXPtrGetChildNo(ctxt, child); + } +} + + +/** + * xmlXPtrEvalXPointer: + * @ctxt: the XPointer Parser context + * + * XPointer ::= Name + * | ChildSeq + * | FullXPtr + * + * Parse and evaluate an XPointer + */ +void +xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt) { + SKIP_BLANKS; + if (CUR == '/') { + xmlXPathRoot(ctxt); + xmlXPtrEvalChildSeq(ctxt, NULL); + } else { + xmlChar *name; + + name = xmlXPathParseName(ctxt); + if (name == NULL) + XP_ERROR(XPATH_EXPR_ERROR); + if (CUR == '(') { + xmlXPtrEvalFullXPtr(ctxt, name); + /* Short evaluation */ + return; + } else { + /* this handle both Bare Names and Child Sequences */ + xmlXPtrEvalChildSeq(ctxt, name); + } + } + SKIP_BLANKS; + if (CUR != 0) + XP_ERROR(XPATH_EXPR_ERROR); +} + + +/************************************************************************ + * * + * General routines * + * * + ************************************************************************/ + +void xmlXPtrRangeToFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrStringRangeFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrStartPointFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrEndPointFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrHereFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrOriginFunction(xmlXPathParserContextPtr ctxt, 
int nargs); +void xmlXPtrRangeInsideFunction(xmlXPathParserContextPtr ctxt, int nargs); +void xmlXPtrRangeFunction(xmlXPathParserContextPtr ctxt, int nargs); + +/** + * xmlXPtrNewContext: + * @doc: the XML document + * @here: the node that directly contains the XPointer being evaluated or NULL + * @origin: the element from which a user or program initiated traversal of + * the link, or NULL. + * + * Create a new XPointer context + * + * Returns the xmlXPathContext just allocated. + */ +xmlXPathContextPtr +xmlXPtrNewContext(xmlDocPtr doc, xmlNodePtr here, xmlNodePtr origin) { + xmlXPathContextPtr ret; + + ret = xmlXPathNewContext(doc); + if (ret == NULL) + return(ret); + ret->xptr = 1; + ret->here = here; + ret->origin = origin; + + xmlXPathRegisterFunc(ret, (xmlChar *)"range-to", + xmlXPtrRangeToFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"range", + xmlXPtrRangeFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"range-inside", + xmlXPtrRangeInsideFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"string-range", + xmlXPtrStringRangeFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"start-point", + xmlXPtrStartPointFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"end-point", + xmlXPtrEndPointFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)"here", + xmlXPtrHereFunction); + xmlXPathRegisterFunc(ret, (xmlChar *)" origin", + xmlXPtrOriginFunction); + + return(ret); +} + +/** + * xmlXPtrEval: + * @str: the XPointer expression + * @ctx: the XPointer context + * + * Evaluate the XPath Location Path in the given context. + * + * Returns the xmlXPathObjectPtr resulting from the eveluation or NULL. + * the caller has to free the object. 
+ */ +xmlXPathObjectPtr +xmlXPtrEval(const xmlChar *str, xmlXPathContextPtr ctx) { + xmlXPathParserContextPtr ctxt; + xmlXPathObjectPtr res = NULL, tmp; + xmlXPathObjectPtr init = NULL; + int stack = 0; + + xmlXPathInit(); + + if ((ctx == NULL) || (str == NULL)) + return(NULL); + + ctxt = xmlXPathNewParserContext(str, ctx); + /* TAG:9999 + if (ctx->node != NULL) { + init = xmlXPathNewNodeSet(ctx->node); + valuePush(ctxt, init); + } + */ + xmlXPtrEvalXPointer(ctxt); + + if ((ctxt->value != NULL) && + (ctxt->value->type != XPATH_NODESET) && + (ctxt->value->type != XPATH_LOCATIONSET)) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrEval: evaluation failed to return a node set\n"); + } else { + res = valuePop(ctxt); + } + + do { + tmp = valuePop(ctxt); + if (tmp != NULL) { + if (tmp != init) { + if (tmp->type == XPATH_NODESET) { + /* + * Evaluation may push a root nodeset which is unused + */ + xmlNodeSetPtr set; + set = tmp->nodesetval; + if ((set->nodeNr != 1) || + (set->nodeTab[0] != (xmlNodePtr) ctx->doc)) + stack++; + } else + stack++; + } + xmlXPathFreeObject(tmp); + } + } while (tmp != NULL); + if (stack != 0) { + xmlGenericError(xmlGenericErrorContext, + "xmlXPtrEval: %d object left on the stack\n", + stack); + } + if (ctxt->error != XPATH_EXPRESSION_OK) { + xmlXPathFreeObject(res); + res = NULL; + } + + xmlXPathFreeParserContext(ctxt); + return(res); +} + +/** + * xmlXPtrBuildRangeNodeList: + * @range: a range object + * + * Build a node list tree copy of the range + * + * Returns an xmlNodePtr list or NULL. + * the caller has to free the node tree. 
+ */ +xmlNodePtr +xmlXPtrBuildRangeNodeList(xmlXPathObjectPtr range) { + /* pointers to generated nodes */ + xmlNodePtr list = NULL, last = NULL, parent = NULL, tmp; + /* pointers to traversal nodes */ + xmlNodePtr start, cur, end; + int index, index2; + + if (range == NULL) + return(NULL); + if (range->type != XPATH_RANGE) + return(NULL); + start = (xmlNodePtr) range->user; + + if (start == NULL) + return(NULL); + end = range->user2; + if (end == NULL) + return(xmlCopyNode(start, 1)); + + cur = start; + index = range->index; + index2 = range->index2; + while (cur != NULL) { + if (cur == end) { + if (cur->type == XML_TEXT_NODE) { + const xmlChar *content = cur->content; + int len; + + if (content == NULL) { + tmp = xmlNewTextLen(NULL, 0); + } else { + len = index2; + if ((cur == start) && (index > 1)) { + content += (index - 1); + len -= (index - 1); + index = 0; + } else { + len = index2; + } + tmp = xmlNewTextLen(content, len); + } + /* single sub text node selection */ + if (list == NULL) + return(tmp); + /* prune and return full set */ + if (last != NULL) + xmlAddNextSibling(last, tmp); + else + xmlAddChild(parent, tmp); + return(list); + } else { + tmp = xmlCopyNode(cur, 0); + if (list == NULL) + list = tmp; + else { + if (last != NULL) + xmlAddNextSibling(last, tmp); + else + xmlAddChild(parent, tmp); + } + last = NULL; + parent = tmp; + + if (index2 > 1) { + end = xmlXPtrGetNthChild(cur, index2 - 1); + index2 = 0; + } + if ((cur == start) && (index > 1)) { + cur = xmlXPtrGetNthChild(cur, index - 1); + index = 0; + } else { + cur = cur->children; + } + /* + * Now gather the remaining nodes from cur to end + */ + continue; /* while */ + } + } else if ((cur == start) && + (list == NULL) /* looks superfluous but ... 
*/ ) { + if (cur->type == XML_TEXT_NODE) { + const xmlChar *content = cur->content; + + if (content == NULL) { + tmp = xmlNewTextLen(NULL, 0); + } else { + if (index > 1) { + content += (index - 1); + } + tmp = xmlNewText(content); + } + last = list = tmp; + } else { + if ((cur == start) && (index > 1)) { + tmp = xmlCopyNode(cur, 0); + list = tmp; + parent = tmp; + last = NULL; + cur = xmlXPtrGetNthChild(cur, index - 1); + index = 0; + /* + * Now gather the remaining nodes from cur to end + */ + continue; /* while */ + } + tmp = xmlCopyNode(cur, 1); + list = tmp; + parent = NULL; + last = tmp; + } + } else { + tmp = NULL; + switch (cur->type) { + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_NODE: + /* Do not copy DTD informations */ + break; + case XML_ENTITY_DECL: + TODO /* handle csossing entities -> stack needed */ + break; + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + /* don't consider it part of the tree content */ + break; + case XML_ATTRIBUTE_NODE: + /* Humm, should not happen ! */ + STRANGE + break; + default: + tmp = xmlCopyNode(cur, 1); + break; + } + if (tmp != NULL) { + if ((list == NULL) || ((last == NULL) && (parent == NULL))) { + STRANGE + return(NULL); + } + if (last != NULL) + xmlAddNextSibling(last, tmp); + else { + xmlAddChild(parent, tmp); + last = tmp; + } + } + } + /* + * Skip to next node in document order + */ + if ((list == NULL) || ((last == NULL) && (parent == NULL))) { + STRANGE + return(NULL); + } + cur = xmlXPtrAdvanceNode(cur); + } + return(list); +} + +/** + * xmlXPtrBuildNodeList: + * @obj: the XPointer result from the evaluation. + * + * Build a node list tree copy of the XPointer result. + * + * Returns an xmlNodePtr list or NULL. + * the caller has to free the node tree. 
+ */ +xmlNodePtr +xmlXPtrBuildNodeList(xmlXPathObjectPtr obj) { + xmlNodePtr list = NULL, last = NULL; + int i; + + if (obj == NULL) + return(NULL); + switch (obj->type) { + case XPATH_NODESET: { + xmlNodeSetPtr set = obj->nodesetval; + if (set == NULL) + return(NULL); + for (i = 0;i < set->nodeNr;i++) { + if (last == NULL) + list = last = xmlCopyNode(set->nodeTab[i], 1); + else { + xmlAddNextSibling(last, xmlCopyNode(set->nodeTab[i], 1)); + if (last->next != NULL) + last = last->next; + } + } + break; + } + case XPATH_LOCATIONSET: { + xmlLocationSetPtr set = (xmlLocationSetPtr) obj->user; + if (set == NULL) + return(NULL); + for (i = 0;i < set->locNr;i++) { + if (last == NULL) + list = last = xmlXPtrBuildNodeList(set->locTab[i]); + else + xmlAddNextSibling(last, + xmlXPtrBuildNodeList(set->locTab[i])); + if (last != NULL) { + while (last->next != NULL) + last = last->next; + } + } + break; + } + case XPATH_RANGE: + return(xmlXPtrBuildRangeNodeList(obj)); + case XPATH_POINT: + return(xmlCopyNode(obj->user, 0)); + default: + break; + } + return(list); +} + +/************************************************************************ + * * + * XPointer functions * + * * + ************************************************************************/ + +/** + * xmlXPtrNbLocChildren: + * @node: an xmlNodePtr + * + * Count the number of location children of @node or the lenght of the + * string value in case of text/PI/Comments nodes + * + * Returns the number of location children + */ +int +xmlXPtrNbLocChildren(xmlNodePtr node) { + int ret = 0; + if (node == NULL) + return(-1); + switch (node->type) { + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_ELEMENT_NODE: + node = node->children; + while (node != NULL) { + if (node->type == XML_ELEMENT_NODE) + ret++; + node = node->next; + } + break; + case XML_ATTRIBUTE_NODE: + return(-1); + + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: 
+#ifndef XML_USE_BUFFER_CONTENT + ret = xmlStrlen(node->content); +#else + ret = xmlBufferLength(node->content); +#endif + break; + default: + return(-1); + } + return(ret); +} + +/** + * xmlXPtrHereFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing here() operation + * as described in 5.4.3 + */ +void +xmlXPtrHereFunction(xmlXPathParserContextPtr ctxt, int nargs) { + if (ctxt->context->here == NULL) + XP_ERROR(XPTR_SYNTAX_ERROR); + + valuePush(ctxt, xmlXPtrNewLocationSetNodes(ctxt->context->here, NULL)); +} + +/** + * xmlXPtrOriginFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing origin() operation + * as described in 5.4.3 + */ +void +xmlXPtrOriginFunction(xmlXPathParserContextPtr ctxt, int nargs) { + if (ctxt->context->origin == NULL) + XP_ERROR(XPTR_SYNTAX_ERROR); + + valuePush(ctxt, xmlXPtrNewLocationSetNodes(ctxt->context->origin, NULL)); +} + +/** + * xmlXPtrStartPointFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing start-point() operation + * as described in 5.4.3 + * ---------------- + * location-set start-point(location-set) + * + * For each location x in the argument location-set, start-point adds a + * location of type point to the result location-set. That point represents + * the start point of location x and is determined by the following rules: + * + * - If x is of type point, the start point is x. + * - If x is of type range, the start point is the start point of x. + * - If x is of type root, element, text, comment, or processing instruction, + * - the container node of the start point is x and the index is 0. + * - If x is of type attribute or namespace, the function must signal a + * syntax error. 
+ * ---------------- + * + */ +void +xmlXPtrStartPointFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr tmp, obj, point; + xmlLocationSetPtr newset = NULL; + xmlLocationSetPtr oldset = NULL; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + obj = valuePop(ctxt); + if (obj->type == XPATH_NODESET) { + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(obj->nodesetval); + xmlXPathFreeObject(obj); + obj = tmp; + } + + newset = xmlXPtrLocationSetCreate(NULL); + if (newset == NULL) { + xmlXPathFreeObject(obj); + XP_ERROR(XPATH_MEMORY_ERROR); + } + oldset = (xmlLocationSetPtr) obj->user; + if (oldset != NULL) { + int i; + + for (i = 0; i < oldset->locNr; i++) { + tmp = oldset->locTab[i]; + if (tmp == NULL) + continue; + point = NULL; + switch (tmp->type) { + case XPATH_POINT: + point = xmlXPtrNewPoint(tmp->user, tmp->index); + break; + case XPATH_RANGE: { + xmlNodePtr node = tmp->user; + if (node != NULL) { + if (node->type == XML_ATTRIBUTE_NODE) { + /* TODO: Namespace Nodes ??? */ + xmlXPathFreeObject(obj); + xmlXPtrFreeLocationSet(newset); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + point = xmlXPtrNewPoint(node, tmp->index); + } + break; + } + default: + /*** Should we raise an error ? + xmlXPathFreeObject(obj); + xmlXPathFreeObject(newset); + XP_ERROR(XPATH_INVALID_TYPE) + ***/ + break; + } + if (point != NULL) + xmlXPtrLocationSetAdd(newset, point); + } + } + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); +} + +/** + * xmlXPtrEndPointFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing end-point() operation + * as described in 5.4.3 + * ---------------------------- + * location-set end-point(location-set) + * + * For each location x in the argument location-set, end-point adds a + * location of type point to the result location-set. 
That point representsi + * the end point of location x and is determined by the following rules: + * + * - If x is of type point, the resulting point is x. + * - If x is of type range, the resulting point is the end point of x. + * - If x is of type root or element, the container node of the resulting + * point is x and the index is the number of location children of x. + * - If x is of type text, comment, or processing instruction, the container + * node of the resulting point is x and the index is the length of thei + * string-value of x. + * - If x is of type attribute or namespace, the function must signal a + * syntax error. + * ---------------------------- + */ +void +xmlXPtrEndPointFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr tmp, obj, point; + xmlLocationSetPtr newset = NULL; + xmlLocationSetPtr oldset = NULL; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + obj = valuePop(ctxt); + if (obj->type == XPATH_NODESET) { + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(obj->nodesetval); + xmlXPathFreeObject(obj); + obj = tmp; + } + + newset = xmlXPtrLocationSetCreate(NULL); + oldset = (xmlLocationSetPtr) obj->user; + if (oldset != NULL) { + int i; + + for (i = 0; i < oldset->locNr; i++) { + tmp = oldset->locTab[i]; + if (tmp == NULL) + continue; + point = NULL; + switch (tmp->type) { + case XPATH_POINT: + point = xmlXPtrNewPoint(tmp->user, tmp->index); + break; + case XPATH_RANGE: { + xmlNodePtr node = tmp->user2; + if (node != NULL) { + if (node->type == XML_ATTRIBUTE_NODE) { + /* TODO: Namespace Nodes ??? 
*/ + xmlXPathFreeObject(obj); + xmlXPtrFreeLocationSet(newset); + XP_ERROR(XPTR_SYNTAX_ERROR); + } + point = xmlXPtrNewPoint(node, tmp->index2); + } else if (tmp->user == NULL) { + point = xmlXPtrNewPoint(node, + xmlXPtrNbLocChildren(node)); + } + break; + } + default: + /*** Should we raise an error ? + xmlXPathFreeObject(obj); + xmlXPathFreeObject(newset); + XP_ERROR(XPATH_INVALID_TYPE) + ***/ + break; + } + if (point != NULL) + xmlXPtrLocationSetAdd(newset, point); + } + } + xmlXPathFreeObject(obj); + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); +} + + +/** + * xmlXPtrCoveringRange: + * @ctxt: the XPointer Parser context + * @loc: the location for which the covering range must be computed + * + * A covering range is a range that wholly encompasses a location + * Section 5.3.3. Covering Ranges for All Location Types + * http://www.w3.org/TR/xptr#N2267 + * + * Returns a new location or NULL in case of error + */ +xmlXPathObjectPtr +xmlXPtrCoveringRange(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr loc) { + if (loc == NULL) + return(NULL); + if ((ctxt == NULL) || (ctxt->context == NULL) || + (ctxt->context->doc == NULL)) + return(NULL); + switch (loc->type) { + case XPATH_POINT: + return(xmlXPtrNewRange(loc->user, loc->index, + loc->user, loc->index)); + case XPATH_RANGE: + if (loc->user2 != NULL) { + return(xmlXPtrNewRange(loc->user, loc->index, + loc->user2, loc->index2)); + } else { + xmlNodePtr node = (xmlNodePtr) loc->user; + if (node == (xmlNodePtr) ctxt->context->doc) { + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + } else { + switch (node->type) { + case XML_ATTRIBUTE_NODE: + /* !!! 
our model is slightly different than XPath */ + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: { + int index = xmlXPtrGetIndex(node); + + node = node->parent; + return(xmlXPtrNewRange(node, index - 1, + node, index + 1)); + } + default: + return(NULL); + } + } + } + default: + TODO /* missed one case ??? */ + } + return(NULL); +} + +/** + * xmlXPtrRangeFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing the range() function 5.4.3 + * location-set range(location-set ) + * + * The range function returns ranges covering the locations in + * the argument location-set. For each location x in the argument + * location-set, a range location representing the covering range of + * x is added to the result location-set. + */ +void +xmlXPtrRangeFunction(xmlXPathParserContextPtr ctxt, int nargs) { + int i; + xmlXPathObjectPtr set; + xmlLocationSetPtr oldset; + xmlLocationSetPtr newset; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + set = valuePop(ctxt); + if (set->type == XPATH_NODESET) { + xmlXPathObjectPtr tmp; + + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); + xmlXPathFreeObject(set); + set = tmp; + } + oldset = (xmlLocationSetPtr) set->user; + + /* + * The loop is to compute the covering range for each item and add it + */ + newset = xmlXPtrLocationSetCreate(NULL); + for (i = 0;i < oldset->locNr;i++) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrCoveringRange(ctxt, oldset->locTab[i])); + } + + /* + * Save the new value and cleanup + */ + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + xmlXPathFreeObject(set); +} + +/** + 
* xmlXPtrInsideRange: + * @ctxt: the XPointer Parser context + * @loc: the location for which the inside range must be computed + * + * A inside range is a range described in the range-inside() description + * + * Returns a new location or NULL in case of error + */ +xmlXPathObjectPtr +xmlXPtrInsideRange(xmlXPathParserContextPtr ctxt, xmlXPathObjectPtr loc) { + if (loc == NULL) + return(NULL); + if ((ctxt == NULL) || (ctxt->context == NULL) || + (ctxt->context->doc == NULL)) + return(NULL); + switch (loc->type) { + case XPATH_POINT: { + xmlNodePtr node = (xmlNodePtr) loc->user; + switch (node->type) { + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: { + if (node->content == NULL) { + return(xmlXPtrNewRange(node, 0, node, 0)); + } else { +#ifndef XML_USE_BUFFER_CONTENT + return(xmlXPtrNewRange(node, 0, node, + xmlStrlen(node->content))); +#else + return(xmlXPtrNewRange(node, 0, node, + xmlBufferLength(node->content))); +#endif + } + } + case XML_ATTRIBUTE_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: { + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + } + default: + return(NULL); + } + return(NULL); + } + case XPATH_RANGE: { + xmlNodePtr node = (xmlNodePtr) loc->user; + if (loc->user2 != NULL) { + return(xmlXPtrNewRange(node, loc->index, + loc->user2, loc->index2)); + } else { + switch (node->type) { + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: { + if (node->content == NULL) { + return(xmlXPtrNewRange(node, 0, node, 0)); + } else { +#ifndef XML_USE_BUFFER_CONTENT + return(xmlXPtrNewRange(node, 0, node, + xmlStrlen(node->content))); +#else + return(xmlXPtrNewRange(node, 0, node, + xmlBufferLength(node->content))); +#endif + } + } + case XML_ATTRIBUTE_NODE: + case XML_ELEMENT_NODE: + case XML_ENTITY_REF_NODE: + case XML_DOCUMENT_NODE: + case 
XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: { + return(xmlXPtrNewRange(node, 0, node, + xmlXPtrGetArity(node))); + } + default: + return(NULL); + } + return(NULL); + } + } + default: + TODO /* missed one case ??? */ + } + return(NULL); +} + +/** + * xmlXPtrRangeInsideFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing the range-inside() function 5.4.3 + * location-set range-inside(location-set ) + * + * The range-inside function returns ranges covering the contents of + * the locations in the argument location-set. For each location x in + * the argument location-set, a range location is added to the result + * location-set. If x is a range location, then x is added to the + * result location-set. If x is not a range location, then x is used + * as the container location of the start and end points of the range + * location to be added; the index of the start point of the range is + * zero; if the end point is a character point then its index is the + * length of the string-value of x, and otherwise is the number of + * location children of x. 
+ * + */ +void +xmlXPtrRangeInsideFunction(xmlXPathParserContextPtr ctxt, int nargs) { + int i; + xmlXPathObjectPtr set; + xmlLocationSetPtr oldset; + xmlLocationSetPtr newset; + + CHECK_ARITY(1); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + set = valuePop(ctxt); + if (set->type == XPATH_NODESET) { + xmlXPathObjectPtr tmp; + + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); + xmlXPathFreeObject(set); + set = tmp; + } + oldset = (xmlLocationSetPtr) set->user; + + /* + * The loop is to compute the covering range for each item and add it + */ + newset = xmlXPtrLocationSetCreate(NULL); + for (i = 0;i < oldset->locNr;i++) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrInsideRange(ctxt, oldset->locTab[i])); + } + + /* + * Save the new value and cleanup + */ + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + xmlXPathFreeObject(set); +} + +/** + * xmlXPtrRangeToFunction: + * @ctxt: the XPointer Parser context + * + * Implement the range-to() XPointer function + */ +void +xmlXPtrRangeToFunction(xmlXPathParserContextPtr ctxt, int nargs) { + xmlXPathObjectPtr range; + const xmlChar *cur; + xmlXPathObjectPtr res, obj; + xmlXPathObjectPtr tmp; + xmlLocationSetPtr newset = NULL; + xmlNodeSetPtr oldset; + int i; + + CHECK_ARITY(1); + /* + * Save the expression pointer since we will have to evaluate + * it multiple times. Initialize the new set. + */ + CHECK_TYPE(XPATH_NODESET); + obj = valuePop(ctxt); + oldset = obj->nodesetval; + ctxt->context->node = NULL; + + cur = ctxt->cur; + newset = xmlXPtrLocationSetCreate(NULL); + + for (i = 0; i < oldset->nodeNr; i++) { + ctxt->cur = cur; + + /* + * Run the evaluation with a node list made of a single item + * in the nodeset. 
+ */ + ctxt->context->node = oldset->nodeTab[i]; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + + xmlXPathEvalExpr(ctxt); + CHECK_ERROR; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + range = xmlXPtrNewRangeNodeObject(oldset->nodeTab[i], res); + if (range != NULL) { + xmlXPtrLocationSetAdd(newset, range); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation set. + */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); +} + +/** + * xmlXPtrAdvanceNode: + * @cur: the node + * + * Advance to the next element or text node in document order + * TODO: add a stack for entering/exiting entities + * + * Returns -1 in case of failure, 0 otherwise + */ +xmlNodePtr +xmlXPtrAdvanceNode(xmlNodePtr cur) { +next: + if (cur == NULL) + return(NULL); + if (cur->children != NULL) { + cur = cur->children ; + goto found; + } + if (cur->next != NULL) { + cur = cur->next; + goto found; + } + do { + cur = cur->parent; + if (cur == NULL) return(NULL); + if (cur->next != NULL) { + cur = cur->next; + goto found; + } + } while (cur != NULL); + +found: + if ((cur->type != XML_ELEMENT_NODE) && + (cur->type != XML_TEXT_NODE) && + (cur->type != XML_DOCUMENT_NODE) && + (cur->type != XML_HTML_DOCUMENT_NODE) && + (cur->type != XML_CDATA_SECTION_NODE)) + goto next; + if (cur->type == XML_ENTITY_REF_NODE) { + TODO + } + return(cur); +} + +/** + * xmlXPtrAdvanceChar: + * @node: the node + * @index: the index + * @bytes: the number of bytes + * + * Advance a point of the associated number of bytes (not UTF8 chars) + * + * Returns -1 in case of failure, 0 otherwise + 
*/ +int +xmlXPtrAdvanceChar(xmlNodePtr *node, int *index, int bytes) { + xmlNodePtr cur; + int pos; + int len; + + if ((node == NULL) || (index == NULL)) + return(-1); + cur = *node; + if (cur == NULL) + return(-1); + pos = *index; + + while (bytes >= 0) { + /* + * First position to the beginning of the first text node + * corresponding to this point + */ + while ((cur != NULL) && + ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE))) { + if (pos > 0) { + cur = xmlXPtrGetNthChild(cur, pos); + pos = 0; + } else { + cur = xmlXPtrAdvanceNode(cur); + pos = 0; + } + } + + if (cur == NULL) { + *node = NULL; + *index = 0; + return(-1); + } + + /* + * if there is no move needed return the current value. + */ + if (pos == 0) pos = 1; + if (bytes == 0) { + *node = cur; + *index = pos; + return(0); + } + /* + * We should have a text (or cdata) node ... + */ + len = 0; + if (cur->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + len = xmlStrlen(cur->content); +#else + len = xmlBufferLength(cur->content); +#endif + } + if (pos > len) { + /* Strange, the index in the text node is greater than it's len */ + STRANGE + pos = len; + } + if (pos + bytes >= len) { + bytes -= (len - pos); + cur = xmlXPtrAdvanceNode(cur); + cur = 0; + } else if (pos + bytes < len) { + pos += bytes; + *node = cur; + *index = pos; + return(0); + } + } + return(-1); +} + +/** + * xmlXPtrMatchString: + * @string: the string to search + * @start: the start textnode + * @startindex: the start index + * @end: the end textnode IN/OUT + * @endindex: the end index IN/OUT + * + * Check whether the document contains @string at the position + * (@start, @startindex) and limited by the (@end, @endindex) point + * + * Returns -1 in case of failure, 0 if not found, 1 if found in which case + * (@start, @startindex) will indicate the position of the beginning + * of the range and (@end, @endindex) will endicate the end + * of the range + */ +int 
+xmlXPtrMatchString(const xmlChar *string, xmlNodePtr start, int startindex, + xmlNodePtr *end, int *endindex) { + xmlNodePtr cur; + int pos; /* 0 based */ + int len; /* in bytes */ + int stringlen; /* in bytes */ + int match; + + if (string == NULL) + return(-1); + if (start == NULL) + return(-1); + if ((end == NULL) || (endindex == NULL)) + return(-1); + cur = start; + if (cur == NULL) + return(-1); + pos = startindex - 1; + stringlen = xmlStrlen(string); + + while (stringlen > 0) { + if ((cur == *end) && (pos + stringlen > *endindex)) + return(0); + if (cur->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + len = xmlStrlen(cur->content); +#else + len = xmlBufferLength(cur->content); +#endif + if (len >= pos + stringlen) { +#ifndef XML_USE_BUFFER_CONTENT + match = (!xmlStrncmp(&cur->content[pos], string, stringlen)); +#else + len = (!xmlStrncmp(&xmlBufferContent(cur->content)[pos], + string, stringlen)); +#endif + if (match) { +#ifdef DEBUG_RANGES + xmlGenericError(xmlGenericErrorContext, + "found range %d bytes at index %d of ->", + stringlen, pos + 1); + xmlDebugDumpString(stdout, cur->content); + xmlGenericError(xmlGenericErrorContext, "\n"); +#endif + *end = cur; + *endindex = pos + stringlen; + return(1); + } else { + return(0); + } + } else { + int sub = len - pos; +#ifndef XML_USE_BUFFER_CONTENT + match = (!xmlStrncmp(&cur->content[pos], string, sub)); +#else + len = (!xmlStrncmp(&xmlBufferContent(cur->content)[pos], + string, sub)); +#endif + if (match) { +#ifdef DEBUG_RANGES + xmlGenericError(xmlGenericErrorContext, + "found subrange %d bytes at index %d of ->", + sub, pos + 1); + xmlDebugDumpString(stdout, cur->content); + xmlGenericError(xmlGenericErrorContext, "\n"); +#endif + string = &string[sub]; + stringlen -= sub; + } else { + return(0); + } + } + } + cur = xmlXPtrAdvanceNode(cur); + if (cur == NULL) + return(0); + pos = 0; + } + return(1); +} + +/** + * xmlXPtrSearchString: + * @string: the string to search + * @start: the start textnode 
IN/OUT + * @startindex: the start index IN/OUT + * @end: the end textnode + * @endindex: the end index + * + * Search the next occurence of @string within the document content + * until the (@end, @endindex) point is reached + * + * Returns -1 in case of failure, 0 if not found, 1 if found in which case + * (@start, @startindex) will indicate the position of the beginning + * of the range and (@end, @endindex) will endicate the end + * of the range + */ +int +xmlXPtrSearchString(const xmlChar *string, xmlNodePtr *start, int *startindex, + xmlNodePtr *end, int *endindex) { + xmlNodePtr cur; + const xmlChar *str; + int pos; /* 0 based */ + int len; /* in bytes */ + int stringlen; /* in bytes */ + xmlChar first; + + if (string == NULL) + return(-1); + if ((start == NULL) || (startindex == NULL)) + return(-1); + if ((end == NULL) || (endindex == NULL)) + return(-1); + cur = *start; + if (cur == NULL) + return(-1); + pos = *startindex - 1; + first = string[0]; + stringlen = xmlStrlen(string); + + while (cur != NULL) { + if (cur->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + len = xmlStrlen(cur->content); +#else + len = xmlBufferLength(cur->content); +#endif + while (pos <= len) { + if (first != 0) { +#ifndef XML_USE_BUFFER_CONTENT + str = xmlStrchr(&cur->content[pos], first); +#else + str = xmlStrchr(&xmlBufferContent(cur->content)[pos], + first); +#endif + if (str != NULL) { + pos = (str - (xmlChar *)(cur->content)); +#ifdef DEBUG_RANGES + xmlGenericError(xmlGenericErrorContext, + "found '%c' at index %d of ->", + first, pos + 1); + xmlDebugDumpString(stdout, cur->content); + xmlGenericError(xmlGenericErrorContext, "\n"); +#endif + if (xmlXPtrMatchString(string, cur, pos + 1, + end, endindex)) { + *start = cur; + *startindex = pos + 1; + return(1); + } + pos++; + } else { + pos = len + 1; + } + } else { + /* + * An empty string is considered to match before each + * character of the string-value and after the final + * character. 
+ */ +#ifdef DEBUG_RANGES + xmlGenericError(xmlGenericErrorContext, + "found '' at index %d of ->", + pos + 1); + xmlDebugDumpString(stdout, cur->content); + xmlGenericError(xmlGenericErrorContext, "\n"); +#endif + *start = cur; + *startindex = pos + 1; + *end = cur; + *endindex = pos + 1; + return(1); + } + } + } + if ((cur == *end) && (pos >= *endindex)) + return(0); + cur = xmlXPtrAdvanceNode(cur); + if (cur == NULL) + return(0); + pos = 1; + } + return(0); +} + +/** + * xmlXPtrGetLastChar: + * @node: the node + * @index: the index + * + * Computes the point coordinates of the last char of this point + * + * Returns -1 in case of failure, 0 otherwise + */ +int +xmlXPtrGetLastChar(xmlNodePtr *node, int *index) { + xmlNodePtr cur; + int pos, len = 0; + + if ((node == NULL) || (index == NULL)) + return(-1); + cur = *node; + pos = *index; + + if (cur == NULL) + return(-1); + + if ((cur->type == XML_ELEMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE) || + (cur->type == XML_HTML_DOCUMENT_NODE)) { + if (pos > 0) { + cur = xmlXPtrGetNthChild(cur, pos); + pos = 0; + } + } + while (cur != NULL) { + if (cur->last != NULL) + cur = cur->last; + else if (cur->content != NULL) { +#ifndef XML_USE_BUFFER_CONTENT + len = xmlStrlen(cur->content); +#else + len = xmlBufferLength(cur->content); +#endif + break; + } else { + return(-1); + } + } + if (cur == NULL) + return(-1); + *node = cur; + *index = len; + return(0); +} + +/** + * xmlXPtrGetStartPoint: + * @obj: an range + * @node: the resulting node + * @index: the resulting index + * + * read the object and return the start point coordinates. 
+ * + * Returns -1 in case of failure, 0 otherwise + */ +int +xmlXPtrGetStartPoint(xmlXPathObjectPtr obj, xmlNodePtr *node, int *index) { + if ((obj == NULL) || (node == NULL) || (index == NULL)) + return(-1); + + switch (obj->type) { + case XPATH_POINT: + *node = obj->user; + if (obj->index <= 0) + *index = 0; + else + *index = obj->index; + return(0); + case XPATH_RANGE: + *node = obj->user; + if (obj->index <= 0) + *index = 0; + else + *index = obj->index; + return(0); + default: + return(-1); + } + return(-1); +} + +/** + * xmlXPtrGetEndPoint: + * @obj: an range + * @node: the resulting node + * @index: the resulting index + * + * read the object and return the end point coordinates. + * + * Returns -1 in case of failure, 0 otherwise + */ +int +xmlXPtrGetEndPoint(xmlXPathObjectPtr obj, xmlNodePtr *node, int *index) { + if ((obj == NULL) || (node == NULL) || (index == NULL)) + return(-1); + + switch (obj->type) { + case XPATH_POINT: + *node = obj->user; + if (obj->index <= 0) + *index = 0; + else + *index = obj->index; + return(0); + case XPATH_RANGE: + *node = obj->user; + if (obj->index <= 0) + *index = 0; + else + *index = obj->index; + return(0); + default: + return(-1); + } + return(-1); +} + +/** + * xmlXPtrStringRangeFunction: + * @ctxt: the XPointer Parser context + * + * Function implementing the string-range() function + * range as described in 5.4.2 + * + * ------------------------------ + * [Definition: For each location in the location-set argument, + * string-range returns a set of string ranges, a set of substrings in a + * string. Specifically, the string-value of the location is searched for + * substrings that match the string argument, and the resulting location-set + * will contain a range location for each non-overlapping match.] + * An empty string is considered to match before each character of the + * string-value and after the final character. 
Whitespace in a string + * is matched literally, with no normalization except that provided by + * XML for line ends. The third argument gives the position of the first + * character to be in the resulting range, relative to the start of the + * match. The default value is 1, which makes the range start immediately + * before the first character of the matched string. The fourth argument + * gives the number of characters in the range; the default is that the + * range extends to the end of the matched string. + * + * Element boundaries, as well as entire embedded nodes such as processing + * instructions and comments, are ignored as defined in [XPath]. + * + * If the string in the second argument is not found in the string-value + * of the location, or if a value in the third or fourth argument indicates + * a string that is beyond the beginning or end of the document, the + * expression fails. + * + * The points of the range-locations in the returned location-set will + * all be character points. 
+ * ------------------------------ + */ +void +xmlXPtrStringRangeFunction(xmlXPathParserContextPtr ctxt, int nargs) { + int i, startindex, endindex, fendindex; + xmlNodePtr start, end, fend; + xmlXPathObjectPtr set; + xmlLocationSetPtr oldset; + xmlLocationSetPtr newset; + xmlXPathObjectPtr string; + xmlXPathObjectPtr position = NULL; + xmlXPathObjectPtr number = NULL; + int found, pos, num; + + /* + * Grab the arguments + */ + if ((nargs < 2) || (nargs > 4)) + XP_ERROR(XPATH_INVALID_ARITY); + + if (nargs >= 4) { + CHECK_TYPE(XPATH_NUMBER); + number = valuePop(ctxt); + if (number != NULL) + num = number->floatval; + } + if (nargs >= 3) { + CHECK_TYPE(XPATH_NUMBER); + position = valuePop(ctxt); + if (position != NULL) + pos = position->floatval; + } + CHECK_TYPE(XPATH_STRING); + string = valuePop(ctxt); + if ((ctxt->value == NULL) || + ((ctxt->value->type != XPATH_LOCATIONSET) && + (ctxt->value->type != XPATH_NODESET))) + XP_ERROR(XPATH_INVALID_TYPE) + + set = valuePop(ctxt); + if (set->type == XPATH_NODESET) { + xmlXPathObjectPtr tmp; + + /* + * First convert to a location set + */ + tmp = xmlXPtrNewLocationSetNodeSet(set->nodesetval); + xmlXPathFreeObject(set); + set = tmp; + } + oldset = (xmlLocationSetPtr) set->user; + + /* + * The loop is to search for each element in the location set + * the list of location set corresponding to that search + */ + newset = xmlXPtrLocationSetCreate(NULL); + for (i = 0;i < oldset->locNr;i++) { +#ifdef DEBUG_RANGES + xmlXPathDebugDumpObject(stdout, oldset->locTab[i], 0); +#endif + + xmlXPtrGetStartPoint(oldset->locTab[i], &start, &startindex); + xmlXPtrGetEndPoint(oldset->locTab[i], &end, &endindex); + xmlXPtrAdvanceChar(&start, &startindex, 0); + xmlXPtrGetLastChar(&end, &endindex); + +#ifdef DEBUG_RANGES + xmlGenericError(xmlGenericErrorContext, + "from index %d of ->", startindex); + xmlDebugDumpString(stdout, start->content); + xmlGenericError(xmlGenericErrorContext, "\n"); + xmlGenericError(xmlGenericErrorContext, + "to 
index %d of ->", endindex); + xmlDebugDumpString(stdout, end->content); + xmlGenericError(xmlGenericErrorContext, "\n"); +#endif + do { + fend = end; + fendindex = endindex; + found = xmlXPtrSearchString(string->stringval, &start, &startindex, + &fend, &fendindex); + if (found == 1) { + if (position == NULL) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, fend, fendindex)); + } else if (xmlXPtrAdvanceChar(&start, &startindex, + pos - 1) == 0) { + if ((number != NULL) && (num > 0)) { + int rindex; + xmlNodePtr rend; + rend = start; + rindex = startindex - 1; + if (xmlXPtrAdvanceChar(&rend, &rindex, + num) == 0) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, + rend, rindex)); + } + } else if ((number != NULL) && (num <= 0)) { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, + start, startindex)); + } else { + xmlXPtrLocationSetAdd(newset, + xmlXPtrNewRange(start, startindex, + fend, fendindex)); + } + } + start = fend; + startindex = fendindex; + if (string->stringval[0] == 0) + startindex++; + } + } while (found == 1); + } + + /* + * Save the new value and cleanup + */ + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + xmlXPathFreeObject(set); + xmlXPathFreeObject(string); + if (position) xmlXPathFreeObject(position); + if (number) xmlXPathFreeObject(number); +} + +/** + * xmlXPtrEvalRangePredicate: + * @ctxt: the XPointer Parser context + * + * [8] Predicate ::= '[' PredicateExpr ']' + * [9] PredicateExpr ::= Expr + * + * Evaluate a predicate as in xmlXPathEvalPredicate() but for + * a Location Set instead of a node set + */ +void +xmlXPtrEvalRangePredicate(xmlXPathParserContextPtr ctxt) { + const xmlChar *cur; + xmlXPathObjectPtr res; + xmlXPathObjectPtr obj, tmp; + xmlLocationSetPtr newset = NULL; + xmlLocationSetPtr oldset; + int i; + + SKIP_BLANKS; + if (CUR != '[') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + NEXT; + SKIP_BLANKS; + + /* + * Extract the old set, and then evaluate the 
result of the + * expression for all the element in the set. use it to grow + * up a new set. + */ + CHECK_TYPE(XPATH_LOCATIONSET); + obj = valuePop(ctxt); + oldset = obj->user; + ctxt->context->node = NULL; + + if ((oldset == NULL) || (oldset->locNr == 0)) { + ctxt->context->contextSize = 0; + ctxt->context->proximityPosition = 0; + xmlXPathEvalExpr(ctxt); + res = valuePop(ctxt); + if (res != NULL) + xmlXPathFreeObject(res); + valuePush(ctxt, obj); + CHECK_ERROR; + } else { + /* + * Save the expression pointer since we will have to evaluate + * it multiple times. Initialize the new set. + */ + cur = ctxt->cur; + newset = xmlXPtrLocationSetCreate(NULL); + + for (i = 0; i < oldset->locNr; i++) { + ctxt->cur = cur; + + /* + * Run the evaluation with a node list made of a single item + * in the nodeset. + */ + ctxt->context->node = oldset->locTab[i]->user; + tmp = xmlXPathNewNodeSet(ctxt->context->node); + valuePush(ctxt, tmp); + ctxt->context->contextSize = oldset->locNr; + ctxt->context->proximityPosition = i + 1; + + xmlXPathEvalExpr(ctxt); + CHECK_ERROR; + + /* + * The result of the evaluation need to be tested to + * decided whether the filter succeeded or not + */ + res = valuePop(ctxt); + if (xmlXPathEvaluatePredicateResult(ctxt, res)) { + xmlXPtrLocationSetAdd(newset, + xmlXPathObjectCopy(oldset->locTab[i])); + } + + /* + * Cleanup + */ + if (res != NULL) + xmlXPathFreeObject(res); + if (ctxt->value == tmp) { + res = valuePop(ctxt); + xmlXPathFreeObject(res); + } + + ctxt->context->node = NULL; + } + + /* + * The result is used as the new evaluation set. 
+ */ + xmlXPathFreeObject(obj); + ctxt->context->node = NULL; + ctxt->context->contextSize = -1; + ctxt->context->proximityPosition = -1; + valuePush(ctxt, xmlXPtrWrapLocationSet(newset)); + } + if (CUR != ']') { + XP_ERROR(XPATH_INVALID_PREDICATE_ERROR); + } + + NEXT; + SKIP_BLANKS; +} + +#else +#endif + diff --git a/xpointer.h b/xpointer.h new file mode 100644 index 00000000..786fb5a0 --- /dev/null +++ b/xpointer.h @@ -0,0 +1,57 @@ +/* + * xpointer.h : API to handle XML Pointers + * + * World Wide Web Consortium Working Draft 03-March-1998 + * http://www.w3.org/TR/1998/WD-xptr-19980303 + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + +#ifndef __XML_XPTR_H__ +#define __XML_XPTR_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A Location Set + */ +typedef struct _xmlLocationSet xmlLocationSet; +typedef xmlLocationSet *xmlLocationSetPtr; +struct _xmlLocationSet { + int locNr; /* number of locations in the set */ + int locMax; /* size of the array as allocated */ + xmlXPathObjectPtr *locTab;/* array of locations */ +}; + +/* + * Handling of location sets + */ + +void xmlXPtrFreeLocationSet (xmlLocationSetPtr obj); +xmlLocationSetPtr xmlXPtrLocationSetMerge (xmlLocationSetPtr val1, + xmlLocationSetPtr val2); + +/* + * Functions + */ +xmlXPathContextPtr xmlXPtrNewContext (xmlDocPtr doc, + xmlNodePtr here, + xmlNodePtr origin); +xmlXPathObjectPtr xmlXPtrEval (const xmlChar *str, + xmlXPathContextPtr ctx); +void xmlXPtrRangeToFunction (xmlXPathParserContextPtr ctxt, + int nargs); +xmlNodePtr xmlXPtrBuildNodeList (xmlXPathObjectPtr obj); +void xmlXPtrEvalRangePredicate (xmlXPathParserContextPtr ctxt); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XPTR_H__ */