diff --git a/ChangeLog b/ChangeLog index 9f373f73..00a70358 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Fri Feb 2 18:04:35 CET 2001 Daniel Veillard + + * uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith + * test/URI/smith.uri result/URI/smith.uri Makefile.am: + added the new tests for URI normalization + * testURI.c: fixed stoopid bugs + * result/VC/OneID3 result/VC/UniqueElementTypeDeclaration: + the URI in the error messages are now properly normalized + Fri Feb 2 09:18:53 CET 2001 Daniel Veillard * uri.c: applied Marc Sanfacon's patch for xmlNormalizeURIPath diff --git a/Makefile.am b/Makefile.am index f920cd57..d67942a8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -237,6 +237,19 @@ URItests : testURI diff $(srcdir)/result/URI/$$name result.$$name ; \ rm result.$$name ; \ fi ; fi ; done) + @(for i in $(srcdir)/test/URI/*.uri ; do \ + name=`basename $$i`; \ + if [ ! -d $$i ] ; then \ + if [ ! -f $(srcdir)/result/URI/$$name ] ; then \ + echo New test file $$name ; \ + $(top_builddir)/testURI < $$i > $(srcdir)/result/URI/$$name ; \ + else \ + echo Testing $$name ; \ + $(top_builddir)/testURI < $$i > result.$$name ; \ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + diff $(srcdir)/result/URI/$$name result.$$name ; \ + rm result.$$name ; \ + fi ; fi ; done) XPathtests : testXPath @echo "##" diff --git a/aclocal.m4 b/aclocal.m4 index 869e5fcc..e3726759 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -620,31 +620,35 @@ esac ]) # AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl convenience library, adds --enable-ltdl-convenience to -# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor -# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed -# to be `${top_builddir}/libltdl'. Make sure you start DIR with -# '${top_builddir}/' (note the single quotes!) if your package is not -# flat, and, if you're not using automake, define top_builddir as -# appropriate in the Makefiles. 
+# the libltdl convenience library and INCLTDL to the include flags for +# the libltdl header and adds --enable-ltdl-convenience to the +# configure arguments. Note that LIBLTDL and INCLTDL are not +# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not +# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed +# with '${top_builddir}/' and INCLTDL will be prefixed with +# '${top_srcdir}/' (note the single quotes!). If your package is not +# flat and you're not using automake, define top_builddir and +# top_srcdir appropriately in the Makefiles. AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl case "$enable_ltdl_convenience" in no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;; "") enable_ltdl_convenience=yes ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;; esac - LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la - INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl']) + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la + INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) ]) # AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl installable library, and adds --enable-ltdl-install to -# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor -# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed -# to be `${top_builddir}/libltdl'. Make sure you start DIR with -# '${top_builddir}/' (note the single quotes!) if your package is not -# flat, and, if you're not using automake, define top_builddir as -# appropriate in the Makefiles. +# the libltdl installable library and INCLTDL to the include flags for +# the libltdl header and adds --enable-ltdl-install to the configure +# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is +# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed +# libltdl is not found, it is assumed to be `libltdl'. 
LIBLTDL will +# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed +# with '${top_srcdir}/' (note the single quotes!). If your package is +# not flat and you're not using automake, define top_builddir and +# top_srcdir appropriately in the Makefiles. # In the future, this macro may have to be called after AC_PROG_LIBTOOL. AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl AC_CHECK_LIB(ltdl, main, @@ -657,8 +661,8 @@ AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl ]) if test x"$enable_ltdl_install" = x"yes"; then ac_configure_args="$ac_configure_args --enable-ltdl-install" - LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la - INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl']) + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la + INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) else ac_configure_args="$ac_configure_args --enable-ltdl-install=no" LIBLTDL="-lltdl" diff --git a/result/URI/smith.uri b/result/URI/smith.uri new file mode 100644 index 00000000..d336d54d --- /dev/null +++ b/result/URI/smith.uri @@ -0,0 +1,15 @@ +/bar +bar +bar +bar +baz + +foo/ +foo +foo +../foo./ +../foo/ +/foo +../foo +../../foo +../../../foo diff --git a/result/VC/OneID3 b/result/VC/OneID3 index ac81a9b2..d1742de5 100644 --- a/result/VC/OneID3 +++ b/result/VC/OneID3 @@ -1,3 +1,3 @@ -./test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val +test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val ^ diff --git a/result/VC/UniqueElementTypeDeclaration b/result/VC/UniqueElementTypeDeclaration index a255ef61..d2ff38bc 100644 --- a/result/VC/UniqueElementTypeDeclaration +++ b/result/VC/UniqueElementTypeDeclaration @@ -1,3 +1,3 @@ -./test/VC/dtds/a.dtd:1: validity error: Redefinition of element a +test/VC/dtds/a.dtd:1: validity error: Redefinition of element a ^ diff 
--git a/test/URI/smith.uri b/test/URI/smith.uri new file mode 100644 index 00000000..6cc6c875 --- /dev/null +++ b/test/URI/smith.uri @@ -0,0 +1,15 @@ +/foo/../bar +foo/../bar +./foo/../bar +foo/./../bar +foo/bar/.././../baz +foo/.. +foo/bar/.. +./foo +././foo +.././foo./ +.././foo/. +/foo +../foo +../../foo +../../../foo diff --git a/testURI.c b/testURI.c index 349ca028..5ec8157c 100644 --- a/testURI.c +++ b/testURI.c @@ -27,11 +27,8 @@ int main(int argc, char **argv) { const char *base = NULL; xmlChar *composite; - if (argv[arg] == NULL) { - printf("Usage: %s [-base URI] URI ...\n", argv[0]); - exit(0); - } - if ((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base"))) { + if ((argv[arg] != NULL) && + ((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base")))) { arg++; base = argv[arg]; if (base != NULL) @@ -64,6 +61,7 @@ int main(int argc, char **argv) { if (ret != 0) printf("%s : error %d\n", str, ret); else { + xmlNormalizeURIPath(uri->path); xmlPrintURI(stdout, uri); printf("\n"); } @@ -99,5 +97,5 @@ int main(int argc, char **argv) { } xmlFreeURI(uri); xmlMemoryDump(); - exit(0); + return(0); } diff --git a/uri.c b/uri.c index 536652d3..31875f7f 100644 --- a/uri.c +++ b/uri.c @@ -606,6 +606,7 @@ xmlFreeURI(xmlURIPtr uri) { * * ************************************************************************/ +#if 0 /** * xmlNormalizeURIPath: * @path: pointer to the path string @@ -739,6 +740,184 @@ xmlNormalizeURIPath(char *path) { } return(0); } +#else +/** + * xmlNormalizeURIPath: + * @path: pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. + * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +int +xmlNormalizeURIPath(char *path) { + char *cur, *out; + + if (path == NULL) + return(-1); + + /* Skip all initial "/" chars. We want to get to the beginning of the + * first non-empty segment. 
+ */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* Keep everything we've seen so far. */ + out = cur; + + /* + * Analyze each segment in sequence for cases (c) and (d). + */ + while (cur[0] != '\0') { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. + */ + if ((cur[0] == '.') && (cur[1] == '/')) { + cur += 2; + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((cur[0] == '.') && (cur[1] == '\0')) + break; + + /* Otherwise keep the segment. */ + while (cur[0] != '/') { + if (cur[0] == '\0') + goto done_cd; + (out++)[0] = (cur++)[0]; + } + (out++)[0] = (cur++)[0]; + } + done_cd: + out[0] = '\0'; + + /* Reset to the beginning of the first segment for the next sequence. */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* + * Analyze each segment in sequence for cases (e) and (f). + * + * e) All occurrences of "<segment>/../", where <segment> is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + * + * f) If the buffer string ends with "<segment>/..", where <segment> + * is a complete path segment not equal to "..", that + * "<segment>/.." is removed. + * + * To satisfy the "iterative" clause in (e), we need to collapse the + * string every time we find something that needs to be removed. Thus, + * we don't need to keep two pointers into the string: we only need a + * "current position" pointer. + */ + while (1) { + char *segp; + + /* At the beginning of each iteration of this loop, "cur" points to + * the first character of the segment we want to examine. + */ + + /* Find the end of the current segment. 
*/ + segp = cur; + while ((segp[0] != '/') && (segp[0] != '\0')) + ++segp; + + /* If this is the last segment, we're done (we need at least two + * segments to meet the criteria for the (e) and (f) cases). + */ + if (segp[0] == '\0') + break; + + /* If the first segment is "..", or if the next segment _isn't_ "..", + * keep this segment and try the next one. + */ + ++segp; + if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) + || ((segp[0] != '.') || (segp[1] != '.') + || ((segp[2] != '/') && (segp[2] != '\0')))) { + cur = segp; + continue; + } + + /* If we get here, remove this segment and the next one and back up + * to the previous segment (if there is one), to implement the + * "iteratively" clause. It's pretty much impossible to back up + * while maintaining two pointers into the buffer, so just compact + * the whole buffer now. + */ + + /* If this is the end of the buffer, we're done. */ + if (segp[2] == '\0') { + cur[0] = '\0'; + break; + } + strcpy(cur, segp + 3); + + /* If there are no previous segments, then keep going from here. */ + segp = cur; + while ((segp > path) && ((--segp)[0] == '/')) + ; + if (segp == path) + continue; + + /* "segp" is pointing to the end of a previous segment; find its + * start. We need to back up to the previous segment and start + * over with that to handle things like "foo/bar/../..". If we + * don't do this, then on the first pass we'll remove the "bar/..", + * but be pointing at the second ".." so we won't realize we can also + * remove the "foo/..". + */ + cur = segp; + while ((cur > path) && (cur[-1] != '/')) + --cur; + } + out[0] = '\0'; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. 
Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. + */ + if (path[0] == '/') { + cur = path; + while ((cur[1] == '.') && (cur[2] == '.') + && ((cur[3] == '/') || (cur[3] == '\0'))) + cur += 3; + + if (cur != path) { + out = path; + while (cur[0] != '\0') + (out++)[0] = (cur++)[0]; + out[0] = 0; + } + } + + return(0); +} +#endif /** * xmlURIUnescapeString: