diff options
author | Daniel Veillard <veillard@src.gnome.org> | 2001-02-02 17:07:32 +0000 |
---|---|---|
committer | Daniel Veillard <veillard@src.gnome.org> | 2001-02-02 17:07:32 +0000 |
commit | b6e7fdbac64423e63474fbb4b95770209f280797 (patch) | |
tree | f16b34c69d032795113b040a0dd76c7459fe41e5 | |
parent | ea28ce621cc19a1444b98965a311693f0acdc7a6 (diff) | |
download | android_external_libxml2-b6e7fdbac64423e63474fbb4b95770209f280797.tar.gz android_external_libxml2-b6e7fdbac64423e63474fbb4b95770209f280797.tar.bz2 android_external_libxml2-b6e7fdbac64423e63474fbb4b95770209f280797.zip |
Rewrite/cleanup/tests of URI normalization:
- uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith
- test/URI/smith.uri result/URI/smith.uri Makefile.am:
added the new tests for URI normalization
- testURI.c: fixed stoopid bugs
- result/VC/OneID3 result/VC/UniqueElementTypeDeclaration:
the URIs in the error messages are now properly normalized
Daniel
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | Makefile.am | 13 | ||||
-rw-r--r-- | aclocal.m4 | 40 | ||||
-rw-r--r-- | result/URI/smith.uri | 15 | ||||
-rw-r--r-- | result/VC/OneID3 | 2 | ||||
-rw-r--r-- | result/VC/UniqueElementTypeDeclaration | 2 | ||||
-rw-r--r-- | test/URI/smith.uri | 15 | ||||
-rw-r--r-- | testURI.c | 10 | ||||
-rw-r--r-- | uri.c | 179 |
9 files changed, 259 insertions, 26 deletions
@@ -1,3 +1,12 @@ +Fri Feb 2 18:04:35 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr> + + * uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith + * test/URI/smith.uri result/URI/smith.uri Makefile.am: + added the new tests for URI normalization + * testURI.c: fixed stoopid bugs + * result/VC/OneID3 result/VC/UniqueElementTypeDeclaration: + the URI in the error messages are now properly normalized + Fri Feb 2 09:18:53 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr> * uri.c: applied Marc Sanfacon's patch for xmlNormalizeURIPath diff --git a/Makefile.am b/Makefile.am index f920cd57..d67942a8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -237,6 +237,19 @@ URItests : testURI diff $(srcdir)/result/URI/$$name result.$$name ; \ rm result.$$name ; \ fi ; fi ; done) + @(for i in $(srcdir)/test/URI/*.uri ; do \ + name=`basename $$i`; \ + if [ ! -d $$i ] ; then \ + if [ ! -f $(srcdir)/result/URI/$$name ] ; then \ + echo New test file $$name ; \ + $(top_builddir)/testURI < $$i > $(srcdir)/result/URI/$$name ; \ + else \ + echo Testing $$name ; \ + $(top_builddir)/testURI < $$i > result.$$name ; \ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + diff $(srcdir)/result/URI/$$name result.$$name ; \ + rm result.$$name ; \ + fi ; fi ; done) XPathtests : testXPath @echo "##" @@ -620,31 +620,35 @@ esac ]) # AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl convenience library, adds --enable-ltdl-convenience to -# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor -# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed -# to be `${top_builddir}/libltdl'. Make sure you start DIR with -# '${top_builddir}/' (note the single quotes!) if your package is not -# flat, and, if you're not using automake, define top_builddir as -# appropriate in the Makefiles. 
+# the libltdl convenience library and INCLTDL to the include flags for +# the libltdl header and adds --enable-ltdl-convenience to the +# configure arguments. Note that LIBLTDL and INCLTDL are not +# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not +# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed +# with '${top_builddir}/' and INCLTDL will be prefixed with +# '${top_srcdir}/' (note the single quotes!). If your package is not +# flat and you're not using automake, define top_builddir and +# top_srcdir appropriately in the Makefiles. AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl case "$enable_ltdl_convenience" in no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;; "") enable_ltdl_convenience=yes ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;; esac - LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la - INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl']) + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la + INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) ]) # AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl installable library, and adds --enable-ltdl-install to -# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor -# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed -# to be `${top_builddir}/libltdl'. Make sure you start DIR with -# '${top_builddir}/' (note the single quotes!) if your package is not -# flat, and, if you're not using automake, define top_builddir as -# appropriate in the Makefiles. +# the libltdl installable library and INCLTDL to the include flags for +# the libltdl header and adds --enable-ltdl-install to the configure +# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is +# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed +# libltdl is not found, it is assumed to be `libltdl'. 
LIBLTDL will +# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed +# with '${top_srcdir}/' (note the single quotes!). If your package is +# not flat and you're not using automake, define top_builddir and +# top_srcdir appropriately in the Makefiles. # In the future, this macro may have to be called after AC_PROG_LIBTOOL. AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl AC_CHECK_LIB(ltdl, main, @@ -657,8 +661,8 @@ AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl ]) if test x"$enable_ltdl_install" = x"yes"; then ac_configure_args="$ac_configure_args --enable-ltdl-install" - LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la - INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl']) + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la + INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) else ac_configure_args="$ac_configure_args --enable-ltdl-install=no" LIBLTDL="-lltdl" diff --git a/result/URI/smith.uri b/result/URI/smith.uri new file mode 100644 index 00000000..d336d54d --- /dev/null +++ b/result/URI/smith.uri @@ -0,0 +1,15 @@ +/bar +bar +bar +bar +baz + +foo/ +foo +foo +../foo./ +../foo/ +/foo +../foo +../../foo +../../../foo diff --git a/result/VC/OneID3 b/result/VC/OneID3 index ac81a9b2..d1742de5 100644 --- a/result/VC/OneID3 +++ b/result/VC/OneID3 @@ -1,3 +1,3 @@ -./test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val +test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val <!ATTLIST doc val ID #IMPLIED> ^ diff --git a/result/VC/UniqueElementTypeDeclaration b/result/VC/UniqueElementTypeDeclaration index a255ef61..d2ff38bc 100644 --- a/result/VC/UniqueElementTypeDeclaration +++ b/result/VC/UniqueElementTypeDeclaration @@ -1,3 +1,3 @@ -./test/VC/dtds/a.dtd:1: validity error: Redefinition of element a +test/VC/dtds/a.dtd:1: validity error: 
/**
 * xmlNormalizeURIPath:
 * @path: pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * Every step only removes characters, so the result always fits in the
 * caller's buffer.
 *
 * Leading ".." segments are discarded only when the path is absolute
 * (starts with '/'); on a relative path they are retained.
 *
 * Examples (from test/URI/smith.uri and its expected results):
 *   /foo/../bar          ->  /bar
 *   foo/./../bar         ->  bar
 *   foo/bar/.././../baz  ->  baz
 *   foo/..               ->  (empty string)
 *   foo/bar/..           ->  foo/
 *   .././foo/.           ->  ../foo/
 *   ../../foo            ->  ../../foo
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars. We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is always at the first character of a segment here, so a
     * leading '.' followed by '/' or end-of-string is a complete "."
     * segment and never the tail of a longer name such as "foo.".
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment and its trailing '/', if any. */
        while ((cur[0] != '/') && (cur[0] != '\0'))
            (out++)[0] = (cur++)[0];
        if (cur[0] == '\0')
            break;
        (out++)[0] = (cur++)[0];
    }
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), the string is compacted
     * every time something is removed, so a single "current position"
     * pointer is enough.
     */
    while (1) {
        char *next, *src, *dst;

        /* At the beginning of each iteration, "cur" points to the first
         * character of the segment we want to examine.  Find where the
         * following segment starts.
         */
        next = cur;
        while ((next[0] != '/') && (next[0] != '\0'))
            ++next;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (next[0] == '\0')
            break;
        ++next;

        /* If the current segment is "..", or if the next segment _isn't_
         * "..", keep this segment and try the next one.
         */
        if (((cur[0] == '.') && (cur[1] == '.') && (next == cur + 3))
            || (next[0] != '.') || (next[1] != '.')
            || ((next[2] != '/') && (next[2] != '\0'))) {
            cur = next;
            continue;
        }

        /* Case (f): "<segment>/.." ends the buffer, cut it off. */
        if (next[2] == '\0') {
            cur[0] = '\0';
            break;
        }

        /* Case (e): drop "<segment>/../" by sliding the tail down.
         * Source and destination overlap, so strcpy() must not be used;
         * a forward byte copy is safe because dst is always below src.
         */
        dst = cur;
        src = next + 3;
        while ((*dst++ = *src++) != '\0')
            ;

        /* Back up to the previous segment and start over with it, to
         * handle things like "foo/bar/../..": after removing "bar/../"
         * we would otherwise be pointing at the second ".." and never
         * notice that "foo/.." can be removed as well.
         *
         * First step back over the separator(s) preceding "cur".
         */
        dst = cur;
        while ((dst > path) && (dst[-1] == '/'))
            --dst;

        /* Nothing but separators (or nothing at all) before "cur": there
         * is no previous segment, keep going from here.
         */
        if (dst == path)
            continue;

        /* "dst" is just past the end of the previous segment; find its
         * start.  Comparing against the preceding character (rather than
         * the one under the pointer) also handles a one-character first
         * segment, as in "a/b/../..".
         */
        cur = dst;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path, for absolute paths only.
     */
    if (path[0] == '/') {
        cur = path;
        /* Checking cur[0] first stops the scan at the terminator; without
         * it a path such as "/.." would be read past its end.
         */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            /* Note: a path made only of "/.." segments becomes empty. */
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = '\0';
        }
    }

    return(0);
}