diff options
author | Daniel Veillard <veillard@src.gnome.org> | 2005-08-07 10:46:19 +0000 |
---|---|---|
committer | Daniel Veillard <veillard@src.gnome.org> | 2005-08-07 10:46:19 +0000 |
commit | 336a8e13bf013d6cf260c8b78a1129cef1e3662c (patch) | |
tree | d029856887b8da081a6ee5d9517d04cce1cde9e3 | |
parent | 29f6100e69b3ac907dc16fa5423ce9468820e518 (diff) | |
download | android_external_libxml2-336a8e13bf013d6cf260c8b78a1129cef1e3662c.tar.gz android_external_libxml2-336a8e13bf013d6cf260c8b78a1129cef1e3662c.tar.bz2 android_external_libxml2-336a8e13bf013d6cf260c8b78a1129cef1e3662c.zip |
get rid of the dependency on a locally installed DTD; try to clean up the Path/URI conversion mess
* test/relaxng/docbook_0.xml: get rid of the dependency on a locally
installed DTD
* uri.c include/libxml/uri.h xmlIO.c nanoftp.c nanohttp.c: try to
clean up the Path/URI conversion mess; this needed fixes in various
layers and a new API in the uri module, which also fixes #306861
* runtest.c: integrated a regression test that specifically checks the
URI conversions done before calling the I/O handlers.
Daniel
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | include/libxml/uri.h | 9 | ||||
-rw-r--r-- | nanoftp.c | 6 | ||||
-rw-r--r-- | nanohttp.c | 4 | ||||
-rw-r--r-- | runtest.c | 180 | ||||
-rw-r--r-- | test/relaxng/docbook_0.xml | 3 | ||||
-rw-r--r-- | uri.c | 148 | ||||
-rw-r--r-- | xmlIO.c | 1 |
8 files changed, 326 insertions, 35 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Aug 7 12:39:35 CEST 2005 Daniel Veillard <daniel@veillard.com>
+
+	* test/relaxng/docbook_0.xml: get rid of the dependancy on a locally
+	  installed DTD
+	* uri.c include/libxml/uri.h xmlIO.c nanoftp.c nanohttp.c: try to
+	  cleanup the Path/URI conversion mess, needed fixing in various
+	  layers and a new API to the uri module which also fixes #306861
+	* runtest.c: integrated a regression test specific to check the
+	  URI conversions done before calling the I/O handlers.
+
 Sat Aug 6 11:06:24 CEST 2005 Daniel Veillard <daniel@veillard.com>
 
 	* doc/XSLT.html doc/xml.html: small doc fix for #312647
diff --git a/include/libxml/uri.h b/include/libxml/uri.h
index 90ed06ec..89e36535 100644
--- a/include/libxml/uri.h
+++ b/include/libxml/uri.h
@@ -47,13 +47,16 @@ struct _xmlURI {
 XMLPUBFUN xmlURIPtr XMLCALL
                 xmlCreateURI            (void);
 XMLPUBFUN xmlChar * XMLCALL
-                xmlBuildURI             (const xmlChar *URI,
-                                         const xmlChar *base);
+                xmlBuildURI             (const xmlChar *URI,
+                                         const xmlChar *base);
 XMLPUBFUN xmlChar * XMLCALL
                 xmlBuildRelativeURI     (const xmlChar *URI,
-                                         const xmlChar *base);
+                                         const xmlChar *base);
 XMLPUBFUN xmlURIPtr XMLCALL
                 xmlParseURI             (const char *str);
+XMLPUBFUN xmlURIPtr XMLCALL
+                xmlParseURIRaw          (const char *str,
+                                         int raw);
 XMLPUBFUN int XMLCALL
                 xmlParseURIReference    (xmlURIPtr uri,
                                          const char *str);
--- a/nanoftp.c
+++ b/nanoftp.c
@@ -316,7 +316,7 @@ xmlNanoFTPScanURL(void *ctx, const char *URL) {
     }
     if (URL == NULL) return;
 
-    uri = xmlParseURI(URL);
+    uri = xmlParseURIRaw(URL, 1);
     if (uri == NULL)
         return;
 
@@ -377,7 +377,7 @@ xmlNanoFTPUpdateURL(void *ctx, const char *URL) {
     if (ctxt->hostname == NULL)
         return(-1);
 
-    uri = xmlParseURI(URL);
+    uri = xmlParseURIRaw(URL, 1);
     if (uri == NULL)
         return(-1);
 
@@ -440,7 +440,7 @@ xmlNanoFTPScanProxy(const char *URL) {
 #endif
     if (URL == NULL) return;
 
-    uri = xmlParseURI(URL);
+    uri = xmlParseURIRaw(URL, 1);
     if ((uri == NULL) || (uri->scheme == NULL) ||
         (strcmp(uri->scheme, "ftp")) || (uri->server == NULL)) {
         __xmlIOErr(XML_FROM_FTP, XML_FTP_URL_SYNTAX, "Syntax Error\n");
--- a/nanohttp.c
+++ b/nanohttp.c
@@ -293,7 +293,7 @@ xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
     }
     if (URL == NULL) return;
 
-    uri = xmlParseURI(URL);
+    uri = xmlParseURIRaw(URL, 1);
     if (uri == NULL)
         return;
 
@@ -346,7 +346,7 @@ xmlNanoHTTPScanProxy(const char *URL) {
 #endif
     if (URL == NULL) return;
 
-    uri = xmlParseURI(URL);
+    uri = xmlParseURIRaw(URL, 1);
     if ((uri == NULL) || (uri->scheme == NULL) ||
         (strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
         __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
--- a/runtest.c
+++ b/runtest.c
@@ -2692,6 +2692,178 @@ uriBaseTest(const char *filename,
                          "http://foo.com/path/to/index.html?orig#help"));
 }
 
+static int urip_success = 1;
+static int urip_current = 0;
+static const char *urip_testURLs[] = {
+    "urip://example.com/a b.html",
+    "urip://example.com/a%20b.html",
+    "file:///path/to/a b.html",
+    "file:///path/to/a%20b.html",
+    "/path/to/a b.html",
+    "/path/to/a%20b.html",
+    "urip://example.com/résumé.html",
+    "urip://example.com/test?a=1&b=2%263&c=4#foo",
+    NULL
+};
+static const char *urip_rcvsURLs[] = {
+    /* it is an URI the strings must be escaped */
+    "urip://example.com/a%20b.html",
+    /* check that % escaping is not broken */
+    "urip://example.com/a%20b.html",
+    /* it's an URI path the strings must be escaped */
+    "file:///path/to/a%20b.html",
+    /* check that % escaping is not broken */
+    "file:///path/to/a%20b.html",
+    /* this is not an URI, this is a path, so this should not be escaped */
+    "/path/to/a b.html",
+    /* check that paths with % are not broken */
+    "/path/to/a%20b.html",
+    /* out of context the encoding can't be guessed byte by byte conversion */
+    "urip://example.com/r%E9sum%E9.html",
+    /* verify we don't destroy URIs especially the query part */
+    "urip://example.com/test?a=1&b=2%263&c=4#foo",
+    NULL
+};
+static const char *urip_res = "<list/>";
+static const char *urip_cur = NULL;
+static int urip_rlen;
+
+/**
+ * uripMatch:
+ * @URI: an URI to test
+ *
+ * Check for an urip: query
+ *
+ * Returns 1 if yes and 0 if another Input module should be used
+ */
+static int
+uripMatch(const char * URI) {
+    if ((URI == NULL) || (!strcmp(URI, "file:///etc/xml/catalog")))
+        return(0);
+    /* Verify we received the escaped URL */
+    if (strcmp(urip_rcvsURLs[urip_current], URI))
+        urip_success = 0;
+    return(1);
+}
+
+/**
+ * uripOpen:
+ * @URI: an URI to test
+ *
+ * Return a pointer to the urip: query handler, in this example simply
+ * the urip_current pointer...
+ *
+ * Returns an Input context or NULL in case of error
+ */
+static void *
+uripOpen(const char * URI) {
+    if ((URI == NULL) || (!strcmp(URI, "file:///etc/xml/catalog")))
+        return(NULL);
+    /* Verify we received the escaped URL */
+    if (strcmp(urip_rcvsURLs[urip_current], URI))
+        urip_success = 0;
+    urip_cur = urip_res;
+    urip_rlen = strlen(urip_res);
+    return((void *) urip_cur);
+}
+
+/**
+ * uripClose:
+ * @context: the read context
+ *
+ * Close the urip: query handler
+ *
+ * Returns 0 or -1 in case of error
+ */
+static int
+uripClose(void * context) {
+    if (context == NULL) return(-1);
+    urip_cur = NULL;
+    urip_rlen = 0;
+    return(0);
+}
+
+/**
+ * uripRead:
+ * @context: the read context
+ * @buffer: where to store data
+ * @len: number of bytes to read
+ *
+ * Implement an urip: query read.
+ *
+ * Returns the number of bytes read or -1 in case of error
+ */
+static int
+uripRead(void * context, char * buffer, int len) {
+    const char *ptr = (const char *) context;
+
+    if ((context == NULL) || (buffer == NULL) || (len < 0))
+        return(-1);
+
+    if (len > urip_rlen) len = urip_rlen;
+    memcpy(buffer, ptr, len);
+    urip_rlen -= len;
+    return(len);
+}
+
+static int
+urip_checkURL(const char *URL) {
+    xmlDocPtr doc;
+
+    doc = xmlReadFile(URL, NULL, 0);
+    if (doc == NULL)
+        return(-1);
+    xmlFreeDoc(doc);
+    return(1);
+}
+
+/**
+ * uriPathTest:
+ * @filename: ignored
+ * @result: ignored
+ * @err: ignored
+ *
+ * Run a set of tests to check how Path and URI are handled before
+ * being passed to the I/O layer
+ *
+ * Returns 0 in case of success, an error code otherwise
+ */
+static int
+uriPathTest(const char *filename ATTRIBUTE_UNUSED,
+             const char *result ATTRIBUTE_UNUSED,
+             const char *err ATTRIBUTE_UNUSED,
+             int options ATTRIBUTE_UNUSED) {
+    int parsed;
+    int failures = 0;
+
+    /*
+     * register the new I/O handlers
+     */
+    if (xmlRegisterInputCallbacks(uripMatch, uripOpen, uripRead, uripClose) < 0)
+    {
+        fprintf(stderr, "failed to register HTTP handler\n");
+        return(-1);
+    }
+
+    for (urip_current = 0;urip_testURLs[urip_current] != NULL;urip_current++) {
+        urip_success = 1;
+        parsed = urip_checkURL(urip_testURLs[urip_current]);
+        if (urip_success != 1) {
+            fprintf(stderr, "failed the URL passing test for %s",
+                    urip_testURLs[urip_current]);
+            failures++;
+        } else if (parsed != 1) {
+            fprintf(stderr, "failed the parsing test for %s",
+                    urip_testURLs[urip_current]);
+            failures++;
+        }
+        nb_tests++;
+    }
+
+    xmlPopInputCallbacks();
+    return(failures);
+}
+
 #ifdef LIBXML_SCHEMAS_ENABLED
 /************************************************************************
  *                                                                      *
@@ -4039,6 +4211,9 @@ testDesc testDescriptions[] = {
     { "URI base composition tests" ,
       uriBaseTest, "./test/URI/*.data", "result/URI/", "", NULL,
       0 },
+    { "Path URI conversion tests" ,
+      uriPathTest, NULL, NULL, NULL, NULL,
+      0 },
 #ifdef LIBXML_SCHEMAS_ENABLED
     { "Schemas regression tests" ,
       schemasTest, "./test/schemas/*_*.xsd", NULL, NULL, NULL,
@@ -4170,6 +4345,7 @@ launchTests(testDescPtr tst) {
 }
 
 static int verbose = 0;
+static int tests_quiet = 0;
 
 static int
 runtest(int i) {
@@ -4179,7 +4355,7 @@ runtest(int i) {
     old_errors = nb_errors;
     old_tests = nb_tests;
     old_leaks = nb_leaks;
-    if (testDescriptions[i].desc != NULL)
+    if ((tests_quiet == 0) && (testDescriptions[i].desc != NULL))
         printf("## %s\n", testDescriptions[i].desc);
     res = launchTests(&testDescriptions[i]);
     if (res != 0)
@@ -4207,6 +4383,8 @@ main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
     for (a = 1; a < argc;a++) {
         if (!strcmp(argv[a], "-v"))
             verbose = 1;
+        else if (!strcmp(argv[a], "-quiet"))
+            tests_quiet = 1;
         else {
             for (i = 0; testDescriptions[i].func != NULL; i++) {
                 if (strstr(testDescriptions[i].desc, argv[a])) {
diff --git a/test/relaxng/docbook_0.xml b/test/relaxng/docbook_0.xml
index 963d8e7e..ede051df 100644
--- a/test/relaxng/docbook_0.xml
+++ b/test/relaxng/docbook_0.xml
@@ -1,6 +1,5 @@
 <?xml version="1.0"?>
-<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
-    "../dtd/4.1.2/docbookx.dtd" [
+<!DOCTYPE article [
 <!ENTITY version "1.0.53">
 <!ENTITY mdash "--">
 <!ENTITY hellip "...">
--- a/uri.c
+++ b/uri.c
@@ -185,6 +185,8 @@
  * path = [ abs_path | opaque_part ]
  */
 
+#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
+
 /************************************************************************
  *                                                                      *
  *              Generic URI structure functions                         *
@@ -1086,7 +1088,10 @@ xmlParseURIFragment(xmlURIPtr uri, const char **str)
     if (uri != NULL) {
         if (uri->fragment != NULL)
             xmlFree(uri->fragment);
-        uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
+        if (uri->cleanup & 2)
+            uri->fragment = STRNDUP(*str, cur - *str);
+        else
+            uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
     }
     *str = cur;
     return (0);
@@ -1111,12 +1116,16 @@ xmlParseURIQuery(xmlURIPtr uri, const char **str)
     if (str == NULL)
         return (-1);
 
-    while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+    while ((IS_URIC(cur)) ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
         NEXT(cur);
     if (uri != NULL) {
         if (uri->query != NULL)
             xmlFree(uri->query);
-        uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
+        if (uri->cleanup & 2)
+            uri->query = STRNDUP(*str, cur - *str);
+        else
+            uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
     }
     *str = cur;
     return (0);
@@ -1147,8 +1156,7 @@ xmlParseURIScheme(xmlURIPtr uri, const char **str) {
     while (IS_SCHEME(*cur)) cur++;
     if (uri != NULL) {
         if (uri->scheme != NULL) xmlFree(uri->scheme);
-        /* !!! strndup */
-        uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
+        uri->scheme = STRNDUP(*str, cur - *str);
     }
     *str = cur;
     return(0);
@@ -1174,16 +1182,21 @@ xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
         return (-1);
 
     cur = *str;
-    if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
+    if (!((IS_URIC_NO_SLASH(cur)) ||
+          ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
         return (3);
     }
     NEXT(cur);
-    while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+    while ((IS_URIC(cur)) ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
         NEXT(cur);
     if (uri != NULL) {
         if (uri->opaque != NULL)
             xmlFree(uri->opaque);
-        uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
+        if (uri->cleanup & 2)
+            uri->opaque = STRNDUP(*str, cur - *str);
+        else
+            uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
     }
     *str = cur;
     return (0);
@@ -1235,7 +1248,10 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) {
     if (*cur == '@') {
         if (uri != NULL) {
             if (uri->user != NULL) xmlFree(uri->user);
-            uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
+            if (uri->cleanup & 2)
+                uri->user = STRNDUP(*str, cur - *str); /* raw mode: keep the userinfo escaped, store it in user (not path) */
+            else
+                uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
         }
         cur++;
     } else {
@@ -1349,7 +1365,10 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) {
         uri->authority = NULL;
         if (host[0] != '[') { /* it's not an IPV6 addr */
             if (uri->server != NULL) xmlFree(uri->server);
-            uri->server = xmlURIUnescapeString(host, cur - host, NULL);
+            if (uri->cleanup & 2)
+                uri->server = STRNDUP(host, cur - host);
+            else
+                uri->server = xmlURIUnescapeString(host, cur - host, NULL);
         }
     }
     /*
@@ -1392,16 +1411,21 @@ xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
         return (-1);
 
     cur = *str;
-    if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
+    if (!((IS_SEGMENT(cur)) ||
+          ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
         return (3);
     }
     NEXT(cur);
-    while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+    while ((IS_SEGMENT(cur)) ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
         NEXT(cur);
     if (uri != NULL) {
         if (uri->path != NULL)
             xmlFree(uri->path);
-        uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
+        if (uri->cleanup & 2)
+            uri->path = STRNDUP(*str, cur - *str);
+        else
+            uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
     }
     *str = cur;
     return (0);
@@ -1432,11 +1456,13 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
     cur = *str;
 
     do {
-        while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+        while ((IS_PCHAR(cur)) ||
+               ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
             NEXT(cur);
         while (*cur == ';') {
             cur++;
-            while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+            while ((IS_PCHAR(cur)) ||
+                   ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
                 NEXT(cur);
         }
         if (*cur != '/')
@@ -1472,8 +1498,13 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
             len2++;
         }
         path[len2] = 0;
-        if (cur - *str > 0)
-            xmlURIUnescapeString(*str, cur - *str, &path[len2]);
+        if (cur - *str > 0) {
+            if (uri->cleanup & 2) {
+                memcpy(&path[len2], *str, cur - *str);
+                path[len2 + (cur - *str)] = 0;
+            } else
+                xmlURIUnescapeString(*str, cur - *str, &path[len2]);
+        }
         if (uri->path != NULL)
             xmlFree(uri->path);
         uri->path = path;
@@ -1538,7 +1569,10 @@ xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
         if (uri->user != NULL) xmlFree(uri->user);
         uri->user = NULL;
         if (uri->authority != NULL) xmlFree(uri->authority);
-        uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
+        if (uri->cleanup & 2)
+            uri->authority = STRNDUP(*str, cur - *str);
+        else
+            uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
     }
     *str = cur;
     return(0);
@@ -1761,6 +1795,38 @@ xmlParseURI(const char *str) {
     return(uri);
 }
 
+/**
+ * xmlParseURIRaw:
+ * @str: the URI string to analyze
+ * @raw: if 1 unescaping of URI pieces are disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built xmlURIPtr or NULL in case of error
+ */
+xmlURIPtr
+xmlParseURIRaw(const char *str, int raw) {
+    xmlURIPtr uri;
+    int ret;
+
+    if (str == NULL)
+        return(NULL);
+    uri = xmlCreateURI();
+    if (uri != NULL) {
+        if (raw) {
+            uri->cleanup |= 2;
+        }
+        ret = xmlParseURIReference(uri, str);
+        if (ret) {
+            xmlFreeURI(uri);
+            return(NULL);
+        }
+    }
+    return(uri);
+}
+
 /************************************************************************
  *                                                                      *
  *                      Public functions                                *
@@ -2229,8 +2295,9 @@ xmlCanonicPath(const xmlChar *path)
     int i = 0;
     xmlChar *p = NULL;
 #endif
-    xmlChar *ret;
     xmlURIPtr uri;
+    xmlChar *ret;
+    const xmlChar *absuri;
 
     if (path == NULL)
         return(NULL);
@@ -2239,12 +2306,47 @@ xmlCanonicPath(const xmlChar *path)
         return xmlStrdup(path);
     }
 
+    absuri = xmlStrstr(path, BAD_CAST "://");
+    if (absuri != NULL) {
+        int l, j;
+        unsigned char c;
+        xmlChar *escURI;
+
+        /*
+         * this looks like an URI where some parts have not been
+         * escaped leading to a parsing problem check that the first
+         * part matches a protocol.
+         */
+        l = absuri - path;
+        if ((l <= 0) || (l > 20))
+            goto path_processing;
+        for (j = 0;j < l;j++) {
+            c = path[j];
+            if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
+                goto path_processing;
+        }
+
+        escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
+        if (escURI != NULL) {
+            uri = xmlParseURI((const char *) escURI);
+            if (uri != NULL) {
+                xmlFreeURI(uri);
+                return escURI;
+            }
+            xmlFree(escURI); /* parse failed (uri is NULL): release the escaped copy instead of leaking it */
+        }
+    }
+
+path_processing:
+#if defined(_WIN32) && !defined(__CYGWIN__)
+    /*
+     * This really need to be cleaned up by someone with a Windows box
+     */
     uri = xmlCreateURI();
     if (uri == NULL) {
         return(NULL);
     }
 
-#if defined(_WIN32) && !defined(__CYGWIN__)
     len = xmlStrlen(path);
     if ((len > 2) && IS_WINDOWS_PATH(path)) {
         uri->scheme = xmlStrdup(BAD_CAST "file");
@@ -2261,15 +2363,15 @@ xmlCanonicPath(const xmlChar *path)
         *p = '/';
         p++;
     }
-#else
-    uri->path = (char *) xmlStrdup((const xmlChar *) path);
-#endif
     if (uri->path == NULL) {
         xmlFreeURI(uri);
         return(NULL);
     }
     ret = xmlSaveUri(uri);
     xmlFreeURI(uri);
+#else
+    ret = xmlStrdup((const xmlChar *) path);
+#endif
     return(ret);
 }
 
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -3533,7 +3533,6 @@ xmlGetExternalEntityLoader(void) {
  *
  * Load an external entity, note that the use of this function for
  * unparsed entities may generate problems
- * TODO: a more generic External entity API must be designed
  *
  * Returns the xmlParserInputPtr or NULL
  */