aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--include/libxml/uri.h9
-rw-r--r--nanoftp.c6
-rw-r--r--nanohttp.c4
-rw-r--r--runtest.c180
-rw-r--r--test/relaxng/docbook_0.xml3
-rw-r--r--uri.c148
-rw-r--r--xmlIO.c1
8 files changed, 326 insertions, 35 deletions
diff --git a/ChangeLog b/ChangeLog
index d8f36e46..c62cc7ac 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Aug 7 12:39:35 CEST 2005 Daniel Veillard <daniel@veillard.com>
+
+ * test/relaxng/docbook_0.xml: get rid of the dependency on a locally
+ installed DTD
+ * uri.c include/libxml/uri.h xmlIO.c nanoftp.c nanohttp.c: try to
+ clean up the Path/URI conversion mess; this needed fixes in various
+ layers and a new API in the uri module, which also fixes #306861
+ * runtest.c: integrated a regression test specific to check the
+ URI conversions done before calling the I/O handlers.
+
Sat Aug 6 11:06:24 CEST 2005 Daniel Veillard <daniel@veillard.com>
* doc/XSLT.html doc/xml.html: small doc fix for #312647
diff --git a/include/libxml/uri.h b/include/libxml/uri.h
index 90ed06ec..89e36535 100644
--- a/include/libxml/uri.h
+++ b/include/libxml/uri.h
@@ -47,13 +47,16 @@ struct _xmlURI {
XMLPUBFUN xmlURIPtr XMLCALL
xmlCreateURI (void);
XMLPUBFUN xmlChar * XMLCALL
- xmlBuildURI (const xmlChar *URI,
- const xmlChar *base);
+ xmlBuildURI (const xmlChar *URI,
+ const xmlChar *base);
XMLPUBFUN xmlChar * XMLCALL
xmlBuildRelativeURI (const xmlChar *URI,
- const xmlChar *base);
+ const xmlChar *base);
XMLPUBFUN xmlURIPtr XMLCALL
xmlParseURI (const char *str);
+XMLPUBFUN xmlURIPtr XMLCALL
+ xmlParseURIRaw (const char *str,
+ int raw);
XMLPUBFUN int XMLCALL
xmlParseURIReference (xmlURIPtr uri,
const char *str);
diff --git a/nanoftp.c b/nanoftp.c
index b7dc7532..cd16e5b3 100644
--- a/nanoftp.c
+++ b/nanoftp.c
@@ -316,7 +316,7 @@ xmlNanoFTPScanURL(void *ctx, const char *URL) {
}
if (URL == NULL) return;
- uri = xmlParseURI(URL);
+ uri = xmlParseURIRaw(URL, 1);
if (uri == NULL)
return;
@@ -377,7 +377,7 @@ xmlNanoFTPUpdateURL(void *ctx, const char *URL) {
if (ctxt->hostname == NULL)
return(-1);
- uri = xmlParseURI(URL);
+ uri = xmlParseURIRaw(URL, 1);
if (uri == NULL)
return(-1);
@@ -440,7 +440,7 @@ xmlNanoFTPScanProxy(const char *URL) {
#endif
if (URL == NULL) return;
- uri = xmlParseURI(URL);
+ uri = xmlParseURIRaw(URL, 1);
if ((uri == NULL) || (uri->scheme == NULL) ||
(strcmp(uri->scheme, "ftp")) || (uri->server == NULL)) {
__xmlIOErr(XML_FROM_FTP, XML_FTP_URL_SYNTAX, "Syntax Error\n");
diff --git a/nanohttp.c b/nanohttp.c
index def66b37..4b229b0e 100644
--- a/nanohttp.c
+++ b/nanohttp.c
@@ -293,7 +293,7 @@ xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
}
if (URL == NULL) return;
- uri = xmlParseURI(URL);
+ uri = xmlParseURIRaw(URL, 1);
if (uri == NULL)
return;
@@ -346,7 +346,7 @@ xmlNanoHTTPScanProxy(const char *URL) {
#endif
if (URL == NULL) return;
- uri = xmlParseURI(URL);
+ uri = xmlParseURIRaw(URL, 1);
if ((uri == NULL) || (uri->scheme == NULL) ||
(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
diff --git a/runtest.c b/runtest.c
index f6e98404..37c804d2 100644
--- a/runtest.c
+++ b/runtest.c
@@ -2692,6 +2692,178 @@ uriBaseTest(const char *filename,
"http://foo.com/path/to/index.html?orig#help"));
}
+static int urip_success = 1;
+static int urip_current = 0;
+static const char *urip_testURLs[] = {
+ "urip://example.com/a b.html",
+ "urip://example.com/a%20b.html",
+ "file:///path/to/a b.html",
+ "file:///path/to/a%20b.html",
+ "/path/to/a b.html",
+ "/path/to/a%20b.html",
+ "urip://example.com/résumé.html",
+ "urip://example.com/test?a=1&b=2%263&c=4#foo",
+ NULL
+};
+static const char *urip_rcvsURLs[] = {
+ /* this is a URI, so the space must arrive escaped */
+ "urip://example.com/a%20b.html",
+ /* an existing % escape must not be escaped a second time */
+ "urip://example.com/a%20b.html",
+ /* this is a file: URI, so the space in its path must arrive escaped */
+ "file:///path/to/a%20b.html",
+ /* an existing % escape must not be escaped a second time */
+ "file:///path/to/a%20b.html",
+ /* this is a plain path, not a URI, so it must not be escaped */
+ "/path/to/a b.html",
+ /* a plain path containing % must be passed through unchanged */
+ "/path/to/a%20b.html",
+ /* the encoding cannot be guessed out of context: each byte is escaped as is */
+ "urip://example.com/r%E9sum%E9.html",
+ /* a well-formed URI, in particular its query part, must be left intact */
+ "urip://example.com/test?a=1&b=2%263&c=4#foo",
+ NULL
+};
+static const char *urip_res = "<list/>";
+static const char *urip_cur = NULL;
+static int urip_rlen;
+
+/**
+ * uripMatch:
+ * @URI: the URI handed to the I/O layer
+ *
+ * Input-callback match function of the URI conversion test. Every
+ * URI is claimed except NULL and "file:///etc/xml/catalog"; a claimed
+ * URI that differs from the expected string for the current test
+ * clears urip_success.
+ *
+ * Returns 1 if this handler takes the URI and 0 if another Input module
+ * should be used
+ */
+static int
+uripMatch(const char * URI) {
+    if (URI == NULL)
+        return(0);
+    if (strcmp(URI, "file:///etc/xml/catalog") == 0)
+        return(0);
+
+    /* The I/O layer must hand over the escaped form of the test URL */
+    if (strcmp(urip_rcvsURLs[urip_current], URI) != 0)
+        urip_success = 0;
+    return(1);
+}
+
+/**
+ * uripOpen:
+ * @URI: the URI handed to the I/O layer
+ *
+ * Input-callback open function of the URI conversion test. Checks the
+ * received URI against the expected string for the current test
+ * (clearing urip_success on mismatch), then makes the canned document
+ * urip_res available for reading.
+ *
+ * Returns an Input context (the canned document) or NULL in case of
+ * error
+ */
+static void *
+uripOpen(const char * URI) {
+    if (URI == NULL)
+        return(NULL);
+    if (strcmp(URI, "file:///etc/xml/catalog") == 0)
+        return(NULL);
+
+    /* The I/O layer must hand over the escaped form of the test URL */
+    if (strcmp(urip_rcvsURLs[urip_current], URI) != 0)
+        urip_success = 0;
+
+    urip_rlen = strlen(urip_res);
+    urip_cur = urip_res;
+    return((void *) urip_cur);
+}
+
+/**
+ * uripClose:
+ * @context: the read context
+ *
+ * Input-callback close function of the URI conversion test: resets the
+ * current document pointer and the count of bytes left to read.
+ *
+ * Returns 0, or -1 if @context is NULL
+ */
+static int
+uripClose(void * context) {
+    if (context != NULL) {
+        urip_cur = NULL;
+        urip_rlen = 0;
+        return(0);
+    }
+    return(-1);
+}
+
+/**
+ * uripRead:
+ * @context: the read context, i.e. the start of the canned document
+ * @buffer: where to store data
+ * @len: number of bytes to read
+ *
+ * Implement an urip: query read. Copies at most @len of the bytes still
+ * pending (urip_rlen) and resumes where the previous call stopped, so a
+ * caller reading with a small buffer receives the document in order
+ * rather than its first bytes again.
+ *
+ * Returns the number of bytes read or -1 in case of error
+ */
+static int
+uripRead(void * context, char * buffer, int len) {
+    const char *ptr = (const char *) context;
+
+    if ((context == NULL) || (buffer == NULL) || (len < 0))
+        return(-1);
+
+    if (len > urip_rlen) len = urip_rlen;
+    /* skip the part of the document that earlier calls already delivered */
+    ptr += (int) strlen(ptr) - urip_rlen;
+    memcpy(buffer, ptr, len);
+    urip_rlen -= len;
+    return(len);
+}
+
+/*
+ * Load @URL with xmlReadFile and discard the document.
+ * Returns 1 if a document was obtained, -1 otherwise.
+ */
+static int
+urip_checkURL(const char *URL) {
+    xmlDocPtr doc = xmlReadFile(URL, NULL, 0);
+
+    if (doc != NULL) {
+        xmlFreeDoc(doc);
+        return(1);
+    }
+    return(-1);
+}
+
+/**
+ * uriPathTest:
+ * @filename: ignored
+ * @result: ignored
+ * @err: ignored
+ * @options: ignored
+ *
+ * Run a set of tests to check how Path and URI are handled before
+ * being passed to the I/O layer. Each entry of urip_testURLs is loaded
+ * through the urip callbacks, which compare the string they receive
+ * with the matching urip_rcvsURLs entry.
+ *
+ * Returns 0 in case of success, -1 if the callbacks could not be
+ * registered, otherwise the number of URLs that failed
+ */
+static int
+uriPathTest(const char *filename ATTRIBUTE_UNUSED,
+            const char *result ATTRIBUTE_UNUSED,
+            const char *err ATTRIBUTE_UNUSED,
+            int options ATTRIBUTE_UNUSED) {
+    int parsed;
+    int failures = 0;
+
+    /*
+     * register the new I/O handlers
+     */
+    if (xmlRegisterInputCallbacks(uripMatch, uripOpen, uripRead, uripClose) < 0)
+    {
+        fprintf(stderr, "failed to register urip handler\n");
+        return(-1);
+    }
+
+    for (urip_current = 0;urip_testURLs[urip_current] != NULL;urip_current++) {
+        /* the callbacks clear this flag when they see an unexpected URI */
+        urip_success = 1;
+        parsed = urip_checkURL(urip_testURLs[urip_current]);
+        if (urip_success != 1) {
+            fprintf(stderr, "failed the URL passing test for %s\n",
+                    urip_testURLs[urip_current]);
+            failures++;
+        } else if (parsed != 1) {
+            fprintf(stderr, "failed the parsing test for %s\n",
+                    urip_testURLs[urip_current]);
+            failures++;
+        }
+        nb_tests++;
+    }
+
+    /* unregister the handlers so later tests use the default I/O layer */
+    xmlPopInputCallbacks();
+    return(failures);
+}
+
#ifdef LIBXML_SCHEMAS_ENABLED
/************************************************************************
* *
@@ -4039,6 +4211,9 @@ testDesc testDescriptions[] = {
{ "URI base composition tests" ,
uriBaseTest, "./test/URI/*.data", "result/URI/", "", NULL,
0 },
+ { "Path URI conversion tests" ,
+ uriPathTest, NULL, NULL, NULL, NULL,
+ 0 },
#ifdef LIBXML_SCHEMAS_ENABLED
{ "Schemas regression tests" ,
schemasTest, "./test/schemas/*_*.xsd", NULL, NULL, NULL,
@@ -4170,6 +4345,7 @@ launchTests(testDescPtr tst) {
}
static int verbose = 0;
+static int tests_quiet = 0;
static int
runtest(int i) {
@@ -4179,7 +4355,7 @@ runtest(int i) {
old_errors = nb_errors;
old_tests = nb_tests;
old_leaks = nb_leaks;
- if (testDescriptions[i].desc != NULL)
+ if ((tests_quiet == 0) && (testDescriptions[i].desc != NULL))
printf("## %s\n", testDescriptions[i].desc);
res = launchTests(&testDescriptions[i]);
if (res != 0)
@@ -4207,6 +4383,8 @@ main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
for (a = 1; a < argc;a++) {
if (!strcmp(argv[a], "-v"))
verbose = 1;
+ else if (!strcmp(argv[a], "-quiet"))
+ tests_quiet = 1;
else {
for (i = 0; testDescriptions[i].func != NULL; i++) {
if (strstr(testDescriptions[i].desc, argv[a])) {
diff --git a/test/relaxng/docbook_0.xml b/test/relaxng/docbook_0.xml
index 963d8e7e..ede051df 100644
--- a/test/relaxng/docbook_0.xml
+++ b/test/relaxng/docbook_0.xml
@@ -1,6 +1,5 @@
<?xml version="1.0"?>
-<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
- "../dtd/4.1.2/docbookx.dtd" [
+<!DOCTYPE article [
<!ENTITY version "1.0.53">
<!ENTITY mdash "--">
<!ENTITY hellip "...">
diff --git a/uri.c b/uri.c
index 1fdbaf88..a00415c5 100644
--- a/uri.c
+++ b/uri.c
@@ -185,6 +185,8 @@
* path = [ abs_path | opaque_part ]
*/
+/* Copy the first n bytes of s into a new string, typed as char *; parenthesized so it is safe inside larger expressions */
+#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
+
/************************************************************************
* *
* Generic URI structure functions *
@@ -1086,7 +1088,10 @@ xmlParseURIFragment(xmlURIPtr uri, const char **str)
if (uri != NULL) {
if (uri->fragment != NULL)
xmlFree(uri->fragment);
- uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
+ if (uri->cleanup & 2)
+ uri->fragment = STRNDUP(*str, cur - *str);
+ else
+ uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@@ -1111,12 +1116,16 @@ xmlParseURIQuery(xmlURIPtr uri, const char **str)
if (str == NULL)
return (-1);
- while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+ while ((IS_URIC(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
if (uri != NULL) {
if (uri->query != NULL)
xmlFree(uri->query);
- uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
+ if (uri->cleanup & 2)
+ uri->query = STRNDUP(*str, cur - *str);
+ else
+ uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@@ -1147,8 +1156,7 @@ xmlParseURIScheme(xmlURIPtr uri, const char **str) {
while (IS_SCHEME(*cur)) cur++;
if (uri != NULL) {
if (uri->scheme != NULL) xmlFree(uri->scheme);
- /* !!! strndup */
- uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
+ uri->scheme = STRNDUP(*str, cur - *str);
}
*str = cur;
return(0);
@@ -1174,16 +1182,21 @@ xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
return (-1);
cur = *str;
- if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
+ if (!((IS_URIC_NO_SLASH(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
return (3);
}
NEXT(cur);
- while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+ while ((IS_URIC(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
if (uri != NULL) {
if (uri->opaque != NULL)
xmlFree(uri->opaque);
- uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
+ if (uri->cleanup & 2)
+ uri->opaque = STRNDUP(*str, cur - *str);
+ else
+ uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@@ -1235,7 +1248,10 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) {
if (*cur == '@') {
if (uri != NULL) {
if (uri->user != NULL) xmlFree(uri->user);
- uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
+ /* raw mode keeps the userinfo verbatim; like the unescaped form it belongs in uri->user */
+ if (uri->cleanup & 2)
+ uri->user = STRNDUP(*str, cur - *str);
+ else
+ uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
}
cur++;
} else {
@@ -1349,7 +1365,10 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) {
uri->authority = NULL;
if (host[0] != '[') { /* it's not an IPV6 addr */
if (uri->server != NULL) xmlFree(uri->server);
- uri->server = xmlURIUnescapeString(host, cur - host, NULL);
+ if (uri->cleanup & 2)
+ uri->server = STRNDUP(host, cur - host);
+ else
+ uri->server = xmlURIUnescapeString(host, cur - host, NULL);
}
}
/*
@@ -1392,16 +1411,21 @@ xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
return (-1);
cur = *str;
- if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
+ if (!((IS_SEGMENT(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
return (3);
}
NEXT(cur);
- while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+ while ((IS_SEGMENT(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
if (uri != NULL) {
if (uri->path != NULL)
xmlFree(uri->path);
- uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
+ if (uri->cleanup & 2)
+ uri->path = STRNDUP(*str, cur - *str);
+ else
+ uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@@ -1432,11 +1456,13 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
cur = *str;
do {
- while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+ while ((IS_PCHAR(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
while (*cur == ';') {
cur++;
- while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
+ while ((IS_PCHAR(cur)) ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
}
if (*cur != '/')
@@ -1472,8 +1498,13 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
len2++;
}
path[len2] = 0;
- if (cur - *str > 0)
- xmlURIUnescapeString(*str, cur - *str, &path[len2]);
+ if (cur - *str > 0) {
+ if (uri->cleanup & 2) {
+ memcpy(&path[len2], *str, cur - *str);
+ path[len2 + (cur - *str)] = 0;
+ } else
+ xmlURIUnescapeString(*str, cur - *str, &path[len2]);
+ }
if (uri->path != NULL)
xmlFree(uri->path);
uri->path = path;
@@ -1538,7 +1569,10 @@ xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
if (uri->user != NULL) xmlFree(uri->user);
uri->user = NULL;
if (uri->authority != NULL) xmlFree(uri->authority);
- uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
+ if (uri->cleanup & 2)
+ uri->authority = STRNDUP(*str, cur - *str);
+ else
+ uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return(0);
@@ -1761,6 +1795,38 @@ xmlParseURI(const char *str) {
return(uri);
}
+/**
+ * xmlParseURIRaw:
+ * @str: the URI string to analyze
+ * @raw: if non-zero, the URI pieces are stored without unescaping
+ *
+ * Parse an URI reference, optionally keeping its pieces exactly as
+ * they appear in @str.
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built xmlURIPtr or NULL in case of error
+ */
+xmlURIPtr
+xmlParseURIRaw(const char *str, int raw) {
+    xmlURIPtr uri;
+
+    if (str == NULL)
+        return(NULL);
+
+    uri = xmlCreateURI();
+    if (uri == NULL)
+        return(NULL);
+
+    /* bit 2 of cleanup tells the parsing routines to skip unescaping */
+    if (raw)
+        uri->cleanup |= 2;
+
+    if (xmlParseURIReference(uri, str) != 0) {
+        xmlFreeURI(uri);
+        return(NULL);
+    }
+    return(uri);
+}
+
/************************************************************************
* *
* Public functions *
@@ -2229,8 +2295,9 @@ xmlCanonicPath(const xmlChar *path)
int i = 0;
xmlChar *p = NULL;
#endif
- xmlChar *ret;
xmlURIPtr uri;
+ xmlChar *ret;
+ const xmlChar *absuri;
if (path == NULL)
return(NULL);
@@ -2239,12 +2306,47 @@ xmlCanonicPath(const xmlChar *path)
return xmlStrdup(path);
}
+ absuri = xmlStrstr(path, BAD_CAST "://");
+ if (absuri != NULL) {
+ int l, j;
+ unsigned char c;
+ xmlChar *escURI;
+
+ /*
+ * this looks like an URI where some parts have not been
+ * escaped leading to a parsing problem check that the first
+ * part matches a protocol.
+ */
+ l = absuri - path;
+ if ((l <= 0) || (l > 20))
+ goto path_processing;
+ for (j = 0;j < l;j++) {
+ c = path[j];
+ if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
+ goto path_processing;
+ }
+
+ escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
+ if (escURI != NULL) {
+ uri = xmlParseURI((const char *) escURI);
+ if (uri != NULL) {
+ xmlFreeURI(uri);
+ return escURI;
+ }
+ /* still not a valid URI: release the escaped copy before falling back to path handling */
+ xmlFree(escURI);
+ }
+ }
+
+path_processing:
+#if defined(_WIN32) && !defined(__CYGWIN__)
+ /*
+ * This really need to be cleaned up by someone with a Windows box
+ */
uri = xmlCreateURI();
if (uri == NULL) {
return(NULL);
}
-#if defined(_WIN32) && !defined(__CYGWIN__)
len = xmlStrlen(path);
if ((len > 2) && IS_WINDOWS_PATH(path)) {
uri->scheme = xmlStrdup(BAD_CAST "file");
@@ -2261,15 +2363,15 @@ xmlCanonicPath(const xmlChar *path)
*p = '/';
p++;
}
-#else
- uri->path = (char *) xmlStrdup((const xmlChar *) path);
-#endif
if (uri->path == NULL) {
xmlFreeURI(uri);
return(NULL);
}
ret = xmlSaveUri(uri);
xmlFreeURI(uri);
+#else
+ ret = xmlStrdup((const xmlChar *) path);
+#endif
return(ret);
}
diff --git a/xmlIO.c b/xmlIO.c
index 0902a2fc..1e73b6df 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -3533,7 +3533,6 @@ xmlGetExternalEntityLoader(void) {
*
* Load an external entity, note that the use of this function for
* unparsed entities may generate problems
- * TODO: a more generic External entity API must be designed
*
* Returns the xmlParserInputPtr or NULL
*/