aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--HTMLparser.c49
-rw-r--r--HTMLtree.c49
-rw-r--r--METADATA8
-rw-r--r--configure.ac23
-rw-r--r--error.c22
-rw-r--r--fuzz/.gitignore8
-rw-r--r--fuzz/Makefile.am99
-rw-r--r--fuzz/fuzz.c100
-rw-r--r--fuzz/fuzz.h11
-rw-r--r--fuzz/genSeed.c407
-rw-r--r--fuzz/htmlSeed.c36
-rw-r--r--fuzz/schemaSeed.c34
-rw-r--r--fuzz/testFuzzer.c109
-rw-r--r--fuzz/xml.c12
-rw-r--r--fuzz/xmlSeed.c28
-rw-r--r--fuzz/xpath.c48
-rw-r--r--fuzz/xpath.dict94
-rw-r--r--fuzz/xpath.options3
-rw-r--r--include/libxml/parser.h2
-rw-r--r--parser.c29
-rw-r--r--parserInternals.c6
-rw-r--r--result/XInclude/fallback3.xml8
-rw-r--r--result/XInclude/fallback3.xml.err0
-rw-r--r--result/XInclude/fallback3.xml.rdr25
-rw-r--r--result/XInclude/fallback4.xml10
-rw-r--r--result/XInclude/fallback4.xml.err0
-rw-r--r--result/XInclude/fallback4.xml.rdr29
-rw-r--r--result/XInclude/fallback5.xml51
-rw-r--r--result/XInclude/fallback5.xml.rdr116
-rw-r--r--result/XInclude/fallback6.xml1
-rw-r--r--result/XInclude/fallback6.xml.rdr0
-rw-r--r--result/XInclude/ns1.xml10
-rw-r--r--result/XInclude/ns1.xml.rdr23
-rw-r--r--runtest.c8
-rw-r--r--test/XInclude/docs/fallback3.xml9
-rw-r--r--test/XInclude/docs/fallback4.xml7
-rw-r--r--test/XInclude/docs/fallback5.xml83
-rw-r--r--test/XInclude/docs/fallback6.xml6
-rw-r--r--test/XInclude/docs/ns1.xml12
-rw-r--r--xinclude.c185
-rw-r--r--xmllint.c8
-rw-r--r--xmlreader.c15
-rw-r--r--xmlsave.c8
-rw-r--r--xmlschemastypes.c10
-rw-r--r--xmlstring.c21
-rw-r--r--xpath.c32
46 files changed, 1399 insertions, 455 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index b9812985..de624f8d 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -296,7 +296,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
#define UPPER (toupper(*ctxt->input->cur))
-#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)
+#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val)
#define NXT(val) ctxt->input->cur[(val)]
@@ -330,7 +330,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
- ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
+ ctxt->token = 0; ctxt->input->cur += l; \
} while (0)
/************
@@ -597,7 +597,6 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
ctxt->input->line++; ctxt->input->col = 1;
} else ctxt->input->col++;
ctxt->input->cur++;
- ctxt->nbChars++;
if (*ctxt->input->cur == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
@@ -2495,7 +2494,6 @@ htmlParseName(htmlParserCtxtPtr ctxt) {
count = in - ctxt->input->cur;
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
- ctxt->nbChars += count;
ctxt->input->col += count;
return(ret);
}
@@ -3852,8 +3850,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
while ((CUR != 0) &&
(CUR != '>') &&
((CUR != '/') || (NXT(1) != '>'))) {
- long cons = ctxt->nbChars;
-
GROW;
attname = htmlParseAttribute(ctxt, &attvalue);
if (attname != NULL) {
@@ -3920,12 +3916,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
failed:
SKIP_BLANKS;
- if (cons == ctxt->nbChars) {
- htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
- "htmlParseStartTag: problem parsing attributes\n",
- NULL, NULL);
- break;
- }
}
/*
@@ -4161,8 +4151,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
currentNode = xmlStrdup(ctxt->name);
depth = ctxt->nameNr;
while (1) {
- long cons = ctxt->nbChars;
-
GROW;
if (ctxt->instate == XML_PARSER_EOF)
@@ -4282,15 +4270,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
else {
htmlParseCharData(ctxt);
}
-
- if (cons == ctxt->nbChars) {
- if (ctxt->node != NULL) {
- htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
- "detected an error in element content\n",
- NULL, NULL);
- }
- break;
- }
}
GROW;
}
@@ -4561,8 +4540,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
currentNode = xmlStrdup(ctxt->name);
depth = ctxt->nameNr;
while (1) {
- long cons = ctxt->nbChars;
-
GROW;
if (ctxt->instate == XML_PARSER_EOF)
@@ -4696,15 +4673,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
else {
htmlParseCharData(ctxt);
}
-
- if (cons == ctxt->nbChars) {
- if (ctxt->node != NULL) {
- htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
- "detected an error in element content\n",
- NULL, NULL);
- }
- break;
- }
}
GROW;
}
@@ -4968,7 +4936,6 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
ctxt->vctxt.warning = xmlParserValidityWarning;
ctxt->record_info = 0;
ctxt->validate = 0;
- ctxt->nbChars = 0;
ctxt->checkIndex = 0;
ctxt->catalogs = NULL;
xmlInitNodeInfoSeq(&ctxt->node_seq);
@@ -5702,7 +5669,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
}
case XML_PARSER_CONTENT: {
xmlChar chr[2] = { 0, 0 };
- long cons;
/*
* Handle preparsed entities and charRef
@@ -5747,7 +5713,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
goto done;
cur = in->cur[0];
next = in->cur[1];
- cons = ctxt->nbChars;
if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
(xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
/*
@@ -5877,15 +5842,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
}
}
}
- if (cons == ctxt->nbChars) {
- if (ctxt->node != NULL) {
- htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
- "detected an error in element content\n",
- NULL, NULL);
- }
- NEXT;
- break;
- }
break;
}
@@ -6622,7 +6578,6 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
ctxt->vctxt.error = xmlParserValidityError;
ctxt->vctxt.warning = xmlParserValidityWarning;
ctxt->record_info = 0;
- ctxt->nbChars = 0;
ctxt->checkIndex = 0;
ctxt->inSubset = 0;
ctxt->errNo = XML_ERR_OK;
diff --git a/HTMLtree.c b/HTMLtree.c
index 8d236bb3..cdb7f86a 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -706,49 +706,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
+ xmlChar *escaped;
xmlChar *tmp = value;
- /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
- xmlBufCCat(buf->buffer, "\"");
while (IS_BLANK_CH(*tmp)) tmp++;
- /* URI Escape everything, except server side includes. */
- for ( ; ; ) {
- xmlChar *escaped;
- xmlChar endChar;
- xmlChar *end = NULL;
- xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
- if (start != NULL) {
- end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
- if (end != NULL) {
- *start = '\0';
- }
- }
-
- /* Escape the whole string, or until start (set to '\0'). */
- escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
- if (escaped != NULL) {
- xmlBufCat(buf->buffer, escaped);
- xmlFree(escaped);
- } else {
- xmlBufCat(buf->buffer, tmp);
- }
-
- if (end == NULL) { /* Everything has been written. */
- break;
- }
-
- /* Do not escape anything within server side includes. */
- *start = '<'; /* Restore the first character of "<!--". */
- end += 3; /* strlen("-->") */
- endChar = *end;
- *end = '\0';
- xmlBufCat(buf->buffer, start);
- *end = endChar;
- tmp = end;
+ /*
+ * the < and > have already been escaped at the entity level
+ * And doing so here breaks server side includes
+ */
+ escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
+ if (escaped != NULL) {
+ xmlBufWriteQuotedString(buf->buffer, escaped);
+ xmlFree(escaped);
+ } else {
+ xmlBufWriteQuotedString(buf->buffer, value);
}
-
- xmlBufCCat(buf->buffer, "\"");
} else {
xmlBufWriteQuotedString(buf->buffer, value);
}
diff --git a/METADATA b/METADATA
index 01cd930a..71cd8429 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://github.com/GNOME/libxml2/archive/905820a44c0c895c02124ecacff735794509f4fe.zip"
+ value: "https://github.com/GNOME/libxml2/archive/f0fd1b67fc883a24cdd039abb3d4fe4696104d72.zip"
}
- version: "905820a44c0c895c02124ecacff735794509f4fe"
+ version: "f0fd1b67fc883a24cdd039abb3d4fe4696104d72"
license_type: NOTICE
last_upgrade_date {
year: 2020
- month: 7
- day: 31
+ month: 9
+ day: 1
}
}
diff --git a/configure.ac b/configure.ac
index 32c47ac8..a4c675bb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,15 +1,20 @@
dnl Process this file with autoconf to produce a configure script.
AC_PREREQ([2.63])
-AC_INIT
+
+m4_define([MAJOR_VERSION], 2)
+m4_define([MINOR_VERSION], 9)
+m4_define([MICRO_VERSION], 10)
+
+AC_INIT([libxml2],[MAJOR_VERSION.MINOR_VERSION.MICRO_VERSION])
AC_CONFIG_SRCDIR([entities.c])
AC_CONFIG_HEADERS([config.h])
AM_MAINTAINER_MODE([enable])
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_HOST
-LIBXML_MAJOR_VERSION=2
-LIBXML_MINOR_VERSION=9
-LIBXML_MICRO_VERSION=10
+LIBXML_MAJOR_VERSION=MAJOR_VERSION
+LIBXML_MINOR_VERSION=MINOR_VERSION
+LIBXML_MICRO_VERSION=MICRO_VERSION
LIBXML_MICRO_VERSION_SUFFIX=
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION$LIBXML_MICRO_VERSION_SUFFIX
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@@ -50,7 +55,7 @@ AC_SUBST(LIBXML_VERSION_EXTRA)
VERSION=${LIBXML_VERSION}
-AM_INIT_AUTOMAKE(libxml2, $VERSION)
+AM_INIT_AUTOMAKE([foreign])
# Support silent build rules, requires at least automake-1.11. Disable
# by either passing --disable-silent-rules to configure or passing V=1
@@ -1514,8 +1519,8 @@ else
AC_CHECK_HEADER(unicode/ucnv.h,
AC_MSG_CHECKING(for icu)
- AC_TRY_LINK([#include <unicode/ucnv.h>],[
- UConverter *utf = ucnv_open("UTF-8", NULL);],[
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <unicode/ucnv.h>]], [[
+ UConverter *utf = ucnv_open("UTF-8", NULL);]])],[
AC_MSG_RESULT(yes)
have_libicu=yes],[
AC_MSG_RESULT(no)
@@ -1526,8 +1531,8 @@ else
LDFLAGS="${LDFLAGS} ${ICU_LIBS}"
LIBS="${LIBS} -licucore"
- AC_TRY_LINK([#include <unicode/ucnv.h>],[
- UConverter *utf = ucnv_open("UTF-8", NULL);],[
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <unicode/ucnv.h>]], [[
+ UConverter *utf = ucnv_open("UTF-8", NULL);]])],[
AC_MSG_RESULT(yes)
have_libicu=yes
ICU_LIBS="${ICU_LIBS} -licucore"
diff --git a/error.c b/error.c
index 3e41e173..9ff1c2ba 100644
--- a/error.c
+++ b/error.c
@@ -557,6 +557,7 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel,
* of the usual "base" (doc->URL) for the node (bug 152623).
*/
xmlNodePtr prev = baseptr;
+ char *href = NULL;
int inclcount = 0;
while (prev != NULL) {
if (prev->prev == NULL)
@@ -564,21 +565,20 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel,
else {
prev = prev->prev;
if (prev->type == XML_XINCLUDE_START) {
- if (--inclcount < 0)
- break;
+ if (inclcount > 0) {
+ --inclcount;
+ } else {
+ href = (char *) xmlGetProp(prev, BAD_CAST "href");
+ if (href != NULL)
+ break;
+ }
} else if (prev->type == XML_XINCLUDE_END)
inclcount++;
}
}
- if (prev != NULL) {
- if (prev->type == XML_XINCLUDE_START) {
- prev->type = XML_ELEMENT_NODE;
- to->file = (char *) xmlGetProp(prev, BAD_CAST "href");
- prev->type = XML_XINCLUDE_START;
- } else {
- to->file = (char *) xmlGetProp(prev, BAD_CAST "href");
- }
- } else
+ if (href != NULL)
+ to->file = href;
+ else
#endif
to->file = (char *) xmlStrdup(baseptr->doc->URL);
if ((to->file == NULL) && (node != NULL) && (node->doc != NULL)) {
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
index eecb4aea..02c74b11 100644
--- a/fuzz/.gitignore
+++ b/fuzz/.gitignore
@@ -1,13 +1,13 @@
corpus/
+genSeed
html
-htmlSeed
regexp
schema
-schemaSeed
seed/html*
-seed/xml*
seed/schema*
+seed/xml*
+seed/xpath*
testFuzzer
uri
xml
-xmlSeed
+xpath
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
index 9a1225db..49b95541 100644
--- a/fuzz/Makefile.am
+++ b/fuzz/Makefile.am
@@ -1,4 +1,5 @@
-EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed
+AUTOMAKE_OPTIONS = -Wno-syntax
+EXTRA_PROGRAMS = genSeed html regexp schema uri xml xpath
check_PROGRAMS = testFuzzer
CLEANFILES = $(EXTRA_PROGRAMS)
AM_CPPFLAGS = -I$(top_srcdir)/include
@@ -6,41 +7,43 @@ DEPENDENCIES = $(top_builddir)/libxml2.la
LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD)
XML_MAX_LEN = 80000
+# Single quotes to avoid wildcard expansion by the shell
XML_SEED_CORPUS_SRC = \
- $(top_srcdir)/test/* \
- $(top_srcdir)/test/errors/*.xml \
- $(top_srcdir)/test/errors10/*.xml \
- $(top_srcdir)/test/namespaces/* \
- $(top_srcdir)/test/valid/*.xml \
- $(top_srcdir)/test/xmlid/* \
- $(top_srcdir)/test/VC/* \
- $(top_srcdir)/test/VCM/*
+ '$(top_srcdir)/test/*' \
+ '$(top_srcdir)/test/errors/*.xml' \
+ '$(top_srcdir)/test/errors10/*.xml' \
+ '$(top_srcdir)/test/namespaces/*' \
+ '$(top_srcdir)/test/valid/*.xml' \
+ '$(top_srcdir)/test/VC/*' \
+ '$(top_srcdir)/test/VCM/*' \
+ '$(top_srcdir)/test/XInclude/docs/*' \
+ '$(top_srcdir)/test/xmlid/*'
testFuzzer_SOURCES = testFuzzer.c fuzz.c
-tests: testFuzzer$(EXEEXT)
+.PHONY: tests corpus clean-corpus
+
+corpus: seed/html.stamp seed/schema.stamp seed/xml.stamp seed/xpath.stamp
+
+tests: testFuzzer$(EXEEXT) corpus
@echo "## Running fuzzer tests"
@./testFuzzer$(EXEEXT)
-# XML fuzzer
+clean-corpus:
+ rm -rf seed/html.stamp seed/html
+ rm -rf seed/schema.stamp seed/schema
+ rm -rf seed/xml.stamp seed/xml
+ rm -rf seed/xpath.stamp seed/xpath
+
+# Seed corpus
-xmlSeed_SOURCES = xmlSeed.c fuzz.c
+genSeed_SOURCES = genSeed.c fuzz.c
-seed/xml.stamp: xmlSeed$(EXEEXT)
+# XML fuzzer
+
+seed/xml.stamp: genSeed$(EXEEXT)
@mkdir -p seed/xml
- @for i in $(XML_SEED_CORPUS_SRC); do \
- if [ -f $$i ]; then \
- echo Processing seed $$i; \
- base=$$(basename $$i) \
- outfile=$(abs_builddir)/seed/xml/$$base; \
- pushd $$(dirname $$i) >/dev/null; \
- $(abs_builddir)/xmlSeed$(EXEEXT) $$base > $$outfile; \
- popd >/dev/null; \
- if [ "$$(wc -c < $$outfile)" -gt $(XML_MAX_LEN) ]; then \
- rm $$outfile; \
- fi; \
- fi; \
- done
+ @./genSeed$(EXEEXT) xml $(XML_SEED_CORPUS_SRC)
@touch seed/xml.stamp
xml_SOURCES = xml.c fuzz.c
@@ -56,16 +59,9 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp
# HTML fuzzer
-htmlSeed_SOURCES = htmlSeed.c fuzz.c
-
-seed/html.stamp: htmlSeed$(EXEEXT)
+seed/html.stamp: genSeed$(EXEEXT)
@mkdir -p seed/html
- @for i in $(top_srcdir)/test/HTML/*; do \
- if [ -f $$i ]; then \
- echo Processing seed $$i; \
- ./htmlSeed$(EXEEXT) $$i > seed/html/$$(basename $$i); \
- fi; \
- done
+ @./genSeed$(EXEEXT) html '$(top_srcdir)/test/HTML/*'
@touch seed/html.stamp
html_SOURCES = html.c fuzz.c
@@ -106,20 +102,9 @@ fuzz-uri: uri$(EXEEXT)
# XML Schema fuzzer
-schemaSeed_SOURCES = schemaSeed.c fuzz.c
-
-seed/schema.stamp: schemaSeed$(EXEEXT)
+seed/schema.stamp: genSeed$(EXEEXT)
@mkdir -p seed/schema
- @for i in ../test/schemas/*.xsd; do \
- if [ -f $$i ]; then \
- echo Processing seed $$i; \
- base=$$(basename $$i) \
- outfile=$(abs_builddir)/seed/schema/$$base; \
- pushd $$(dirname $$i) >/dev/null; \
- $(abs_builddir)/schemaSeed$(EXEEXT) $$base > $$outfile; \
- popd >/dev/null; \
- fi; \
- done
+ @./genSeed$(EXEEXT) schema '$(top_srcdir)/test/schemas/*.xsd'
@touch seed/schema.stamp
schema_SOURCES = schema.c fuzz.c
@@ -133,3 +118,21 @@ fuzz-schema: schema$(EXEEXT) seed/schema.stamp
-timeout=20 \
corpus/schema seed/schema
+# XPath fuzzer
+
+# The XPath test data ships with the sources, so it must be located via
+# $(top_srcdir) like the other seed rules; $(top_builddir) only works for
+# in-tree builds.
+seed/xpath.stamp: genSeed$(EXEEXT)
+	@mkdir -p seed/xpath
+	@./genSeed$(EXEEXT) xpath "$(top_srcdir)/test/XPath"
+	@touch seed/xpath.stamp
+
+xpath_SOURCES = xpath.c fuzz.c
+xpath_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-xpath: xpath$(EXEEXT) seed/xpath.stamp
+ @mkdir -p corpus/xpath
+ ./xpath$(EXEEXT) \
+ -dict=xpath.dict \
+ -max_len=10000 \
+ -timeout=20 \
+ corpus/xpath seed/xpath
+
diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c
index ba7c9cad..b5dfa185 100644
--- a/fuzz/fuzz.c
+++ b/fuzz/fuzz.c
@@ -4,8 +4,11 @@
* See Copyright for the status of this software.
*/
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/stat.h>
+
#include <libxml/hash.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
@@ -69,11 +72,6 @@ xmlFuzzDataInit(const char *data, size_t size) {
fuzzData.mainEntity = NULL;
}
-static void
-xmlFreeEntityEntry(void *value, const xmlChar *name) {
- xmlFree(value);
-}
-
/**
* xmlFuzzDataFree:
*
@@ -82,7 +80,7 @@ xmlFreeEntityEntry(void *value, const xmlChar *name) {
void
xmlFuzzDataCleanup(void) {
xmlFree(fuzzData.outBuf);
- xmlHashFree(fuzzData.entities, xmlFreeEntityEntry);
+ xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator);
}
/**
@@ -122,20 +120,24 @@ xmlFuzzReadRemaining(size_t *size) {
}
/*
- * Write a random-length string to stdout in a format similar to
+ * xmlFuzzWriteString:
+ * @out: output file
+ * @str: string to write
+ *
+ * Write a random-length string to file in a format similar to
* FuzzedDataProvider. Backslash followed by newline marks the end of the
* string. Two backslashes are used to escape a backslash.
*/
-static void
-xmlFuzzWriteString(const char *str) {
+void
+xmlFuzzWriteString(FILE *out, const char *str) {
for (; *str; str++) {
int c = (unsigned char) *str;
- putchar(c);
+ putc(c, out);
if (c == '\\')
- putchar(c);
+ putc(c, out);
}
- putchar('\\');
- putchar('\n');
+ putc('\\', out);
+ putc('\n', out);
}
/**
@@ -150,7 +152,7 @@ xmlFuzzWriteString(const char *str) {
*
* Returns a zero-terminated string or NULL if the fuzz data is exhausted.
*/
-static const char *
+const char *
xmlFuzzReadString(size_t *size) {
const char *out = fuzzData.outPtr;
@@ -186,47 +188,6 @@ xmlFuzzReadString(size_t *size) {
return(NULL);
}
-/*
- * A custom entity loader that writes all external DTDs or entities to a
- * single file in the format expected by xmlFuzzEntityLoader.
- */
-xmlParserInputPtr
-xmlFuzzEntityRecorder(const char *URL, const char *ID,
- xmlParserCtxtPtr ctxt) {
- xmlParserInputPtr in;
- static const int chunkSize = 16384;
- int len;
-
- in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
- if (in == NULL)
- return(NULL);
-
- if (fuzzData.entities == NULL) {
- fuzzData.entities = xmlHashCreate(4);
- } else if (xmlHashLookup(fuzzData.entities,
- (const xmlChar *) URL) != NULL) {
- return(in);
- }
-
- do {
- len = xmlParserInputBufferGrow(in->buf, chunkSize);
- if (len < 0) {
- fprintf(stderr, "Error reading %s\n", URL);
- xmlFreeInputStream(in);
- return(NULL);
- }
- } while (len > 0);
-
- xmlFuzzWriteString(URL);
- xmlFuzzWriteString((char *) xmlBufContent(in->buf->buffer));
-
- xmlFreeInputStream(in);
-
- xmlHashAddEntry(fuzzData.entities, (const xmlChar *) URL, NULL);
-
- return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
-}
-
/**
* xmlFuzzReadEntities:
*
@@ -357,3 +318,32 @@ xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
return(ret);
}
+/**
+ * xmlSlurpFile:
+ * @path:  path of the file to read
+ * @sizeRet:  if non-NULL, receives the number of bytes read on success
+ *
+ * Read a regular file completely into a newly allocated buffer. The
+ * buffer is zero-terminated; the terminator is not counted in @sizeRet.
+ *
+ * Returns the buffer (to be released with xmlFree), or NULL if @path is
+ * not a regular file, cannot be opened or read, or allocation fails.
+ */
+char *
+xmlSlurpFile(const char *path, size_t *sizeRet) {
+    struct stat info;
+    FILE *fp;
+    char *buf;
+    size_t len;
+
+    if (stat(path, &info) != 0)
+        return(NULL);
+    if (!S_ISREG(info.st_mode))
+        return(NULL);
+    len = info.st_size;
+
+    fp = fopen(path, "rb");
+    if (fp == NULL)
+        return(NULL);
+
+    buf = xmlMalloc(len + 1);
+    if ((buf != NULL) && (fread(buf, 1, len, fp) != len)) {
+        /* Short read: discard the partial contents. */
+        xmlFree(buf);
+        buf = NULL;
+    }
+    if (buf != NULL) {
+        buf[len] = 0;
+        if (sizeRet != NULL)
+            *sizeRet = len;
+    }
+
+    fclose(fp);
+    return(buf);
+}
+
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h
index 7e7fc29c..8716af93 100644
--- a/fuzz/fuzz.h
+++ b/fuzz/fuzz.h
@@ -8,6 +8,7 @@
#define __XML_FUZZERCOMMON_H__
#include <stddef.h>
+#include <stdio.h>
#include <libxml/parser.h>
#ifdef __cplusplus
@@ -36,8 +37,11 @@ xmlFuzzReadInt(void);
const char *
xmlFuzzReadRemaining(size_t *size);
-xmlParserInputPtr
-xmlFuzzEntityRecorder(const char *URL, const char *ID, xmlParserCtxtPtr ctxt);
+void
+xmlFuzzWriteString(FILE *out, const char *str);
+
+const char *
+xmlFuzzReadString(size_t *size);
void
xmlFuzzReadEntities(void);
@@ -55,6 +59,9 @@ size_t
xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
size_t numStrings);
+char *
+xmlSlurpFile(const char *path, size_t *size);
+
#ifdef __cplusplus
}
#endif
diff --git a/fuzz/genSeed.c b/fuzz/genSeed.c
new file mode 100644
index 00000000..68fb87a1
--- /dev/null
+++ b/fuzz/genSeed.c
@@ -0,0 +1,407 @@
+/*
+ * genSeed.c: Generate the seed corpora for the fuzzers.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <glob.h>
+#include <libgen.h>
+#include <sys/stat.h>
+
+#ifdef _WIN32
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/xinclude.h>
+#include <libxml/xmlschemas.h>
+#include "fuzz.h"
+
+#define PATH_SIZE 500
+#define SEED_BUF_SIZE 16384
+#define EXPR_SIZE 4500
+
+/* Per-file callback: processes the input file @base and writes to @out. */
+typedef int
+(*fileFunc)(const char *base, FILE *out);
+
+/* Per-argument callback: main calls it once for each command-line argument. */
+typedef int
+(*mainFunc)(const char *arg);
+
+/* State shared by the seed generator functions in this file. */
+static struct {
+ FILE *out; /* file the entity recorder writes to */
+ xmlHashTablePtr entities; /* URLs already seen by the entity recorder (hash keys) */
+ xmlExternalEntityLoader oldLoader; /* loader to restore when recording ends */
+ fileFunc processFile; /* per-file callback selected from the fuzzer name */
+ const char *fuzzer; /* fuzzer name from argv[1]; names the seed/ subdirectory */
+ int counter; /* numeric suffix for the generated seed/xpath file names */
+ char cwd[PATH_SIZE]; /* directory at startup; restored after each chdir */
+} globalData;
+
+/*
+ * fuzzEntityRecorder:
+ * @URL:  location of the requested external DTD or entity
+ * @ID:  identifier passed through to the underlying loader
+ * @ctxt:  parser context passed through to the underlying loader
+ *
+ * A custom entity loader that writes all external DTDs or entities to a
+ * single file in the format expected by xmlFuzzEntityLoader. Each URL is
+ * written only the first time it is requested.
+ *
+ * Returns an input stream for the entity, or NULL if it could not be
+ * loaded or read.
+ */
+static xmlParserInputPtr
+fuzzEntityRecorder(const char *URL, const char *ID,
+                   xmlParserCtxtPtr ctxt) {
+    xmlParserInputPtr in;
+    xmlChar *marker;
+    static const int chunkSize = 16384;
+    int len;
+
+    in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
+    if (in == NULL)
+        return(NULL);
+
+    if (globalData.entities == NULL) {
+        globalData.entities = xmlHashCreate(4);
+    } else if (xmlHashLookup(globalData.entities,
+                             (const xmlChar *) URL) != NULL) {
+        /* Already recorded: hand the stream to the parser untouched. */
+        return(in);
+    }
+
+    /* Read the whole entity into the input buffer. */
+    do {
+        len = xmlParserInputBufferGrow(in->buf, chunkSize);
+        if (len < 0) {
+            fprintf(stderr, "Error reading %s\n", URL);
+            xmlFreeInputStream(in);
+            return(NULL);
+        }
+    } while (len > 0);
+
+    xmlFuzzWriteString(globalData.out, URL);
+    xmlFuzzWriteString(globalData.out,
+                       (char *) xmlBufContent(in->buf->buffer));
+
+    /* This stream has been consumed; the parser gets a fresh one below. */
+    xmlFreeInputStream(in);
+
+    /*
+     * xmlHashLookup returns the stored payload, so an entry with a NULL
+     * payload looks the same as a missing entry and the duplicate check
+     * above would never fire. Store a heap-allocated payload instead;
+     * the table is freed with xmlHashDefaultDeallocator, which releases
+     * it with xmlFree.
+     */
+    marker = xmlStrdup((const xmlChar *) URL);
+    if (xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
+                        marker) != 0)
+        xmlFree(marker);
+
+    return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
+}
+
+/*
+ * fuzzRecorderInit:
+ * @out:  file that receives the recorded entities
+ *
+ * Start recording: remember the current external entity loader, create
+ * the table of recorded URLs and install fuzzEntityRecorder as the loader.
+ */
+static void
+fuzzRecorderInit(FILE *out) {
+    xmlExternalEntityLoader previous = xmlGetExternalEntityLoader();
+
+    globalData.oldLoader = previous;
+    globalData.entities = xmlHashCreate(8);
+    globalData.out = out;
+
+    xmlSetExternalEntityLoader(fuzzEntityRecorder);
+}
+
+/*
+ * fuzzRecorderCleanup:
+ *
+ * Stop recording: restore the previously saved external entity loader,
+ * free the table of recorded URLs and clear the recorder fields of
+ * globalData.
+ */
+static void
+fuzzRecorderCleanup(void) {
+    xmlSetExternalEntityLoader(globalData.oldLoader);
+    xmlHashFree(globalData.entities, xmlHashDefaultDeallocator);
+    globalData.out = NULL;
+    globalData.entities = NULL;
+    globalData.oldLoader = NULL;
+}
+
+/*
+ * processXml:
+ * @docFile:  XML document to parse
+ * @out:  seed file being written
+ *
+ * Write the parser options to @out, then parse @docFile and run XInclude
+ * processing with the entity recorder installed, so that every external
+ * entity loaded along the way is appended to @out.
+ *
+ * Returns 0.
+ */
+static int
+processXml(const char *docFile, FILE *out) {
+    const int flags = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
+    xmlDocPtr parsed;
+
+    /* The seed starts with the raw option word. */
+    fwrite(&flags, sizeof(flags), 1, out);
+
+    fuzzRecorderInit(out);
+    parsed = xmlReadFile(docFile, NULL, flags);
+    xmlXIncludeProcessFlags(parsed, flags);
+    xmlFreeDoc(parsed);
+    fuzzRecorderCleanup();
+
+    return(0);
+}
+
+/*
+ * processHtml:
+ * @docFile:  HTML document to copy
+ * @out:  seed file being written
+ *
+ * Write a zero option word to @out followed by a verbatim copy of
+ * @docFile.
+ *
+ * Returns 0, also when @docFile cannot be opened (a message is printed).
+ */
+static int
+processHtml(const char *docFile, FILE *out) {
+    char chunk[SEED_BUF_SIZE];
+    FILE *src;
+    size_t nread;
+    int opts = 0;
+
+    fwrite(&opts, sizeof(opts), 1, out);
+
+    src = fopen(docFile, "rb");
+    if (src == NULL) {
+        fprintf(stderr, "couldn't open %s\n", docFile);
+        return(0);
+    }
+
+    /* Copy chunk by chunk; a short read marks the end of the file. */
+    for (;;) {
+        nread = fread(chunk, 1, sizeof(chunk), src);
+        if (nread > 0)
+            fwrite(chunk, 1, nread, out);
+        if (nread != sizeof(chunk))
+            break;
+    }
+    fclose(src);
+
+    return(0);
+}
+
+/*
+ * processSchema:
+ * @docFile:  XML Schema document to parse
+ * @out:  seed file being written
+ *
+ * Parse @docFile as a schema with the entity recorder installed, so that
+ * everything fetched through the external entity loader is written to
+ * @out. Schema parser messages are routed to xmlFuzzErrorFunc.
+ *
+ * Returns 0.
+ */
+static int
+processSchema(const char *docFile, FILE *out) {
+    xmlSchemaParserCtxtPtr parser;
+    xmlSchemaPtr compiled;
+
+    fuzzRecorderInit(out);
+
+    parser = xmlSchemaNewParserCtxt(docFile);
+    xmlSchemaSetParserErrors(parser, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
+    compiled = xmlSchemaParse(parser);
+    xmlSchemaFreeParserCtxt(parser);
+    xmlSchemaFree(compiled);
+
+    fuzzRecorderCleanup();
+
+    return(0);
+}
+
+/*
+ * processPattern:
+ * @pattern:  glob pattern selecting the input files
+ *
+ * Call globalData.processFile for every regular file matching @pattern,
+ * writing the result to seed/<fuzzer>/<basename>. The working directory
+ * is switched to the file's directory while it is processed and restored
+ * to globalData.cwd afterwards.
+ *
+ * Returns 0 on success (including when nothing matches), -1 if the
+ * pattern could not be expanded or any file failed.
+ */
+static int
+processPattern(const char *pattern) {
+    glob_t globbuf;
+    int ret = 0;
+    int res;
+    size_t i; /* gl_pathc is a size_t; avoids a signed/unsigned compare */
+
+    res = glob(pattern, 0, NULL, &globbuf);
+    if (res == GLOB_NOMATCH)
+        return(0);
+    if (res != 0) {
+        fprintf(stderr, "couldn't match pattern %s\n", pattern);
+        return(-1);
+    }
+
+    for (i = 0; i < globbuf.gl_pathc; i++) {
+        struct stat statbuf;
+        char outPath[PATH_SIZE];
+        char *dirBuf = NULL;
+        char *baseBuf = NULL;
+        const char *path, *dir, *base;
+        FILE *out = NULL;
+        int dirChanged = 0;
+        size_t size;
+
+        path = globbuf.gl_pathv[i];
+
+        /* Silently skip directories and anything else that is not a file. */
+        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
+            continue;
+
+        /* dirname and basename each get their own copy of the path. */
+        dirBuf = (char *) xmlCharStrdup(path);
+        baseBuf = (char *) xmlCharStrdup(path);
+        if ((dirBuf == NULL) || (baseBuf == NULL)) {
+            fprintf(stderr, "memory allocation failed\n");
+            ret = -1;
+            goto error;
+        }
+        dir = dirname(dirBuf);
+        base = basename(baseBuf);
+
+        size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
+                        globalData.fuzzer, base);
+        if (size >= PATH_SIZE) {
+            fprintf(stderr, "creating path failed\n");
+            ret = -1;
+            goto error;
+        }
+        /* outPath is relative, so open it before leaving the start dir. */
+        out = fopen(outPath, "wb");
+        if (out == NULL) {
+            fprintf(stderr, "couldn't open %s for writing\n", outPath);
+            ret = -1;
+            goto error;
+        }
+        if (chdir(dir) != 0) {
+            fprintf(stderr, "couldn't chdir to %s\n", dir);
+            ret = -1;
+            goto error;
+        }
+        dirChanged = 1;
+        if (globalData.processFile(base, out) != 0)
+            ret = -1;
+
+error:
+        if (out != NULL)
+            fclose(out);
+        xmlFree(dirBuf);
+        xmlFree(baseBuf);
+        /* Later relative paths are wrong if the start dir is not restored. */
+        if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
+            fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
+            ret = -1;
+            break;
+        }
+    }
+
+    globfree(&globbuf);
+    return(ret);
+}
+
+/*
+ * processXPath:
+ * @testDir:  directory containing the XPath test data
+ * @prefix:  file name prefix selecting the expression files in @subdir
+ * @name:  stem used for the generated seed file names
+ * @data:  document text written after each expression
+ * @subdir:  subdirectory of @testDir holding the expression files
+ * @xptr:  non-zero if the expressions are written as they are; zero if
+ *         they are wrapped in "xpointer(...)" first
+ *
+ * Read every regular file matching @testDir/@subdir/@prefix* line by line
+ * and create one seed file seed/xpath/<name>-<counter> per line, holding
+ * the expression followed by @data.
+ *
+ * Returns 0 on success (including when nothing matches), -1 if any file
+ * could not be read or written.
+ */
+static int
+processXPath(const char *testDir, const char *prefix, const char *name,
+             const char *data, const char *subdir, int xptr) {
+    char pattern[PATH_SIZE];
+    glob_t globbuf;
+    size_t i, size;
+    int ret = 0, res;
+
+    size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
+                    testDir, subdir, prefix);
+    if (size >= PATH_SIZE)
+        return(-1);
+    res = glob(pattern, 0, NULL, &globbuf);
+    if (res == GLOB_NOMATCH)
+        return(0);
+    if (res != 0) {
+        fprintf(stderr, "couldn't match pattern %s\n", pattern);
+        return(-1);
+    }
+
+    for (i = 0; i < globbuf.gl_pathc; i++) {
+        char *path = globbuf.gl_pathv[i];
+        struct stat statbuf;
+        FILE *in;
+        char expr[EXPR_SIZE];
+
+        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
+            continue;
+
+        in = fopen(path, "rb");
+        if (in == NULL) {
+            ret = -1;
+            continue;
+        }
+
+        /* fgets returns a pointer: test it against NULL, not with '>'. */
+        while (fgets(expr, EXPR_SIZE, in) != NULL) {
+            char outPath[PATH_SIZE];
+            FILE *out;
+            int j;
+
+            /* Cut the expression at the first line terminator. */
+            for (j = 0; expr[j] != 0; j++)
+                if (expr[j] == '\r' || expr[j] == '\n')
+                    break;
+            expr[j] = 0;
+
+            size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
+                            name, globalData.counter);
+            if (size >= PATH_SIZE) {
+                ret = -1;
+                continue;
+            }
+            out = fopen(outPath, "wb");
+            if (out == NULL) {
+                ret = -1;
+                continue;
+            }
+
+            if (xptr) {
+                xmlFuzzWriteString(out, expr);
+            } else {
+                char xptrExpr[EXPR_SIZE+100];
+
+                /* Wrap XPath expressions as XPointer */
+                snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
+                xmlFuzzWriteString(out, xptrExpr);
+            }
+
+            xmlFuzzWriteString(out, data);
+
+            fclose(out);
+            globalData.counter++;
+        }
+
+        fclose(in);
+    }
+
+    globfree(&globbuf);
+
+    return(ret);
+}
+
+/*
+ * processXPathDir:
+ * @testDir:  directory containing the XPath test data
+ *
+ * Generate the XPath seeds: first the expressions under @testDir/expr
+ * against the fixed document "<d></d>", then, for each file matching
+ * @testDir/docs/*, the expressions under "tests" and "xptr" whose file
+ * names start with that document's name.
+ *
+ * Returns 0 on success, -1 if some input failed, 1 if the docs pattern
+ * could not be built or expanded.
+ */
+int
+processXPathDir(const char *testDir) {
+    char docsPattern[PATH_SIZE];
+    glob_t docs;
+    size_t idx, len;
+    int status = 0;
+
+    globalData.counter = 1;
+    if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
+        status = -1;
+
+    len = snprintf(docsPattern, sizeof(docsPattern), "%s/docs/*", testDir);
+    if ((len >= PATH_SIZE) || (glob(docsPattern, 0, NULL, &docs) != 0))
+        return(1);
+
+    idx = 0;
+    while (idx < docs.gl_pathc) {
+        char *docPath = docs.gl_pathv[idx++];
+        char *content = xmlSlurpFile(docPath, NULL);
+        const char *docName;
+
+        if (content == NULL) {
+            status = -1;
+            continue;
+        }
+        docName = basename(docPath);
+
+        /* Seed numbering restarts for every document. */
+        globalData.counter = 1;
+        if (processXPath(testDir, docName, docName, content, "tests", 0) != 0)
+            status = -1;
+        if (processXPath(testDir, docName, docName, content, "xptr", 1) != 0)
+            status = -1;
+
+        xmlFree(content);
+    }
+
+    globfree(&docs);
+
+    return(status);
+}
+
+/*
+ * main:
+ *
+ * Usage: genSeed FUZZER PATTERN...
+ *
+ * FUZZER is one of "html", "schema", "xml" or "xpath". For "xpath" each
+ * remaining argument is a test directory handed to processXPathDir; for
+ * the others each argument is a glob pattern handed to processPattern.
+ *
+ * Returns 0 if every argument was processed successfully, 1 otherwise.
+ */
+int
+main(int argc, const char **argv) {
+    mainFunc processArg = processPattern;
+    const char *fuzzer;
+    int ret = 0;
+    int i;
+
+    if (argc < 3) {
+        fprintf(stderr, "usage: genSeed [FUZZER] [PATTERN...]\n");
+        return(1);
+    }
+
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+
+    fuzzer = argv[1];
+    if (strcmp(fuzzer, "html") == 0) {
+        globalData.processFile = processHtml;
+    } else if (strcmp(fuzzer, "schema") == 0) {
+        globalData.processFile = processSchema;
+    } else if (strcmp(fuzzer, "xml") == 0) {
+        globalData.processFile = processXml;
+    } else if (strcmp(fuzzer, "xpath") == 0) {
+        processArg = processXPathDir;
+    } else {
+        fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
+        return(1);
+    }
+    globalData.fuzzer = fuzzer;
+
+    if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
+        fprintf(stderr, "couldn't get current directory\n");
+        return(1);
+    }
+
+    /* Keep going after a failure, but report it in the exit status. */
+    for (i = 2; i < argc; i++) {
+        if (processArg(argv[i]) != 0)
+            ret = 1;
+    }
+
+    return(ret);
+}
+
diff --git a/fuzz/htmlSeed.c b/fuzz/htmlSeed.c
deleted file mode 100644
index f3213e2e..00000000
--- a/fuzz/htmlSeed.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * htmlSeed.c: Generate the HTML seed corpus for fuzzing.
- *
- * See Copyright for the status of this software.
- */
-
-#include <stdio.h>
-
-#define SEED_BUF_SIZE 16384
-
-int
-main(int argc, char **argv) {
- int opts = 0;
- FILE *file;
- char buf[SEED_BUF_SIZE];
- size_t size;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: htmlSeed [FILE]\n");
- return(1);
- }
-
- fwrite(&opts, sizeof(opts), 1, stdout);
-
- /* Copy file */
- file = fopen(argv[1], "rb");
- do {
- size = fread(buf, 1, SEED_BUF_SIZE, file);
- if (size > 0)
- fwrite(buf, 1, size, stdout);
- } while (size == SEED_BUF_SIZE);
- fclose(file);
-
- return(0);
-}
-
diff --git a/fuzz/schemaSeed.c b/fuzz/schemaSeed.c
deleted file mode 100644
index 4e2c6bc6..00000000
--- a/fuzz/schemaSeed.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * xmlSeed.c: Generate the XML seed corpus for fuzzing.
- *
- * See Copyright for the status of this software.
- */
-
-#include <stdio.h>
-#include <libxml/xmlschemas.h>
-#include "fuzz.h"
-
-int
-main(int argc, char **argv) {
- xmlSchemaPtr schema;
- xmlSchemaParserCtxtPtr pctxt;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: schemaSeed [XSD]\n");
- return(1);
- }
-
- xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
- xmlSetExternalEntityLoader(xmlFuzzEntityRecorder);
-
- pctxt = xmlSchemaNewParserCtxt(argv[1]);
- xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
- schema = xmlSchemaParse(pctxt);
- xmlSchemaFreeParserCtxt(pctxt);
-
- xmlSchemaFree(schema);
- xmlFuzzDataCleanup();
-
- return(0);
-}
-
diff --git a/fuzz/testFuzzer.c b/fuzz/testFuzzer.c
index f6be7b8f..678f3243 100644
--- a/fuzz/testFuzzer.c
+++ b/fuzz/testFuzzer.c
@@ -6,13 +6,93 @@
*/
#include <string.h>
+#include <glob.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlstring.h>
#include "fuzz.h"
-int
-main() {
+#define LLVMFuzzerInitialize fuzzHtmlInit
+#define LLVMFuzzerTestOneInput fuzzHtml
+#include "html.c"
+#undef LLVMFuzzerInitialize
+#undef LLVMFuzzerTestOneInput
+
+#define LLVMFuzzerInitialize fuzzRegexpInit
+#define LLVMFuzzerTestOneInput fuzzRegexp
+#include "regexp.c"
+#undef LLVMFuzzerInitialize
+#undef LLVMFuzzerTestOneInput
+
+#define LLVMFuzzerInitialize fuzzSchemaInit
+#define LLVMFuzzerTestOneInput fuzzSchema
+#include "schema.c"
+#undef LLVMFuzzerInitialize
+#undef LLVMFuzzerTestOneInput
+
+#define LLVMFuzzerInitialize fuzzUriInit
+#define LLVMFuzzerTestOneInput fuzzUri
+#include "uri.c"
+#undef LLVMFuzzerInitialize
+#undef LLVMFuzzerTestOneInput
+
+#define LLVMFuzzerInitialize fuzzXmlInit
+#define LLVMFuzzerTestOneInput fuzzXml
+#include "xml.c"
+#undef LLVMFuzzerInitialize
+#undef LLVMFuzzerTestOneInput
+
+#define LLVMFuzzerInitialize fuzzXPathInit
+#define LLVMFuzzerTestOneInput fuzzXPath
+#include "xpath.c"
+#undef LLVMFuzzerInitialize
+#undef LLVMFuzzerTestOneInput
+
+typedef int
+(*initFunc)(int *argc, char ***argv);
+typedef int
+(*fuzzFunc)(const char *data, size_t size);
+
+int numInputs;
+
+/*
+ * testFuzzer:
+ * @init:  optional initialization function of the fuzzer, may be NULL
+ * @fuzz:  fuzz target to run
+ * @pattern:  glob pattern selecting the input files
+ *
+ * Run @fuzz once on the contents of every file matching @pattern and
+ * count each input in numInputs.
+ *
+ * Returns 0 on success, -1 if the pattern could not be expanded or a
+ * file could not be read.
+ */
+static int
+testFuzzer(initFunc init, fuzzFunc fuzz, const char *pattern) {
+    glob_t globbuf;
+    int ret = -1;
+    size_t i; /* gl_pathc is a size_t; avoids a signed/unsigned compare */
+
+    if (glob(pattern, 0, NULL, &globbuf) != 0) {
+        fprintf(stderr, "pattern %s matches no files\n", pattern);
+        return(-1);
+    }
+
+    if (init != NULL)
+        init(NULL, NULL);
+
+    for (i = 0; i < globbuf.gl_pathc; i++) {
+        const char *path = globbuf.gl_pathv[i];
+        char *data;
+        size_t size;
+
+        data = xmlSlurpFile(path, &size);
+        if (data == NULL) {
+            fprintf(stderr, "couldn't read %s\n", path);
+            goto error;
+        }
+        fuzz(data, size);
+        xmlFree(data);
+
+        numInputs++;
+    }
+
+    ret = 0;
+error:
+    globfree(&globbuf);
+    return(ret);
+}
+
+static int
+testEntityLoader() {
static const char data[] =
"doc.xml\\\n"
"<!DOCTYPE doc SYSTEM \"doc.dtd\">\n"
@@ -53,3 +133,28 @@ main() {
return(ret);
}
+/*
+ * Runs the entity loader test followed by every fuzz target over its
+ * seed corpus. A failing step doesn't stop the remaining ones.
+ *
+ * Returns 0 if all steps succeeded, 1 otherwise.
+ */
+int
+main() {
+    static const struct {
+        initFunc init;
+        fuzzFunc fuzz;
+        const char *pattern;
+    } targets[] = {
+        { fuzzHtmlInit, fuzzHtml, "seed/html/*" },
+        { fuzzRegexpInit, fuzzRegexp, "seed/regexp/*" },
+        { fuzzSchemaInit, fuzzSchema, "seed/schema/*" },
+        { NULL, fuzzUri, "seed/uri/*" },
+        { fuzzXmlInit, fuzzXml, "seed/xml/*" },
+        { fuzzXPathInit, fuzzXPath, "seed/xpath/*" }
+    };
+    size_t t;
+    int failed = 0;
+
+    if (testEntityLoader() != 0)
+        failed = 1;
+
+    /* Every target is run even if an earlier one failed. */
+    for (t = 0; t < sizeof(targets) / sizeof(targets[0]); t++) {
+        if (testFuzzer(targets[t].init, targets[t].fuzz,
+                       targets[t].pattern) != 0)
+            failed = 1;
+    }
+
+    if (failed == 0)
+        printf("Successfully tested %d inputs\n", numInputs);
+
+    return(failed);
+}
+
diff --git a/fuzz/xml.c b/fuzz/xml.c
index 50dd967d..09867cf7 100644
--- a/fuzz/xml.c
+++ b/fuzz/xml.c
@@ -7,6 +7,7 @@
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xmlerror.h>
+#include <libxml/xinclude.h>
#include <libxml/xmlreader.h>
#include "fuzz.h"
@@ -27,7 +28,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
xmlParserCtxtPtr ctxt;
xmlTextReaderPtr reader;
xmlChar *out;
- const char *docBuffer;
+ const char *docBuffer, *docUrl;
size_t docSize, consumed, chunkSize;
int opts, outSize;
@@ -38,6 +39,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
xmlFuzzReadEntities();
docBuffer = xmlFuzzMainEntity(&docSize);
+ docUrl = xmlFuzzMainUrl();
if (docBuffer == NULL) {
xmlFuzzDataCleanup();
return(0);
@@ -45,7 +47,9 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
/* Pull parser */
- doc = xmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
+ doc = xmlReadMemory(docBuffer, docSize, docUrl, NULL, opts);
+ if (opts & XML_PARSE_XINCLUDE)
+ xmlXIncludeProcessFlags(doc, opts);
/* Also test the serializer. */
xmlDocDumpMemory(doc, &out, &outSize);
xmlFree(out);
@@ -53,7 +57,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
/* Push parser */
- ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
+ ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, docUrl);
xmlCtxtUseOptions(ctxt, opts);
for (consumed = 0; consumed < docSize; consumed += chunkSize) {
@@ -64,6 +68,8 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) {
}
xmlParseChunk(ctxt, NULL, 0, 1);
+ if (opts & XML_PARSE_XINCLUDE)
+ xmlXIncludeProcessFlags(ctxt->myDoc, opts);
xmlFreeDoc(ctxt->myDoc);
xmlFreeParserCtxt(ctxt);
diff --git a/fuzz/xmlSeed.c b/fuzz/xmlSeed.c
deleted file mode 100644
index 5ce97d0b..00000000
--- a/fuzz/xmlSeed.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * xmlSeed.c: Generate the XML seed corpus for fuzzing.
- *
- * See Copyright for the status of this software.
- */
-
-#include <stdio.h>
-#include "fuzz.h"
-
-int
-main(int argc, char **argv) {
- int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: xmlSeed [FILE]\n");
- return(1);
- }
-
- fwrite(&opts, sizeof(opts), 1, stdout);
-
- xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
- xmlSetExternalEntityLoader(xmlFuzzEntityRecorder);
- xmlFreeDoc(xmlReadFile(argv[1], NULL, opts));
- xmlFuzzDataCleanup();
-
- return(0);
-}
-
diff --git a/fuzz/xpath.c b/fuzz/xpath.c
new file mode 100644
index 00000000..767acb98
--- /dev/null
+++ b/fuzz/xpath.c
@@ -0,0 +1,48 @@
+/*
+ * xpath.c: a libFuzzer target to test XPath and XPointer expressions.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/parser.h>
+#include <libxml/xpointer.h>
+#include "fuzz.h"
+
+/*
+ * One-time setup for the fuzz target: initializes the parser and
+ * installs xmlFuzzErrorFunc as the generic error handler. Both
+ * arguments are unused. Always returns 0.
+ */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+ char ***argv ATTRIBUTE_UNUSED) {
+ xmlInitParser();
+ xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+
+ return 0;
+}
+
+/*
+ * Fuzz target: reads two strings from the fuzz data with
+ * xmlFuzzReadString, first the expression and then the XML document.
+ * The document is parsed in recovery mode and the expression is
+ * evaluated against it with xmlXPtrEval. Always returns 0.
+ */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    xmlDocPtr doc;
+    const char *expr, *xml;
+    size_t exprSize, xmlSize;
+
+    xmlFuzzDataInit(data, size);
+
+    expr = xmlFuzzReadString(&exprSize);
+    xml = xmlFuzzReadString(&xmlSize);
+
+    /* Recovery mode allows more input to be fuzzed. */
+    doc = xmlReadMemory(xml, xmlSize, NULL, NULL, XML_PARSE_RECOVER);
+    if (doc != NULL) {
+        xmlXPathContextPtr xpctxt = xmlXPathNewContext(doc);
+
+        /* The context can be NULL if its allocation failed. */
+        if (xpctxt != NULL) {
+            /* Operation limit to avoid timeout */
+            xpctxt->opLimit = 500000;
+
+            xmlXPathFreeObject(xmlXPtrEval(BAD_CAST expr, xpctxt));
+            xmlXPathFreeContext(xpctxt);
+        }
+    }
+    xmlFreeDoc(doc);
+
+    xmlFuzzDataCleanup();
+
+    return(0);
+}
+
diff --git a/fuzz/xpath.dict b/fuzz/xpath.dict
new file mode 100644
index 00000000..4fe375fb
--- /dev/null
+++ b/fuzz/xpath.dict
@@ -0,0 +1,94 @@
+# XML
+
+elem_a="<a></a>"
+elem_b="<b></b>"
+elem_c="<c></c>"
+elem_d="<d></d>"
+elem_empty="<a/>"
+elem_ns_a="<a:a xmlns:a='a'></a:a>"
+elem_ns_b="<b:b xmlns:b='b'></b:b>"
+
+attr_a=" a='a'"
+attr_b=" b='b'"
+
+ns_decl=" xmlns:a='a'"
+ns_default=" xmlns='a'"
+ns_prefix_a="a:"
+ns_prefix_b="b:"
+
+cdata_section="<![CDATA[ ]]>"
+
+comment="<!-- -->"
+
+pi="<?a?>"
+
+# XPath
+
+axis_ancestor="ancestor::"
+axis_ancestor_or_self="ancestor-or-self::"
+axis_attribute="attribute::"
+axis_attribute_abbrev="@"
+axis_child="child::"
+axis_descendant="descendant::"
+axis_descendant_or_self="descendant-or-self::"
+axis_following="following::"
+axis_following_sibling="following-sibling::"
+axis_namespace="namespace::"
+axis_parent="parent::"
+axis_preceding="preceding::"
+axis_preceding_siblings="preceding-sibling::"
+axis_self="self::"
+
+node_test_ns="a:"
+
+val_num="=(1.0)"
+val_str_sq="=('a')"
+val_str_dq="=(\"a\")"
+val_node_set="=(*)"
+val_elem="=(b)"
+
+step_root="/"
+step_descendant="//"
+step_any="//*"
+step_any_l="*//"
+step_elem="//b"
+step_ns_elem="//a:a"
+step_comment="//comment()"
+step_node="//node()"
+step_node_l="node()//"
+step_pi="//processing-instruction()"
+step_text="//text()"
+step_parent="../"
+
+op_plus="+1"
+op_minus=" - 1"
+op_neg="-"
+op_mul="*1"
+op_div=" div 1"
+op_mod=" mod 1"
+op_and=" and 1"
+op_or=" or 1"
+op_ne="!=1"
+op_lt="<1"
+op_gt=">1"
+op_le="<=1"
+op_ge=">=1"
+op_predicate_num="[1]"
+op_predicate_last="[last()]"
+op_predicate_str="['a']"
+op_predicate="[1=1]"
+op_arg_num=",1"
+op_arg_str=",'a'"
+op_arg_node=",*"
+op_union="|//b"
+
+var_num="=$f"
+var_bool="=$b"
+var_str="=$s"
+var_node_set="=$n"
+
+# Unicode
+
+utf8_2="\xC3\x84"
+utf8_3="\xE2\x80\x9C"
+utf8_4="\xF0\x9F\x98\x80"
diff --git a/fuzz/xpath.options b/fuzz/xpath.options
new file mode 100644
index 00000000..02d5e976
--- /dev/null
+++ b/fuzz/xpath.options
@@ -0,0 +1,3 @@
+[libfuzzer]
+max_len = 10000
+timeout = 20
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 3020b20c..1c86a97c 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -231,7 +231,7 @@ struct _xmlParserCtxt {
int nameMax; /* Max depth of the parsing stack */
const xmlChar * *nameTab; /* array of nodes */
- long nbChars; /* number of xmlChar processed */
+ long nbChars; /* unused */
long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
diff --git a/parser.c b/parser.c
index e1d139d5..be14c322 100644
--- a/parser.c
+++ b/parser.c
@@ -1073,11 +1073,15 @@ xmlHasFeature(xmlFeature feature)
*/
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
+ xmlSAXHandlerPtr sax;
if (ctxt == NULL) return;
+ sax = ctxt->sax;
#ifdef LIBXML_SAX1_ENABLED
- if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
- ((ctxt->sax->startElementNs != NULL) ||
- (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
+ if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
+ ((sax->startElementNs != NULL) ||
+ (sax->endElementNs != NULL) ||
+ ((sax->startElement == NULL) && (sax->endElement == NULL))))
+ ctxt->sax2 = 1;
#else
ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */
@@ -2055,7 +2059,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
((unsigned char *) s)[ 9 ] == c10 )
#define SKIP(val) do { \
- ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
+ ctxt->input->cur += (val),ctxt->input->col+=(val); \
if (*ctxt->input->cur == 0) \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
} while (0)
@@ -2066,7 +2070,6 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
- ctxt->nbChars++; \
ctxt->input->cur++; \
} \
if (*ctxt->input->cur == 0) \
@@ -2119,7 +2122,6 @@ static void xmlGROW (xmlParserCtxtPtr ctxt) {
#define NEXT1 { \
ctxt->input->col++; \
ctxt->input->cur++; \
- ctxt->nbChars++; \
if (*ctxt->input->cur == 0) \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
}
@@ -2332,7 +2334,6 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
- ctxt->nbChars ++;
ctxt->input->cur++;
}
} else if ((RAW == '&') && (NXT(1) == '#')) {
@@ -2361,7 +2362,6 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
- ctxt->nbChars ++;
ctxt->input->cur++;
}
} else {
@@ -3333,7 +3333,6 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
}
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
- ctxt->nbChars += count;
ctxt->input->col += count;
if (ret == NULL)
xmlErrMemory(ctxt, NULL);
@@ -3456,7 +3455,6 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) {
}
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
ctxt->input->cur = in;
- ctxt->nbChars += count;
ctxt->input->col += count;
if (ret == NULL) {
xmlErrMemory(ctxt, NULL);
@@ -3493,10 +3491,10 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
while (*in != 0 && *in == *cmp) {
++in;
++cmp;
- ctxt->input->col++;
}
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
/* success */
+ ctxt->input->col += in - ctxt->input->cur;
ctxt->input->cur = in;
return (const xmlChar*) 1;
}
@@ -8826,6 +8824,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
}
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
/* success */
+ ctxt->input->col += in - ctxt->input->cur;
ctxt->input->cur = in;
return((const xmlChar*) 1);
}
@@ -14696,7 +14695,12 @@ xmlCleanupParser(void) {
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
- xmlCleanupParser();
+ /*
+ * Calling custom deallocation functions in a destructor can cause
+ * problems, for example with Nokogiri.
+ */
+ if (xmlFree == free)
+ xmlCleanupParser();
}
#endif
@@ -14788,7 +14792,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->vctxt.warning = xmlParserValidityWarning;
#endif
ctxt->record_info = 0;
- ctxt->nbChars = 0;
ctxt->checkIndex = 0;
ctxt->inSubset = 0;
ctxt->errNo = XML_ERR_OK;
diff --git a/parserInternals.c b/parserInternals.c
index d849c08b..b0629ef3 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -519,8 +519,6 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
} else
/* 1-byte code */
ctxt->input->cur++;
-
- ctxt->nbChars++;
} else {
/*
* Assume it's a fixed length encoding (1) with
@@ -533,7 +531,6 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
} else
ctxt->input->col++;
ctxt->input->cur++;
- ctxt->nbChars++;
}
if (*ctxt->input->cur == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
@@ -677,7 +674,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
}
if (*ctxt->input->cur == 0xD) {
if (ctxt->input->cur[1] == 0xA) {
- ctxt->nbChars++;
ctxt->input->cur++;
}
return(0xA);
@@ -693,7 +689,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*len = 1;
if (*ctxt->input->cur == 0xD) {
if (ctxt->input->cur[1] == 0xA) {
- ctxt->nbChars++;
ctxt->input->cur++;
}
return(0xA);
@@ -1748,7 +1743,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->options |= XML_PARSE_NOENT;
}
ctxt->record_info = 0;
- ctxt->nbChars = 0;
ctxt->checkIndex = 0;
ctxt->inSubset = 0;
ctxt->errNo = XML_ERR_OK;
diff --git a/result/XInclude/fallback3.xml b/result/XInclude/fallback3.xml
new file mode 100644
index 00000000..b4235514
--- /dev/null
+++ b/result/XInclude/fallback3.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<a>
+ <doc xml:base="../ents/something.xml">
+<p>something</p>
+<p>really</p>
+<p>simple</p>
+</doc>
+</a>
diff --git a/result/XInclude/fallback3.xml.err b/result/XInclude/fallback3.xml.err
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/result/XInclude/fallback3.xml.err
diff --git a/result/XInclude/fallback3.xml.rdr b/result/XInclude/fallback3.xml.rdr
new file mode 100644
index 00000000..aa2f1374
--- /dev/null
+++ b/result/XInclude/fallback3.xml.rdr
@@ -0,0 +1,25 @@
+0 1 a 0 0
+1 14 #text 0 1
+
+1 1 doc 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 something
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 really
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 simple
+2 15 p 0 0
+2 14 #text 0 1
+
+1 15 doc 0 0
+1 14 #text 0 1
+
+0 15 a 0 0
diff --git a/result/XInclude/fallback4.xml b/result/XInclude/fallback4.xml
new file mode 100644
index 00000000..9883fd54
--- /dev/null
+++ b/result/XInclude/fallback4.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<a>
+
+ <doc xml:base="../ents/something.xml">
+<p>something</p>
+<p>really</p>
+<p>simple</p>
+</doc>
+
+</a>
diff --git a/result/XInclude/fallback4.xml.err b/result/XInclude/fallback4.xml.err
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/result/XInclude/fallback4.xml.err
diff --git a/result/XInclude/fallback4.xml.rdr b/result/XInclude/fallback4.xml.rdr
new file mode 100644
index 00000000..628b9513
--- /dev/null
+++ b/result/XInclude/fallback4.xml.rdr
@@ -0,0 +1,29 @@
+0 1 a 0 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 doc 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 something
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 really
+2 15 p 0 0
+2 14 #text 0 1
+
+2 1 p 0 0
+3 3 #text 0 1 simple
+2 15 p 0 0
+2 14 #text 0 1
+
+1 15 doc 0 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+0 15 a 0 0
diff --git a/result/XInclude/fallback5.xml b/result/XInclude/fallback5.xml
new file mode 100644
index 00000000..0ba503d9
--- /dev/null
+++ b/result/XInclude/fallback5.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0"?>
+<a>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+ <elem/>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+</a>
diff --git a/result/XInclude/fallback5.xml.rdr b/result/XInclude/fallback5.xml.rdr
new file mode 100644
index 00000000..0e1dab71
--- /dev/null
+++ b/result/XInclude/fallback5.xml.rdr
@@ -0,0 +1,116 @@
+0 1 a 0 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+0 15 a 0 0
diff --git a/result/XInclude/fallback6.xml b/result/XInclude/fallback6.xml
new file mode 100644
index 00000000..2b5d4116
--- /dev/null
+++ b/result/XInclude/fallback6.xml
@@ -0,0 +1 @@
+<?xml version="1.0"?>
diff --git a/result/XInclude/fallback6.xml.rdr b/result/XInclude/fallback6.xml.rdr
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/result/XInclude/fallback6.xml.rdr
diff --git a/result/XInclude/ns1.xml b/result/XInclude/ns1.xml
new file mode 100644
index 00000000..ab41fb7a
--- /dev/null
+++ b/result/XInclude/ns1.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<doc xmlns:xi="http://www.w3.org/2001/XInclude">
+ <ns:elem xmlns:ns="urn:foo" xml:id="a"/>
+ <elem xmlns:ns="urn:foo">
+ <ns:elem xml:id="a"/>
+ </elem>
+
+ <ns:elem xmlns:ns="urn:bar"/>
+
+</doc>
diff --git a/result/XInclude/ns1.xml.rdr b/result/XInclude/ns1.xml.rdr
new file mode 100644
index 00000000..f23702f5
--- /dev/null
+++ b/result/XInclude/ns1.xml.rdr
@@ -0,0 +1,23 @@
+0 1 doc 0 0
+1 14 #text 0 1
+
+1 1 ns:elem 1 0
+1 14 #text 0 1
+
+1 1 elem 0 0
+2 14 #text 0 1
+
+2 1 ns:elem 1 0
+2 14 #text 0 1
+
+1 15 elem 0 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+1 1 ns:elem 1 0
+1 14 #text 0 1
+
+1 14 #text 0 1
+
+0 15 doc 0 0
diff --git a/runtest.c b/runtest.c
index 19ed629f..0f178cb0 100644
--- a/runtest.c
+++ b/runtest.c
@@ -2108,16 +2108,16 @@ errParseTest(const char *filename, const char *result, const char *err,
xmlDocDumpMemory(doc, (xmlChar **) &base, &size);
}
res = compareFileMem(result, base, size);
- if (res != 0) {
- fprintf(stderr, "Result for %s failed in %s\n", filename, result);
- return(-1);
- }
}
if (doc != NULL) {
if (base != NULL)
xmlFree((char *)base);
xmlFreeDoc(doc);
}
+ if (res != 0) {
+ fprintf(stderr, "Result for %s failed in %s\n", filename, result);
+ return(-1);
+ }
if (err != NULL) {
res = compareFileMem(err, testErrors, testErrorsSize);
if (res != 0) {
diff --git a/test/XInclude/docs/fallback3.xml b/test/XInclude/docs/fallback3.xml
new file mode 100644
index 00000000..0c8b6c9e
--- /dev/null
+++ b/test/XInclude/docs/fallback3.xml
@@ -0,0 +1,9 @@
+<a>
+ <xi:include href="../ents/something.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:fallback>
+ <xi:include href="c.xml">
+ <xi:fallback>There is no c.xml ... </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+</a>
diff --git a/test/XInclude/docs/fallback4.xml b/test/XInclude/docs/fallback4.xml
new file mode 100644
index 00000000..b500a635
--- /dev/null
+++ b/test/XInclude/docs/fallback4.xml
@@ -0,0 +1,7 @@
+<a>
+ <xi:include href="c.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:fallback>
+ <xi:include href="../ents/something.xml"/>
+ </xi:fallback>
+ </xi:include>
+</a>
diff --git a/test/XInclude/docs/fallback5.xml b/test/XInclude/docs/fallback5.xml
new file mode 100644
index 00000000..d3ad4246
--- /dev/null
+++ b/test/XInclude/docs/fallback5.xml
@@ -0,0 +1,83 @@
+<a>
+ <xi:include href="a01.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a02.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a03.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a04.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a05.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a06.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a07.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a08.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a09.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a10.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a11.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a12.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a13.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a14.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a15.xml">
+ <xi:fallback>
+ <elem/>
+ <xi:include href="a16.xml">
+ <xi:fallback>
+ <elem/>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+ </xi:fallback>
+ </xi:include>
+</a>
+
diff --git a/test/XInclude/docs/fallback6.xml b/test/XInclude/docs/fallback6.xml
new file mode 100644
index 00000000..fd00a03f
--- /dev/null
+++ b/test/XInclude/docs/fallback6.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="b.xml">
+ <xi:fallback><xi:include href="c.xml">
+ <xi:fallback/>
+ </xi:include></xi:fallback>
+</xi:include>
diff --git a/test/XInclude/docs/ns1.xml b/test/XInclude/docs/ns1.xml
new file mode 100644
index 00000000..7523f4a9
--- /dev/null
+++ b/test/XInclude/docs/ns1.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<doc xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:include href="#a"/>
+ <elem xmlns:ns="urn:foo">
+ <ns:elem xml:id="a"/>
+ </elem>
+ <xi:include href="b.xml">
+ <xi:fallback xmlns:ns="urn:bar">
+ <ns:elem/>
+ </xi:fallback>
+ </xi:include>
+</doc>
diff --git a/xinclude.c b/xinclude.c
index baeb8dbc..f48e0af5 100644
--- a/xinclude.c
+++ b/xinclude.c
@@ -59,7 +59,7 @@ struct _xmlXIncludeRef {
xmlNodePtr inc; /* the included copy */
int xml; /* xml or txt */
int count; /* how many refs use that specific doc */
- xmlXPathObjectPtr xptr; /* the xpointer if needed */
+ int fallback; /* fallback was loaded */
int emptyFb; /* flag to show fallback empty */
};
@@ -86,10 +86,13 @@ struct _xmlXIncludeCtxt {
xmlChar * base; /* the current xml:base */
void *_private; /* application data */
+
+ unsigned long incTotal; /* total number of processed inclusions */
};
static int
-xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree);
+xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree,
+ int skipRoot);
/************************************************************************
@@ -207,8 +210,6 @@ xmlXIncludeFreeRef(xmlXIncludeRefPtr ref) {
xmlFree(ref->URI);
if (ref->fragment != NULL)
xmlFree(ref->fragment);
- if (ref->xptr != NULL)
- xmlXPathFreeObject(ref->xptr);
xmlFree(ref);
}
@@ -626,8 +627,8 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) {
xmlXIncludeErr(ctxt, cur, XML_XINCLUDE_RECURSION,
"detected a local recursion with no xpointer in %s\n",
URL);
- if (fragment != NULL)
- xmlFree(fragment);
+ xmlFree(URL);
+ xmlFree(fragment);
return(-1);
}
@@ -639,12 +640,15 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) {
if (xmlStrEqual(URL, ctxt->urlTab[i])) {
xmlXIncludeErr(ctxt, cur, XML_XINCLUDE_RECURSION,
"detected a recursion in %s\n", URL);
+ xmlFree(URL);
+ xmlFree(fragment);
return(-1);
}
}
}
ref = xmlXIncludeNewRef(ctxt, URL, cur);
+ xmlFree(URL);
if (ref == NULL) {
return(-1);
}
@@ -652,7 +656,6 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) {
ref->doc = NULL;
ref->xml = xml;
ref->count = 1;
- xmlFree(URL);
return(0);
}
@@ -729,7 +732,9 @@ xmlXIncludeRecurseDoc(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc,
* (bug 132597)
*/
newctxt->parseFlags = ctxt->parseFlags;
- xmlXIncludeDoProcess(newctxt, doc, xmlDocGetRootElement(doc));
+ newctxt->incTotal = ctxt->incTotal;
+ xmlXIncludeDoProcess(newctxt, doc, xmlDocGetRootElement(doc), 0);
+ ctxt->incTotal = newctxt->incTotal;
for (i = 0;i < ctxt->incNr;i++) {
newctxt->incTab[i]->count--;
newctxt->incTab[i] = NULL;
@@ -1459,7 +1464,7 @@ xmlXIncludeLoadDoc(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) {
*/
if ((URL[0] == 0) || (URL[0] == '#') ||
((ctxt->doc != NULL) && (xmlStrEqual(URL, ctxt->doc->URL)))) {
- doc = NULL;
+ doc = ctxt->doc;
goto loaded;
}
@@ -1551,15 +1556,8 @@ loaded:
/*
* Add the top children list as the replacement copy.
*/
- if (doc == NULL)
- {
- /* Hopefully a DTD declaration won't be copied from
- * the same document */
- ctxt->incTab[nr]->inc = xmlCopyNodeList(ctxt->doc->children);
- } else {
- ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc,
- doc, doc->children);
- }
+ ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc,
+ doc, doc->children);
}
#ifdef LIBXML_XPTR_ENABLED
else {
@@ -1571,12 +1569,7 @@ loaded:
xmlXPathContextPtr xptrctxt;
xmlNodeSetPtr set;
- if (doc == NULL) {
- xptrctxt = xmlXPtrNewContext(ctxt->doc, ctxt->incTab[nr]->ref,
- NULL);
- } else {
- xptrctxt = xmlXPtrNewContext(doc, NULL, NULL);
- }
+ xptrctxt = xmlXPtrNewContext(doc, NULL, NULL);
if (xptrctxt == NULL) {
xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref,
XML_XINCLUDE_XPTR_FAILED,
@@ -1680,14 +1673,9 @@ loaded:
}
}
}
- if (doc == NULL) {
- ctxt->incTab[nr]->xptr = xptr;
- ctxt->incTab[nr]->inc = NULL;
- } else {
- ctxt->incTab[nr]->inc =
- xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr);
- xmlXPathFreeObject(xptr);
- }
+ ctxt->incTab[nr]->inc =
+ xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr);
+ xmlXPathFreeObject(xptr);
xmlXPathFreeContext(xptrctxt);
xmlFree(fragment);
}
@@ -1990,19 +1978,23 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) {
newctxt->_private = ctxt->_private;
newctxt->base = xmlStrdup(ctxt->base); /* Inherit the base from the existing context */
xmlXIncludeSetFlags(newctxt, ctxt->parseFlags);
- ret = xmlXIncludeDoProcess(newctxt, ctxt->doc, fallback->children);
+ newctxt->incTotal = ctxt->incTotal;
+ if (xmlXIncludeDoProcess(newctxt, ctxt->doc, fallback, 1) < 0)
+ ret = -1;
+ ctxt->incTotal = newctxt->incTotal;
if (ctxt->nbErrors > oldNbErrors)
ret = -1;
- else if (ret > 0)
- ret = 0; /* xmlXIncludeDoProcess can return +ve number */
xmlXIncludeFreeContext(newctxt);
ctxt->incTab[nr]->inc = xmlDocCopyNodeList(ctxt->doc,
fallback->children);
+ if (ctxt->incTab[nr]->inc == NULL)
+ ctxt->incTab[nr]->emptyFb = 1;
} else {
ctxt->incTab[nr]->inc = NULL;
ctxt->incTab[nr]->emptyFb = 1; /* flag empty callback */
}
+ ctxt->incTab[nr]->fallback = 1;
return(ret);
}
@@ -2158,8 +2150,7 @@ xmlXIncludeLoadNode(xmlXIncludeCtxtPtr ctxt, int nr) {
((xmlStrEqual(children->ns->href, XINCLUDE_NS)) ||
(xmlStrEqual(children->ns->href, XINCLUDE_OLD_NS)))) {
ret = xmlXIncludeLoadFallback(ctxt, children, nr);
- if (ret == 0)
- break;
+ break;
}
children = children->next;
}
@@ -2206,19 +2197,9 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) {
if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL))
return(-1);
- /*
- * If we stored an XPointer a late computation may be needed
- */
- if ((ctxt->incTab[nr]->inc == NULL) &&
- (ctxt->incTab[nr]->xptr != NULL)) {
- ctxt->incTab[nr]->inc =
- xmlXIncludeCopyXPointer(ctxt, ctxt->doc, ctxt->doc,
- ctxt->incTab[nr]->xptr);
- xmlXPathFreeObject(ctxt->incTab[nr]->xptr);
- ctxt->incTab[nr]->xptr = NULL;
- }
list = ctxt->incTab[nr]->inc;
ctxt->incTab[nr]->inc = NULL;
+ ctxt->incTab[nr]->emptyFb = 0;
/*
* Check against the risk of generating a multi-rooted document
@@ -2238,6 +2219,7 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) {
XML_XINCLUDE_MULTIPLE_ROOT,
"XInclude error: would result in multiple root nodes\n",
NULL);
+ xmlFreeNodeList(list);
return(-1);
}
}
@@ -2255,16 +2237,27 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) {
xmlUnlinkNode(cur);
xmlFreeNode(cur);
} else {
+ xmlNodePtr child, next;
+
/*
* Change the current node as an XInclude start one, and add an
* XInclude end one
*/
+ if (ctxt->incTab[nr]->fallback)
+ xmlUnsetProp(cur, BAD_CAST "href");
cur->type = XML_XINCLUDE_START;
+ /* Remove fallback children */
+ for (child = cur->children; child != NULL; child = next) {
+ next = child->next;
+ xmlUnlinkNode(child);
+ xmlFreeNode(child);
+ }
end = xmlNewDocNode(cur->doc, cur->ns, cur->name, NULL);
if (end == NULL) {
xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref,
XML_XINCLUDE_BUILD_FAILED,
"failed to build node\n", NULL);
+ xmlFreeNodeList(list);
return(-1);
}
end->type = XML_XINCLUDE_END;
@@ -2366,6 +2359,7 @@ xmlXIncludeTestNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) {
* @ctxt: the XInclude processing context
* @doc: an XML document
* @tree: the top of the tree to process
+ * @skipRoot: don't process the root node of the tree
*
* Implement the XInclude substitution on the XML document @doc
*
@@ -2373,13 +2367,16 @@ xmlXIncludeTestNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) {
* or the number of substitutions done.
*/
static int
-xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) {
+xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree,
+ int skipRoot) {
xmlNodePtr cur;
int ret = 0;
int i, start;
if ((doc == NULL) || (tree == NULL) || (tree->type == XML_NAMESPACE_DECL))
return(-1);
+ if ((skipRoot) && (tree->children == NULL))
+ return(-1);
if (ctxt == NULL)
return(-1);
@@ -2391,40 +2388,59 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) {
start = ctxt->incNr;
/*
+ * TODO: The phases must run separately for recursive inclusions.
+ *
+ * - Phase 1 should start with top-level XInclude nodes, load documents,
+ * execute XPointer expressions, then process only the result nodes
+ * (not whole document, see bug #324081) and only for phase 1
+ * recursively. We will need a backreference from xmlNodes to
+ * xmlIncludeRefs to detect references that were already visited.
+ * This can also be used for proper cycle detection, see bug #344240.
+ *
+ * - Phase 2 should visit all top-level XInclude nodes and expand
+ * possible subreferences in the replacement recursively.
+ *
+ * - Phase 3 should finally replace the top-level XInclude nodes.
+ * It could also be run together with phase 2.
+ */
+
+ /*
* First phase: lookup the elements in the document
*/
- cur = tree;
- if (xmlXIncludeTestNode(ctxt, cur) == 1)
- xmlXIncludePreProcessNode(ctxt, cur);
- while ((cur != NULL) && (cur != tree->parent)) {
+ if (skipRoot)
+ cur = tree->children;
+ else
+ cur = tree;
+ do {
/* TODO: need to work on entities -> stack */
- if ((cur->children != NULL) &&
- (cur->children->type != XML_ENTITY_DECL) &&
- (cur->children->type != XML_XINCLUDE_START) &&
- (cur->children->type != XML_XINCLUDE_END)) {
- cur = cur->children;
- if (xmlXIncludeTestNode(ctxt, cur))
- xmlXIncludePreProcessNode(ctxt, cur);
- } else if (cur->next != NULL) {
- cur = cur->next;
- if (xmlXIncludeTestNode(ctxt, cur))
- xmlXIncludePreProcessNode(ctxt, cur);
- } else {
- if (cur == tree)
- break;
- do {
- cur = cur->parent;
- if ((cur == NULL) || (cur == tree->parent))
- break; /* do */
- if (cur->next != NULL) {
- cur = cur->next;
- if (xmlXIncludeTestNode(ctxt, cur))
- xmlXIncludePreProcessNode(ctxt, cur);
- break; /* do */
- }
- } while (cur != NULL);
- }
- }
+ if (xmlXIncludeTestNode(ctxt, cur) == 1) {
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ /*
+ * Avoid superlinear expansion by limiting the total number
+ * of replacements.
+ */
+ if (ctxt->incTotal >= 20)
+ return(-1);
+#endif
+ ctxt->incTotal++;
+ xmlXIncludePreProcessNode(ctxt, cur);
+ } else if ((cur->children != NULL) &&
+ (cur->children->type != XML_ENTITY_DECL) &&
+ (cur->children->type != XML_XINCLUDE_START) &&
+ (cur->children->type != XML_XINCLUDE_END)) {
+ cur = cur->children;
+ continue;
+ }
+ do {
+ if (cur == tree)
+ break;
+ if (cur->next != NULL) {
+ cur = cur->next;
+ break;
+ }
+ cur = cur->parent;
+ } while (cur != NULL);
+ } while ((cur != NULL) && (cur != tree));
/*
* Second Phase : collect the infosets fragments
@@ -2447,8 +2463,7 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) {
*/
for (i = ctxt->incBase;i < ctxt->incNr; i++) {
if ((ctxt->incTab[i]->inc != NULL) ||
- (ctxt->incTab[i]->xptr != NULL) ||
- (ctxt->incTab[i]->emptyFb != 0)) /* (empty fallback) */
+ (ctxt->incTab[i]->emptyFb != 0)) /* (empty fallback) */
xmlXIncludeIncludeNode(ctxt, i);
}
@@ -2502,7 +2517,7 @@ xmlXIncludeProcessTreeFlagsData(xmlNodePtr tree, int flags, void *data) {
ctxt->_private = data;
ctxt->base = xmlStrdup((xmlChar *)tree->doc->URL);
xmlXIncludeSetFlags(ctxt, flags);
- ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree);
+ ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree, 0);
if ((ret >= 0) && (ctxt->nbErrors > 0))
ret = -1;
@@ -2586,7 +2601,7 @@ xmlXIncludeProcessTreeFlags(xmlNodePtr tree, int flags) {
return(-1);
ctxt->base = xmlNodeGetBase(tree->doc, tree);
xmlXIncludeSetFlags(ctxt, flags);
- ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree);
+ ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree, 0);
if ((ret >= 0) && (ctxt->nbErrors > 0))
ret = -1;
@@ -2626,7 +2641,7 @@ xmlXIncludeProcessNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) {
if ((node == NULL) || (node->type == XML_NAMESPACE_DECL) ||
(node->doc == NULL) || (ctxt == NULL))
return(-1);
- ret = xmlXIncludeDoProcess(ctxt, node->doc, node);
+ ret = xmlXIncludeDoProcess(ctxt, node->doc, node, 0);
if ((ret >= 0) && (ctxt->nbErrors > 0))
ret = -1;
return(ret);
diff --git a/xmllint.c b/xmllint.c
index f6a8e463..4469bf0a 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -528,6 +528,12 @@ static void
xmlHTMLEncodeSend(void) {
char *result;
+ /*
+ * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
+ * end with a truncated UTF-8 sequence. This is a hack to at least avoid
+ * an out-of-bounds read.
+ */
+ memset(&buffer[sizeof(buffer)-4], 0, 4);
result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
if (result) {
xmlGenericError(xmlGenericErrorContext, "%s", result);
@@ -2281,7 +2287,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) {
doc = ctxt->myDoc;
ret = ctxt->wellFormed;
xmlFreeParserCtxt(ctxt);
- if (!ret) {
+ if ((!ret) && (!recovery)) {
xmlFreeDoc(doc);
doc = NULL;
}
diff --git a/xmlreader.c b/xmlreader.c
index 6ae6e922..a9b9ef93 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -48,6 +48,13 @@
#define MAX_ERR_MSG_SIZE 64000
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+/* Keeping free objects can hide memory errors. */
+#define MAX_FREE_NODES 1
+#else
+#define MAX_FREE_NODES 100
+#endif
+
/*
* The following VA_COPY was coded following an example in
* the Samba project. It may not be sufficient for some
@@ -365,7 +372,7 @@ xmlTextReaderFreeProp(xmlTextReaderPtr reader, xmlAttrPtr cur) {
DICT_FREE(cur->name);
if ((reader != NULL) && (reader->ctxt != NULL) &&
- (reader->ctxt->freeAttrsNr < 100)) {
+ (reader->ctxt->freeAttrsNr < MAX_FREE_NODES)) {
cur->next = reader->ctxt->freeAttrs;
reader->ctxt->freeAttrs = cur;
reader->ctxt->freeAttrsNr++;
@@ -466,7 +473,7 @@ xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur) {
if (((cur->type == XML_ELEMENT_NODE) ||
(cur->type == XML_TEXT_NODE)) &&
(reader != NULL) && (reader->ctxt != NULL) &&
- (reader->ctxt->freeElemsNr < 100)) {
+ (reader->ctxt->freeElemsNr < MAX_FREE_NODES)) {
cur->next = reader->ctxt->freeElems;
reader->ctxt->freeElems = cur;
reader->ctxt->freeElemsNr++;
@@ -554,7 +561,7 @@ xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur) {
if (((cur->type == XML_ELEMENT_NODE) ||
(cur->type == XML_TEXT_NODE)) &&
(reader != NULL) && (reader->ctxt != NULL) &&
- (reader->ctxt->freeElemsNr < 100)) {
+ (reader->ctxt->freeElemsNr < MAX_FREE_NODES)) {
cur->next = reader->ctxt->freeElems;
reader->ctxt->freeElems = cur;
reader->ctxt->freeElemsNr++;
@@ -1491,6 +1498,8 @@ get_next_node:
(reader->node->prev->type != XML_DTD_NODE)) {
xmlNodePtr tmp = reader->node->prev;
if ((tmp->extra & NODE_IS_PRESERVED) == 0) {
+ if (oldnode == tmp)
+ oldnode = NULL;
xmlUnlinkNode(tmp);
xmlTextReaderFreeNode(reader, tmp);
}
diff --git a/xmlsave.c b/xmlsave.c
index f1d40b9b..2225628d 100644
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -1049,7 +1049,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
while (1) {
if (cur == root)
return;
- if (ctxt->format == 1)
+ if ((ctxt->format == 1) &&
+ (cur->type != XML_XINCLUDE_START) &&
+ (cur->type != XML_XINCLUDE_END))
xmlOutputBufferWrite(buf, 1, "\n");
if (cur->next != NULL) {
cur = cur->next;
@@ -1224,7 +1226,9 @@ xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur) {
else
#endif
xmlNodeDumpOutputInternal(ctxt, child);
- xmlOutputBufferWrite(buf, 1, "\n");
+ if ((child->type != XML_XINCLUDE_START) &&
+ (child->type != XML_XINCLUDE_END))
+ xmlOutputBufferWrite(buf, 1, "\n");
child = child->next;
}
}
diff --git a/xmlschemastypes.c b/xmlschemastypes.c
index 4249d700..d6b9f924 100644
--- a/xmlschemastypes.c
+++ b/xmlschemastypes.c
@@ -3691,6 +3691,8 @@ xmlSchemaCompareDurations(xmlSchemaValPtr x, xmlSchemaValPtr y)
minday = 0;
maxday = 0;
} else {
+ if (myear > LONG_MAX / 366)
+ return -2;
/* FIXME: This doesn't take leap year exceptions every 100/400 years
into account. */
maxday = 365 * myear + (myear + 3) / 4;
@@ -4079,6 +4081,14 @@ xmlSchemaCompareDates (xmlSchemaValPtr x, xmlSchemaValPtr y)
if ((x == NULL) || (y == NULL))
return -2;
+ if ((x->value.date.year > LONG_MAX / 366) ||
+ (x->value.date.year < LONG_MIN / 366) ||
+ (y->value.date.year > LONG_MAX / 366) ||
+ (y->value.date.year < LONG_MIN / 366)) {
+ /* Possible overflow when converting to days. */
+ return -2;
+ }
+
if (x->value.date.tz_flag) {
if (!y->value.date.tz_flag) {
diff --git a/xmlstring.c b/xmlstring.c
index 8d2e06f6..e8a1e45d 100644
--- a/xmlstring.c
+++ b/xmlstring.c
@@ -130,16 +130,18 @@ xmlCharStrdup(const char *cur) {
int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
- register int tmp;
-
if (str1 == str2) return(0);
if (str1 == NULL) return(-1);
if (str2 == NULL) return(1);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return(strcmp((const char *)str1, (const char *)str2));
+#else
do {
- tmp = *str1++ - *str2;
+ int tmp = *str1++ - *str2;
if (tmp != 0) return(tmp);
} while (*str2++ != 0);
return 0;
+#endif
}
/**
@@ -158,10 +160,14 @@ xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
if (str1 == str2) return(1);
if (str1 == NULL) return(0);
if (str2 == NULL) return(0);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return(strcmp((const char *)str1, (const char *)str2) == 0);
+#else
do {
if (*str1++ != *str2) return(0);
} while (*str2++);
return(1);
+#endif
}
/**
@@ -204,18 +210,15 @@ xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
- register int tmp;
-
if (len <= 0) return(0);
if (str1 == str2) return(0);
if (str1 == NULL) return(-1);
if (str2 == NULL) return(1);
-#ifdef __GNUC__
- tmp = strncmp((const char *)str1, (const char *)str2, len);
- return tmp;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return(strncmp((const char *)str1, (const char *)str2, len));
#else
do {
- tmp = *str1++ - *str2;
+ int tmp = *str1++ - *str2;
if (tmp != 0 || --len == 0) return(tmp);
} while (*str2++ != 0);
return 0;
diff --git a/xpath.c b/xpath.c
index 673482a7..2850a1ac 100644
--- a/xpath.c
+++ b/xpath.c
@@ -136,6 +136,17 @@
#define XPATH_MAX_NODESET_LENGTH 10000000
/*
+ * XPATH_MAX_RECURSION_DEPTH:
+ * Maximum number of nested function calls when parsing or evaluating
+ * expressions
+ */
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define XPATH_MAX_RECURSION_DEPTH 500
+#else
+#define XPATH_MAX_RECURSION_DEPTH 5000
+#endif
+
+/*
* TODO:
* There are a few spots where some tests are done which depend upon ascii
* data. These should be enhanced for full UTF8 support (see particularly
@@ -6118,9 +6129,6 @@ xmlXPathNewContext(xmlDocPtr doc) {
ret->contextSize = -1;
ret->proximityPosition = -1;
- ret->maxDepth = INT_MAX;
- ret->maxParserDepth = INT_MAX;
-
#ifdef XP_DEFAULT_CACHE_ON
if (xmlXPathContextSetCache(ret, 1, -1, 0) == -1) {
xmlXPathFreeContext(ret);
@@ -10948,9 +10956,13 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
xmlXPathContextPtr xpctxt = ctxt->context;
if (xpctxt != NULL) {
- if (xpctxt->depth >= xpctxt->maxParserDepth)
+ if (xpctxt->depth >= XPATH_MAX_RECURSION_DEPTH)
XP_ERROR(XPATH_RECURSION_LIMIT_EXCEEDED);
- xpctxt->depth += 1;
+ /*
+ * Parsing a single '(' pushes about 10 functions on the call stack
+ * before recursing!
+ */
+ xpctxt->depth += 10;
}
xmlXPathCompAndExpr(ctxt);
@@ -11880,7 +11892,7 @@ xmlXPathCompOpEvalPredicate(xmlXPathParserContextPtr ctxt,
"xmlXPathCompOpEvalPredicate: Expected a predicate\n");
XP_ERROR(XPATH_INVALID_OPERAND);
}
- if (ctxt->context->depth >= ctxt->context->maxDepth)
+ if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH)
XP_ERROR(XPATH_RECURSION_LIMIT_EXCEEDED);
ctxt->context->depth += 1;
xmlXPathCompOpEvalPredicate(ctxt, &comp->steps[op->ch1], set,
@@ -12596,7 +12608,7 @@ xmlXPathCompOpEvalFirst(xmlXPathParserContextPtr ctxt,
CHECK_ERROR0;
if (OP_LIMIT_EXCEEDED(ctxt, 1))
return(0);
- if (ctxt->context->depth >= ctxt->context->maxDepth)
+ if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH)
XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED);
ctxt->context->depth += 1;
comp = ctxt->comp;
@@ -12737,7 +12749,7 @@ xmlXPathCompOpEvalLast(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op,
CHECK_ERROR0;
if (OP_LIMIT_EXCEEDED(ctxt, 1))
return(0);
- if (ctxt->context->depth >= ctxt->context->maxDepth)
+ if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH)
XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED);
ctxt->context->depth += 1;
comp = ctxt->comp;
@@ -12955,7 +12967,7 @@ xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op)
CHECK_ERROR0;
if (OP_LIMIT_EXCEEDED(ctxt, 1))
return(0);
- if (ctxt->context->depth >= ctxt->context->maxDepth)
+ if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH)
XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED);
ctxt->context->depth += 1;
comp = ctxt->comp;
@@ -14189,7 +14201,7 @@ xmlXPathOptimizeExpression(xmlXPathParserContextPtr pctxt,
/* Recurse */
ctxt = pctxt->context;
if (ctxt != NULL) {
- if (ctxt->depth >= ctxt->maxDepth)
+ if (ctxt->depth >= XPATH_MAX_RECURSION_DEPTH)
return;
ctxt->depth += 1;
}