diff options
46 files changed, 1399 insertions, 455 deletions
diff --git a/HTMLparser.c b/HTMLparser.c index b9812985..de624f8d 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -296,7 +296,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) #define UPPER (toupper(*ctxt->input->cur)) -#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val) +#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val) #define NXT(val) ctxt->input->cur[(val)] @@ -330,7 +330,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ - ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \ + ctxt->token = 0; ctxt->input->cur += l; \ } while (0) /************ @@ -597,7 +597,6 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) { ctxt->input->line++; ctxt->input->col = 1; } else ctxt->input->col++; ctxt->input->cur++; - ctxt->nbChars++; if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } @@ -2495,7 +2494,6 @@ htmlParseName(htmlParserCtxtPtr ctxt) { count = in - ctxt->input->cur; ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; - ctxt->nbChars += count; ctxt->input->col += count; return(ret); } @@ -3852,8 +3850,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { while ((CUR != 0) && (CUR != '>') && ((CUR != '/') || (NXT(1) != '>'))) { - long cons = ctxt->nbChars; - GROW; attname = htmlParseAttribute(ctxt, &attvalue); if (attname != NULL) { @@ -3920,12 +3916,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { failed: SKIP_BLANKS; - if (cons == ctxt->nbChars) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseStartTag: problem parsing attributes\n", - NULL, NULL); - break; - } } /* @@ -4161,8 +4151,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { currentNode = xmlStrdup(ctxt->name); depth = ctxt->nameNr; while (1) { - long cons = ctxt->nbChars; - GROW; if (ctxt->instate == XML_PARSER_EOF) @@ -4282,15 +4270,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { else { 
htmlParseCharData(ctxt); } - - if (cons == ctxt->nbChars) { - if (ctxt->node != NULL) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "detected an error in element content\n", - NULL, NULL); - } - break; - } } GROW; } @@ -4561,8 +4540,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { currentNode = xmlStrdup(ctxt->name); depth = ctxt->nameNr; while (1) { - long cons = ctxt->nbChars; - GROW; if (ctxt->instate == XML_PARSER_EOF) @@ -4696,15 +4673,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { else { htmlParseCharData(ctxt); } - - if (cons == ctxt->nbChars) { - if (ctxt->node != NULL) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "detected an error in element content\n", - NULL, NULL); - } - break; - } } GROW; } @@ -4968,7 +4936,6 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt) ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->record_info = 0; ctxt->validate = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->catalogs = NULL; xmlInitNodeInfoSeq(&ctxt->node_seq); @@ -5702,7 +5669,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } case XML_PARSER_CONTENT: { xmlChar chr[2] = { 0, 0 }; - long cons; /* * Handle preparsed entities and charRef @@ -5747,7 +5713,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { goto done; cur = in->cur[0]; next = in->cur[1]; - cons = ctxt->nbChars; if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { /* @@ -5877,15 +5842,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } } } - if (cons == ctxt->nbChars) { - if (ctxt->node != NULL) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "detected an error in element content\n", - NULL, NULL); - } - NEXT; - break; - } break; } @@ -6622,7 +6578,6 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt) ctxt->vctxt.error = xmlParserValidityError; ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->record_info = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; 
@@ -706,49 +706,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { + xmlChar *escaped; xmlChar *tmp = value; - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ - xmlBufCCat(buf->buffer, "\""); while (IS_BLANK_CH(*tmp)) tmp++; - /* URI Escape everything, except server side includes. */ - for ( ; ; ) { - xmlChar *escaped; - xmlChar endChar; - xmlChar *end = NULL; - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--"); - if (start != NULL) { - end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->"); - if (end != NULL) { - *start = '\0'; - } - } - - /* Escape the whole string, or until start (set to '\0'). */ - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); - if (escaped != NULL) { - xmlBufCat(buf->buffer, escaped); - xmlFree(escaped); - } else { - xmlBufCat(buf->buffer, tmp); - } - - if (end == NULL) { /* Everything has been written. */ - break; - } - - /* Do not escape anything within server side includes. */ - *start = '<'; /* Restore the first character of "<!--". 
*/ - end += 3; /* strlen("-->") */ - endChar = *end; - *end = '\0'; - xmlBufCat(buf->buffer, start); - *end = endChar; - tmp = end; + /* + * the < and > have already been escaped at the entity level + * And doing so here breaks server side includes + */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>"); + if (escaped != NULL) { + xmlBufWriteQuotedString(buf->buffer, escaped); + xmlFree(escaped); + } else { + xmlBufWriteQuotedString(buf->buffer, value); } - - xmlBufCCat(buf->buffer, "\""); } else { xmlBufWriteQuotedString(buf->buffer, value); } @@ -7,13 +7,13 @@ third_party { } url { type: ARCHIVE - value: "https://github.com/GNOME/libxml2/archive/905820a44c0c895c02124ecacff735794509f4fe.zip" + value: "https://github.com/GNOME/libxml2/archive/f0fd1b67fc883a24cdd039abb3d4fe4696104d72.zip" } - version: "905820a44c0c895c02124ecacff735794509f4fe" + version: "f0fd1b67fc883a24cdd039abb3d4fe4696104d72" license_type: NOTICE last_upgrade_date { year: 2020 - month: 7 - day: 31 + month: 9 + day: 1 } } diff --git a/configure.ac b/configure.ac index 32c47ac8..a4c675bb 100644 --- a/configure.ac +++ b/configure.ac @@ -1,15 +1,20 @@ dnl Process this file with autoconf to produce a configure script. 
AC_PREREQ([2.63]) -AC_INIT + +m4_define([MAJOR_VERSION], 2) +m4_define([MINOR_VERSION], 9) +m4_define([MICRO_VERSION], 10) + +AC_INIT([libxml2],[MAJOR_VERSION.MINOR_VERSION.MICRO_VERSION]) AC_CONFIG_SRCDIR([entities.c]) AC_CONFIG_HEADERS([config.h]) AM_MAINTAINER_MODE([enable]) AC_CONFIG_MACRO_DIR([m4]) AC_CANONICAL_HOST -LIBXML_MAJOR_VERSION=2 -LIBXML_MINOR_VERSION=9 -LIBXML_MICRO_VERSION=10 +LIBXML_MAJOR_VERSION=MAJOR_VERSION +LIBXML_MINOR_VERSION=MINOR_VERSION +LIBXML_MICRO_VERSION=MICRO_VERSION LIBXML_MICRO_VERSION_SUFFIX= LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION$LIBXML_MICRO_VERSION_SUFFIX LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION @@ -50,7 +55,7 @@ AC_SUBST(LIBXML_VERSION_EXTRA) VERSION=${LIBXML_VERSION} -AM_INIT_AUTOMAKE(libxml2, $VERSION) +AM_INIT_AUTOMAKE([foreign]) # Support silent build rules, requires at least automake-1.11. Disable # by either passing --disable-silent-rules to configure or passing V=1 @@ -1514,8 +1519,8 @@ else AC_CHECK_HEADER(unicode/ucnv.h, AC_MSG_CHECKING(for icu) - AC_TRY_LINK([#include <unicode/ucnv.h>],[ - UConverter *utf = ucnv_open("UTF-8", NULL);],[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <unicode/ucnv.h>]], [[ + UConverter *utf = ucnv_open("UTF-8", NULL);]])],[ AC_MSG_RESULT(yes) have_libicu=yes],[ AC_MSG_RESULT(no) @@ -1526,8 +1531,8 @@ else LDFLAGS="${LDFLAGS} ${ICU_LIBS}" LIBS="${LIBS} -licucore" - AC_TRY_LINK([#include <unicode/ucnv.h>],[ - UConverter *utf = ucnv_open("UTF-8", NULL);],[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <unicode/ucnv.h>]], [[ + UConverter *utf = ucnv_open("UTF-8", NULL);]])],[ AC_MSG_RESULT(yes) have_libicu=yes ICU_LIBS="${ICU_LIBS} -licucore" @@ -557,6 +557,7 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, * of the usual "base" (doc->URL) for the node (bug 152623). 
*/ xmlNodePtr prev = baseptr; + char *href = NULL; int inclcount = 0; while (prev != NULL) { if (prev->prev == NULL) @@ -564,21 +565,20 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, else { prev = prev->prev; if (prev->type == XML_XINCLUDE_START) { - if (--inclcount < 0) - break; + if (inclcount > 0) { + --inclcount; + } else { + href = (char *) xmlGetProp(prev, BAD_CAST "href"); + if (href != NULL) + break; + } } else if (prev->type == XML_XINCLUDE_END) inclcount++; } } - if (prev != NULL) { - if (prev->type == XML_XINCLUDE_START) { - prev->type = XML_ELEMENT_NODE; - to->file = (char *) xmlGetProp(prev, BAD_CAST "href"); - prev->type = XML_XINCLUDE_START; - } else { - to->file = (char *) xmlGetProp(prev, BAD_CAST "href"); - } - } else + if (href != NULL) + to->file = href; + else #endif to->file = (char *) xmlStrdup(baseptr->doc->URL); if ((to->file == NULL) && (node != NULL) && (node->doc != NULL)) { diff --git a/fuzz/.gitignore b/fuzz/.gitignore index eecb4aea..02c74b11 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,13 +1,13 @@ corpus/ +genSeed html -htmlSeed regexp schema -schemaSeed seed/html* -seed/xml* seed/schema* +seed/xml* +seed/xpath* testFuzzer uri xml -xmlSeed +xpath diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am index 9a1225db..49b95541 100644 --- a/fuzz/Makefile.am +++ b/fuzz/Makefile.am @@ -1,4 +1,5 @@ -EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed +AUTOMAKE_OPTIONS = -Wno-syntax +EXTRA_PROGRAMS = genSeed html regexp schema uri xml xpath check_PROGRAMS = testFuzzer CLEANFILES = $(EXTRA_PROGRAMS) AM_CPPFLAGS = -I$(top_srcdir)/include @@ -6,41 +7,43 @@ DEPENDENCIES = $(top_builddir)/libxml2.la LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD) XML_MAX_LEN = 80000 +# Single quotes to avoid wildcard expansion by the shell XML_SEED_CORPUS_SRC = \ - $(top_srcdir)/test/* \ - $(top_srcdir)/test/errors/*.xml \ - 
$(top_srcdir)/test/errors10/*.xml \ - $(top_srcdir)/test/namespaces/* \ - $(top_srcdir)/test/valid/*.xml \ - $(top_srcdir)/test/xmlid/* \ - $(top_srcdir)/test/VC/* \ - $(top_srcdir)/test/VCM/* + '$(top_srcdir)/test/*' \ + '$(top_srcdir)/test/errors/*.xml' \ + '$(top_srcdir)/test/errors10/*.xml' \ + '$(top_srcdir)/test/namespaces/*' \ + '$(top_srcdir)/test/valid/*.xml' \ + '$(top_srcdir)/test/VC/*' \ + '$(top_srcdir)/test/VCM/*' \ + '$(top_srcdir)/test/XInclude/docs/*' \ + '$(top_srcdir)/test/xmlid/*' testFuzzer_SOURCES = testFuzzer.c fuzz.c -tests: testFuzzer$(EXEEXT) +.PHONY: tests corpus clean-corpus + +corpus: seed/html.stamp seed/schema.stamp seed/xml.stamp seed/xpath.stamp + +tests: testFuzzer$(EXEEXT) corpus @echo "## Running fuzzer tests" @./testFuzzer$(EXEEXT) -# XML fuzzer +clean-corpus: + rm -rf seed/html.stamp seed/html + rm -rf seed/schema.stamp seed/schema + rm -rf seed/xml.stamp seed/xml + rm -rf seed/xpath.stamp seed/xpath + +# Seed corpus -xmlSeed_SOURCES = xmlSeed.c fuzz.c +genSeed_SOURCES = genSeed.c fuzz.c -seed/xml.stamp: xmlSeed$(EXEEXT) +# XML fuzzer + +seed/xml.stamp: genSeed$(EXEEXT) @mkdir -p seed/xml - @for i in $(XML_SEED_CORPUS_SRC); do \ - if [ -f $$i ]; then \ - echo Processing seed $$i; \ - base=$$(basename $$i) \ - outfile=$(abs_builddir)/seed/xml/$$base; \ - pushd $$(dirname $$i) >/dev/null; \ - $(abs_builddir)/xmlSeed$(EXEEXT) $$base > $$outfile; \ - popd >/dev/null; \ - if [ "$$(wc -c < $$outfile)" -gt $(XML_MAX_LEN) ]; then \ - rm $$outfile; \ - fi; \ - fi; \ - done + @./genSeed$(EXEEXT) xml $(XML_SEED_CORPUS_SRC) @touch seed/xml.stamp xml_SOURCES = xml.c fuzz.c @@ -56,16 +59,9 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp # HTML fuzzer -htmlSeed_SOURCES = htmlSeed.c fuzz.c - -seed/html.stamp: htmlSeed$(EXEEXT) +seed/html.stamp: genSeed$(EXEEXT) @mkdir -p seed/html - @for i in $(top_srcdir)/test/HTML/*; do \ - if [ -f $$i ]; then \ - echo Processing seed $$i; \ - ./htmlSeed$(EXEEXT) $$i > seed/html/$$(basename $$i); \ - fi; \ - done + 
@./genSeed$(EXEEXT) html '$(top_srcdir)/test/HTML/*' @touch seed/html.stamp html_SOURCES = html.c fuzz.c @@ -106,20 +102,9 @@ fuzz-uri: uri$(EXEEXT) # XML Schema fuzzer -schemaSeed_SOURCES = schemaSeed.c fuzz.c - -seed/schema.stamp: schemaSeed$(EXEEXT) +seed/schema.stamp: genSeed$(EXEEXT) @mkdir -p seed/schema - @for i in ../test/schemas/*.xsd; do \ - if [ -f $$i ]; then \ - echo Processing seed $$i; \ - base=$$(basename $$i) \ - outfile=$(abs_builddir)/seed/schema/$$base; \ - pushd $$(dirname $$i) >/dev/null; \ - $(abs_builddir)/schemaSeed$(EXEEXT) $$base > $$outfile; \ - popd >/dev/null; \ - fi; \ - done + @./genSeed$(EXEEXT) schema '$(top_srcdir)/test/schemas/*.xsd' @touch seed/schema.stamp schema_SOURCES = schema.c fuzz.c @@ -133,3 +118,21 @@ fuzz-schema: schema$(EXEEXT) seed/schema.stamp -timeout=20 \ corpus/schema seed/schema +# XPath fuzzer + +seed/xpath.stamp: genSeed$(EXEEXT) + @mkdir -p seed/xpath + @./genSeed$(EXEEXT) xpath "$(top_builddir)/test/XPath" + @touch seed/xpath.stamp + +xpath_SOURCES = xpath.c fuzz.c +xpath_LDFLAGS = -fsanitize=fuzzer + +fuzz-xpath: xpath$(EXEEXT) seed/xpath.stamp + @mkdir -p corpus/xpath + ./xpath$(EXEEXT) \ + -dict=xpath.dict \ + -max_len=10000 \ + -timeout=20 \ + corpus/xpath seed/xpath + diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c index ba7c9cad..b5dfa185 100644 --- a/fuzz/fuzz.c +++ b/fuzz/fuzz.c @@ -4,8 +4,11 @@ * See Copyright for the status of this software. 
*/ +#include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/stat.h> + #include <libxml/hash.h> #include <libxml/parser.h> #include <libxml/parserInternals.h> @@ -69,11 +72,6 @@ xmlFuzzDataInit(const char *data, size_t size) { fuzzData.mainEntity = NULL; } -static void -xmlFreeEntityEntry(void *value, const xmlChar *name) { - xmlFree(value); -} - /** * xmlFuzzDataFree: * @@ -82,7 +80,7 @@ xmlFreeEntityEntry(void *value, const xmlChar *name) { void xmlFuzzDataCleanup(void) { xmlFree(fuzzData.outBuf); - xmlHashFree(fuzzData.entities, xmlFreeEntityEntry); + xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator); } /** @@ -122,20 +120,24 @@ xmlFuzzReadRemaining(size_t *size) { } /* - * Write a random-length string to stdout in a format similar to + * xmlFuzzWriteString: + * @out: output file + * @str: string to write + * + * Write a random-length string to file in a format similar to * FuzzedDataProvider. Backslash followed by newline marks the end of the * string. Two backslashes are used to escape a backslash. */ -static void -xmlFuzzWriteString(const char *str) { +void +xmlFuzzWriteString(FILE *out, const char *str) { for (; *str; str++) { int c = (unsigned char) *str; - putchar(c); + putc(c, out); if (c == '\\') - putchar(c); + putc(c, out); } - putchar('\\'); - putchar('\n'); + putc('\\', out); + putc('\n', out); } /** @@ -150,7 +152,7 @@ xmlFuzzWriteString(const char *str) { * * Returns a zero-terminated string or NULL if the fuzz data is exhausted. */ -static const char * +const char * xmlFuzzReadString(size_t *size) { const char *out = fuzzData.outPtr; @@ -186,47 +188,6 @@ xmlFuzzReadString(size_t *size) { return(NULL); } -/* - * A custom entity loader that writes all external DTDs or entities to a - * single file in the format expected by xmlFuzzEntityLoader. 
- */ -xmlParserInputPtr -xmlFuzzEntityRecorder(const char *URL, const char *ID, - xmlParserCtxtPtr ctxt) { - xmlParserInputPtr in; - static const int chunkSize = 16384; - int len; - - in = xmlNoNetExternalEntityLoader(URL, ID, ctxt); - if (in == NULL) - return(NULL); - - if (fuzzData.entities == NULL) { - fuzzData.entities = xmlHashCreate(4); - } else if (xmlHashLookup(fuzzData.entities, - (const xmlChar *) URL) != NULL) { - return(in); - } - - do { - len = xmlParserInputBufferGrow(in->buf, chunkSize); - if (len < 0) { - fprintf(stderr, "Error reading %s\n", URL); - xmlFreeInputStream(in); - return(NULL); - } - } while (len > 0); - - xmlFuzzWriteString(URL); - xmlFuzzWriteString((char *) xmlBufContent(in->buf->buffer)); - - xmlFreeInputStream(in); - - xmlHashAddEntry(fuzzData.entities, (const xmlChar *) URL, NULL); - - return(xmlNoNetExternalEntityLoader(URL, ID, ctxt)); -} - /** * xmlFuzzReadEntities: * @@ -357,3 +318,32 @@ xmlFuzzExtractStrings(const char *data, size_t size, char **strings, return(ret); } +char * +xmlSlurpFile(const char *path, size_t *sizeRet) { + FILE *file; + struct stat statbuf; + char *data; + size_t size; + + if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) + return(NULL); + size = statbuf.st_size; + file = fopen(path, "rb"); + if (file == NULL) + return(NULL); + data = xmlMalloc(size + 1); + if (data != NULL) { + if (fread(data, 1, size, file) != size) { + xmlFree(data); + data = NULL; + } else { + data[size] = 0; + if (sizeRet != NULL) + *sizeRet = size; + } + } + fclose(file); + + return(data); +} + diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h index 7e7fc29c..8716af93 100644 --- a/fuzz/fuzz.h +++ b/fuzz/fuzz.h @@ -8,6 +8,7 @@ #define __XML_FUZZERCOMMON_H__ #include <stddef.h> +#include <stdio.h> #include <libxml/parser.h> #ifdef __cplusplus @@ -36,8 +37,11 @@ xmlFuzzReadInt(void); const char * xmlFuzzReadRemaining(size_t *size); -xmlParserInputPtr -xmlFuzzEntityRecorder(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); 
+void +xmlFuzzWriteString(FILE *out, const char *str); + +const char * +xmlFuzzReadString(size_t *size); void xmlFuzzReadEntities(void); @@ -55,6 +59,9 @@ size_t xmlFuzzExtractStrings(const char *data, size_t size, char **strings, size_t numStrings); +char * +xmlSlurpFile(const char *path, size_t *size); + #ifdef __cplusplus } #endif diff --git a/fuzz/genSeed.c b/fuzz/genSeed.c new file mode 100644 index 00000000..68fb87a1 --- /dev/null +++ b/fuzz/genSeed.c @@ -0,0 +1,407 @@ +/* + * xmlSeed.c: Generate the XML seed corpus for fuzzing. + * + * See Copyright for the status of this software. + */ + +#include <stdio.h> +#include <string.h> +#include <glob.h> +#include <libgen.h> +#include <sys/stat.h> + +#ifdef _WIN32 +#include <direct.h> +#else +#include <unistd.h> +#endif + +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/HTMLparser.h> +#include <libxml/xinclude.h> +#include <libxml/xmlschemas.h> +#include "fuzz.h" + +#define PATH_SIZE 500 +#define SEED_BUF_SIZE 16384 +#define EXPR_SIZE 4500 + +typedef int +(*fileFunc)(const char *base, FILE *out); + +typedef int +(*mainFunc)(const char *arg); + +static struct { + FILE *out; + xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */ + xmlExternalEntityLoader oldLoader; + fileFunc processFile; + const char *fuzzer; + int counter; + char cwd[PATH_SIZE]; +} globalData; + +/* + * A custom entity loader that writes all external DTDs or entities to a + * single file in the format expected by xmlFuzzEntityLoader. 
+ */ +static xmlParserInputPtr +fuzzEntityRecorder(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + xmlParserInputPtr in; + static const int chunkSize = 16384; + int len; + + in = xmlNoNetExternalEntityLoader(URL, ID, ctxt); + if (in == NULL) + return(NULL); + + if (globalData.entities == NULL) { + globalData.entities = xmlHashCreate(4); + } else if (xmlHashLookup(globalData.entities, + (const xmlChar *) URL) != NULL) { + return(in); + } + + do { + len = xmlParserInputBufferGrow(in->buf, chunkSize); + if (len < 0) { + fprintf(stderr, "Error reading %s\n", URL); + xmlFreeInputStream(in); + return(NULL); + } + } while (len > 0); + + xmlFuzzWriteString(globalData.out, URL); + xmlFuzzWriteString(globalData.out, + (char *) xmlBufContent(in->buf->buffer)); + + xmlFreeInputStream(in); + + xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, NULL); + + return(xmlNoNetExternalEntityLoader(URL, ID, ctxt)); +} + +static void +fuzzRecorderInit(FILE *out) { + globalData.out = out; + globalData.entities = xmlHashCreate(8); + globalData.oldLoader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(fuzzEntityRecorder); +} + +static void +fuzzRecorderCleanup() { + xmlSetExternalEntityLoader(globalData.oldLoader); + xmlHashFree(globalData.entities, xmlHashDefaultDeallocator); + globalData.out = NULL; + globalData.entities = NULL; + globalData.oldLoader = NULL; +} + +static int +processXml(const char *docFile, FILE *out) { + int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD; + xmlDocPtr doc; + + fwrite(&opts, sizeof(opts), 1, out); + + fuzzRecorderInit(out); + + doc = xmlReadFile(docFile, NULL, opts); + xmlXIncludeProcessFlags(doc, opts); + xmlFreeDoc(doc); + + fuzzRecorderCleanup(); + + return(0); +} + +static int +processHtml(const char *docFile, FILE *out) { + char buf[SEED_BUF_SIZE]; + FILE *file; + size_t size; + int opts = 0; + + fwrite(&opts, sizeof(opts), 1, out); + + /* Copy file */ + file = fopen(docFile, "rb"); + if (file == NULL) { + 
fprintf(stderr, "couldn't open %s\n", docFile); + return(0); + } + do { + size = fread(buf, 1, SEED_BUF_SIZE, file); + if (size > 0) + fwrite(buf, 1, size, out); + } while (size == SEED_BUF_SIZE); + fclose(file); + + return(0); +} + +static int +processSchema(const char *docFile, FILE *out) { + xmlSchemaPtr schema; + xmlSchemaParserCtxtPtr pctxt; + + fuzzRecorderInit(out); + + pctxt = xmlSchemaNewParserCtxt(docFile); + xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL); + schema = xmlSchemaParse(pctxt); + xmlSchemaFreeParserCtxt(pctxt); + xmlSchemaFree(schema); + + fuzzRecorderCleanup(); + + return(0); +} + +static int +processPattern(const char *pattern) { + glob_t globbuf; + int ret = 0; + int res, i; + + res = glob(pattern, 0, NULL, &globbuf); + if (res == GLOB_NOMATCH) + return(0); + if (res != 0) { + fprintf(stderr, "couldn't match pattern %s\n", pattern); + return(-1); + } + + for (i = 0; i < globbuf.gl_pathc; i++) { + struct stat statbuf; + char outPath[PATH_SIZE]; + char *dirBuf = NULL; + char *baseBuf = NULL; + const char *path, *dir, *base; + FILE *out = NULL; + int dirChanged = 0; + size_t size; + + path = globbuf.gl_pathv[i]; + + if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) + continue; + + dirBuf = (char *) xmlCharStrdup(path); + baseBuf = (char *) xmlCharStrdup(path); + if ((dirBuf == NULL) || (baseBuf == NULL)) { + fprintf(stderr, "memory allocation failed\n"); + ret = -1; + goto error; + } + dir = dirname(dirBuf); + base = basename(baseBuf); + + size = snprintf(outPath, sizeof(outPath), "seed/%s/%s", + globalData.fuzzer, base); + if (size >= PATH_SIZE) { + fprintf(stderr, "creating path failed\n"); + ret = -1; + goto error; + } + out = fopen(outPath, "wb"); + if (out == NULL) { + fprintf(stderr, "couldn't open %s for writing\n", outPath); + ret = -1; + goto error; + } + if (chdir(dir) != 0) { + fprintf(stderr, "couldn't chdir to %s\n", dir); + ret = -1; + goto error; + } + dirChanged = 1; + if 
(globalData.processFile(base, out) != 0) + ret = -1; + +error: + if (out != NULL) + fclose(out); + xmlFree(dirBuf); + xmlFree(baseBuf); + if ((dirChanged) && (chdir(globalData.cwd) != 0)) { + fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd); + ret = -1; + break; + } + } + + globfree(&globbuf); + return(ret); +} + +static int +processXPath(const char *testDir, const char *prefix, const char *name, + const char *data, const char *subdir, int xptr) { + char pattern[PATH_SIZE]; + glob_t globbuf; + size_t i, size; + int ret = 0, res; + + size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*", + testDir, subdir, prefix); + if (size >= PATH_SIZE) + return(-1); + res = glob(pattern, 0, NULL, &globbuf); + if (res == GLOB_NOMATCH) + return(0); + if (res != 0) { + fprintf(stderr, "couldn't match pattern %s\n", pattern); + return(-1); + } + + for (i = 0; i < globbuf.gl_pathc; i++) { + char *path = globbuf.gl_pathv[i]; + struct stat statbuf; + FILE *in; + char expr[EXPR_SIZE]; + + if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) + continue; + + in = fopen(path, "rb"); + if (in == NULL) { + ret = -1; + continue; + } + + while (fgets(expr, EXPR_SIZE, in) > 0) { + char outPath[PATH_SIZE]; + FILE *out; + int j; + + for (j = 0; expr[j] != 0; j++) + if (expr[j] == '\r' || expr[j] == '\n') + break; + expr[j] = 0; + + size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d", + name, globalData.counter); + if (size >= PATH_SIZE) { + ret = -1; + continue; + } + out = fopen(outPath, "wb"); + if (out == NULL) { + ret = -1; + continue; + } + + if (xptr) { + xmlFuzzWriteString(out, expr); + } else { + char xptrExpr[EXPR_SIZE+100]; + + /* Wrap XPath expressions as XPointer */ + snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr); + xmlFuzzWriteString(out, xptrExpr); + } + + xmlFuzzWriteString(out, data); + + fclose(out); + globalData.counter++; + } + + fclose(in); + } + + globfree(&globbuf); + + return(ret); +} + +int +processXPathDir(const char *testDir) { + 
char pattern[PATH_SIZE]; + glob_t globbuf; + size_t i, size; + int ret = 0; + + globalData.counter = 1; + if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0) + ret = -1; + + size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir); + if (size >= PATH_SIZE) + return(1); + if (glob(pattern, 0, NULL, &globbuf) != 0) + return(1); + + for (i = 0; i < globbuf.gl_pathc; i++) { + char *path = globbuf.gl_pathv[i]; + char *data; + const char *docFile; + + data = xmlSlurpFile(path, NULL); + if (data == NULL) { + ret = -1; + continue; + } + docFile = basename(path); + + globalData.counter = 1; + if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0) + ret = -1; + if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0) + ret = -1; + + xmlFree(data); + } + + globfree(&globbuf); + + return(ret); +} + +int +main(int argc, const char **argv) { + mainFunc processArg = processPattern; + const char *fuzzer; + int ret = 0; + int xpath = 0; + int i; + + if (argc < 3) { + fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n"); + return(1); + } + + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + + fuzzer = argv[1]; + if (strcmp(fuzzer, "html") == 0) { + globalData.processFile = processHtml; + } else if (strcmp(fuzzer, "schema") == 0) { + globalData.processFile = processSchema; + } else if (strcmp(fuzzer, "xml") == 0) { + globalData.processFile = processXml; + } else if (strcmp(fuzzer, "xpath") == 0) { + processArg = processXPathDir; + } else { + fprintf(stderr, "unknown fuzzer %s\n", fuzzer); + return(1); + } + globalData.fuzzer = fuzzer; + + if (getcwd(globalData.cwd, PATH_SIZE) == NULL) { + fprintf(stderr, "couldn't get current directory\n"); + return(1); + } + + for (i = 2; i < argc; i++) + processArg(argv[i]); + + return(ret); +} + diff --git a/fuzz/htmlSeed.c b/fuzz/htmlSeed.c deleted file mode 100644 index f3213e2e..00000000 --- a/fuzz/htmlSeed.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * htmlSeed.c: Generate the HTML seed corpus for 
fuzzing. - * - * See Copyright for the status of this software. - */ - -#include <stdio.h> - -#define SEED_BUF_SIZE 16384 - -int -main(int argc, char **argv) { - int opts = 0; - FILE *file; - char buf[SEED_BUF_SIZE]; - size_t size; - - if (argc != 2) { - fprintf(stderr, "Usage: htmlSeed [FILE]\n"); - return(1); - } - - fwrite(&opts, sizeof(opts), 1, stdout); - - /* Copy file */ - file = fopen(argv[1], "rb"); - do { - size = fread(buf, 1, SEED_BUF_SIZE, file); - if (size > 0) - fwrite(buf, 1, size, stdout); - } while (size == SEED_BUF_SIZE); - fclose(file); - - return(0); -} - diff --git a/fuzz/schemaSeed.c b/fuzz/schemaSeed.c deleted file mode 100644 index 4e2c6bc6..00000000 --- a/fuzz/schemaSeed.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * xmlSeed.c: Generate the XML seed corpus for fuzzing. - * - * See Copyright for the status of this software. - */ - -#include <stdio.h> -#include <libxml/xmlschemas.h> -#include "fuzz.h" - -int -main(int argc, char **argv) { - xmlSchemaPtr schema; - xmlSchemaParserCtxtPtr pctxt; - - if (argc != 2) { - fprintf(stderr, "Usage: schemaSeed [XSD]\n"); - return(1); - } - - xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityRecorder); - - pctxt = xmlSchemaNewParserCtxt(argv[1]); - xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL); - schema = xmlSchemaParse(pctxt); - xmlSchemaFreeParserCtxt(pctxt); - - xmlSchemaFree(schema); - xmlFuzzDataCleanup(); - - return(0); -} - diff --git a/fuzz/testFuzzer.c b/fuzz/testFuzzer.c index f6be7b8f..678f3243 100644 --- a/fuzz/testFuzzer.c +++ b/fuzz/testFuzzer.c @@ -6,13 +6,93 @@ */ #include <string.h> +#include <glob.h> #include <libxml/parser.h> #include <libxml/tree.h> #include <libxml/xmlstring.h> #include "fuzz.h" -int -main() { +#define LLVMFuzzerInitialize fuzzHtmlInit +#define LLVMFuzzerTestOneInput fuzzHtml +#include "html.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput + +#define LLVMFuzzerInitialize fuzzRegexpInit 
+#define LLVMFuzzerTestOneInput fuzzRegexp +#include "regexp.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput + +#define LLVMFuzzerInitialize fuzzSchemaInit +#define LLVMFuzzerTestOneInput fuzzSchema +#include "schema.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput + +#define LLVMFuzzerInitialize fuzzUriInit +#define LLVMFuzzerTestOneInput fuzzUri +#include "uri.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput + +#define LLVMFuzzerInitialize fuzzXmlInit +#define LLVMFuzzerTestOneInput fuzzXml +#include "xml.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput + +#define LLVMFuzzerInitialize fuzzXPathInit +#define LLVMFuzzerTestOneInput fuzzXPath +#include "xpath.c" +#undef LLVMFuzzerInitialize +#undef LLVMFuzzerTestOneInput + +typedef int +(*initFunc)(int *argc, char ***argv); +typedef int +(*fuzzFunc)(const char *data, size_t size); + +int numInputs; + +static int +testFuzzer(initFunc init, fuzzFunc fuzz, const char *pattern) { + glob_t globbuf; + int ret = -1; + int i; + + if (glob(pattern, 0, NULL, &globbuf) != 0) { + fprintf(stderr, "pattern %s matches no files\n", pattern); + return(-1); + } + + if (init != NULL) + init(NULL, NULL); + + for (i = 0; i < globbuf.gl_pathc; i++) { + const char *path = globbuf.gl_pathv[i]; + char *data; + size_t size; + + data = xmlSlurpFile(path, &size); + if (data == NULL) { + fprintf(stderr, "couldn't read %s\n", path); + goto error; + } + fuzz(data, size); + xmlFree(data); + + numInputs++; + } + + ret = 0; +error: + globfree(&globbuf); + return(ret); +} + +static int +testEntityLoader() { static const char data[] = "doc.xml\\\n" "<!DOCTYPE doc SYSTEM \"doc.dtd\">\n" @@ -53,3 +133,28 @@ main() { return(ret); } +int +main() { + int ret = 0; + + if (testEntityLoader() != 0) + ret = 1; + if (testFuzzer(fuzzHtmlInit, fuzzHtml, "seed/html/*") != 0) + ret = 1; + if (testFuzzer(fuzzRegexpInit, fuzzRegexp, "seed/regexp/*") != 0) + ret = 1; + if (testFuzzer(fuzzSchemaInit, fuzzSchema, 
"seed/schema/*") != 0) + ret = 1; + if (testFuzzer(NULL, fuzzUri, "seed/uri/*") != 0) + ret = 1; + if (testFuzzer(fuzzXmlInit, fuzzXml, "seed/xml/*") != 0) + ret = 1; + if (testFuzzer(fuzzXPathInit, fuzzXPath, "seed/xpath/*") != 0) + ret = 1; + + if (ret == 0) + printf("Successfully tested %d inputs\n", numInputs); + + return(ret); +} + @@ -7,6 +7,7 @@ #include <libxml/parser.h> #include <libxml/tree.h> #include <libxml/xmlerror.h> +#include <libxml/xinclude.h> #include <libxml/xmlreader.h> #include "fuzz.h" @@ -27,7 +28,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { xmlParserCtxtPtr ctxt; xmlTextReaderPtr reader; xmlChar *out; - const char *docBuffer; + const char *docBuffer, *docUrl; size_t docSize, consumed, chunkSize; int opts, outSize; @@ -38,6 +39,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { xmlFuzzReadEntities(); docBuffer = xmlFuzzMainEntity(&docSize); + docUrl = xmlFuzzMainUrl(); if (docBuffer == NULL) { xmlFuzzDataCleanup(); return(0); @@ -45,7 +47,9 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { /* Pull parser */ - doc = xmlReadMemory(docBuffer, docSize, NULL, NULL, opts); + doc = xmlReadMemory(docBuffer, docSize, docUrl, NULL, opts); + if (opts & XML_PARSE_XINCLUDE) + xmlXIncludeProcessFlags(doc, opts); /* Also test the serializer. 
*/ xmlDocDumpMemory(doc, &out, &outSize); xmlFree(out); @@ -53,7 +57,7 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { /* Push parser */ - ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL); + ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, docUrl); xmlCtxtUseOptions(ctxt, opts); for (consumed = 0; consumed < docSize; consumed += chunkSize) { @@ -64,6 +68,8 @@ LLVMFuzzerTestOneInput(const char *data, size_t size) { } xmlParseChunk(ctxt, NULL, 0, 1); + if (opts & XML_PARSE_XINCLUDE) + xmlXIncludeProcessFlags(ctxt->myDoc, opts); xmlFreeDoc(ctxt->myDoc); xmlFreeParserCtxt(ctxt); diff --git a/fuzz/xmlSeed.c b/fuzz/xmlSeed.c deleted file mode 100644 index 5ce97d0b..00000000 --- a/fuzz/xmlSeed.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * xmlSeed.c: Generate the XML seed corpus for fuzzing. - * - * See Copyright for the status of this software. - */ - -#include <stdio.h> -#include "fuzz.h" - -int -main(int argc, char **argv) { - int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD; - - if (argc != 2) { - fprintf(stderr, "Usage: xmlSeed [FILE]\n"); - return(1); - } - - fwrite(&opts, sizeof(opts), 1, stdout); - - xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); - xmlSetExternalEntityLoader(xmlFuzzEntityRecorder); - xmlFreeDoc(xmlReadFile(argv[1], NULL, opts)); - xmlFuzzDataCleanup(); - - return(0); -} - diff --git a/fuzz/xpath.c b/fuzz/xpath.c new file mode 100644 index 00000000..767acb98 --- /dev/null +++ b/fuzz/xpath.c @@ -0,0 +1,48 @@ +/* + * xpath.c: a libFuzzer target to test XPath and XPointer expressions. + * + * See Copyright for the status of this software. 
+ */ + +#include <libxml/parser.h> +#include <libxml/xpointer.h> +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlInitParser(); + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlDocPtr doc; + const char *expr, *xml; + size_t exprSize, xmlSize; + + xmlFuzzDataInit(data, size); + + expr = xmlFuzzReadString(&exprSize); + xml = xmlFuzzReadString(&xmlSize); + + /* Recovery mode allows more input to be fuzzed. */ + doc = xmlReadMemory(xml, xmlSize, NULL, NULL, XML_PARSE_RECOVER); + if (doc != NULL) { + xmlXPathContextPtr xpctxt = xmlXPathNewContext(doc); + + /* Operation limit to avoid timeout */ + xpctxt->opLimit = 500000; + + xmlXPathFreeObject(xmlXPtrEval(BAD_CAST expr, xpctxt)); + xmlXPathFreeContext(xpctxt); + } + xmlFreeDoc(doc); + + xmlFuzzDataCleanup(); + + return(0); +} + diff --git a/fuzz/xpath.dict b/fuzz/xpath.dict new file mode 100644 index 00000000..4fe375fb --- /dev/null +++ b/fuzz/xpath.dict @@ -0,0 +1,94 @@ +# XML + +elem_a="<a></a>" +elem_b="<b></b>" +elem_c="<c></c>" +elem_d="<d></d>" +elem_empty="<a/>" +elem_ns_a="<a:a xmlns:a='a'></a:a>" +elem_ns_b="<b:b xmlns:b='b'></b:b>" + +attr_a=" a='a'" +attr_b=" b='b'" + +ns_decl=" xmlns:a='a'" +ns_default=" xmlns='a'" +ns_prefix_a="a:" +ns_prefix_b="b:" + +cdata_section="<![CDATA[ ]]>" + +comment="<!-- -->" + +pi="<?a?>" + +# XPath + +axis_ancestor="ancestor::" +axis_ancestor_or_self="ancestor-or-self::" +axis_attribute="attribute::" +axis_attribute_abbrev="@" +axis_child="child::" +axis_descendant="descendant::" +axis_descendant_or_self="descendant-or-self::" +axis_following="following::" +axis_following_sibling="following-sibling::" +axis_namespace="namespace::" +axis_parent="parent::" +axis_preceding="preceding::" +axis_preceding_siblings="preceding-sibling::" +axis_self="self::" + +node_test_ns="a:" + +val_num="=(1.0)" +val_str_sq="=('a')" 
+val_str_dq="=(\"a\")" +val_node_set="=(*)" +val_elem="=(b)" + +step_root="/" +step_descendant="//" +step_any="//*" +step_any_l="*//" +step_elem="//b" +step_ns_elem="//a:a" +step_comment="//comment()" +step_node="//node()" +step_node_l="node()//" +step_pi="//processing-instruction()" +step_text="//text()" +step_parent="../" + +op_plus="+1" +op_minus=" - 1" +op_neg="-" +op_mul="*1" +op_div=" div 1" +op_mod=" mod 1" +op_and=" and 1" +op_or=" or 1" +op_ne="!=1" +op_lt="<1" +op_gt=">1" +op_le="<=1" +op_ge=">=1" +op_predicate_num="[1]" +op_predicate_last="[last()]" +op_predicate_str="['a']" +op_predicate="[1=1]" +op_arg_num=",1" +op_arg_str=",'a'" +op_arg_node=",*" +op_union="|//b" + +var_num="=$f" +var_bool="=$b" +var_str="=$s" +var_node_set="=$n" + +# Unicode + +utf8_2="\xC3\x84" +utf8_3="\xE2\x80\x9C" +utf8_4="\xF0\x9F\x98\x80" diff --git a/fuzz/xpath.options b/fuzz/xpath.options new file mode 100644 index 00000000..02d5e976 --- /dev/null +++ b/fuzz/xpath.options @@ -0,0 +1,3 @@ +[libfuzzer] +max_len = 10000 +timeout = 20 diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 3020b20c..1c86a97c 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -231,7 +231,7 @@ struct _xmlParserCtxt { int nameMax; /* Max depth of the parsing stack */ const xmlChar * *nameTab; /* array of nodes */ - long nbChars; /* number of xmlChar processed */ + long nbChars; /* unused */ long checkIndex; /* used by progressive parsing lookup */ int keepBlanks; /* ugly but ... 
*/ int disableSAX; /* SAX callbacks are disabled */ @@ -1073,11 +1073,15 @@ xmlHasFeature(xmlFeature feature) */ static void xmlDetectSAX2(xmlParserCtxtPtr ctxt) { + xmlSAXHandlerPtr sax; if (ctxt == NULL) return; + sax = ctxt->sax; #ifdef LIBXML_SAX1_ENABLED - if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && - ((ctxt->sax->startElementNs != NULL) || - (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; + if ((sax) && (sax->initialized == XML_SAX2_MAGIC) && + ((sax->startElementNs != NULL) || + (sax->endElementNs != NULL) || + ((sax->startElement == NULL) && (sax->endElement == NULL)))) + ctxt->sax2 = 1; #else ctxt->sax2 = 1; #endif /* LIBXML_SAX1_ENABLED */ @@ -2055,7 +2059,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { ((unsigned char *) s)[ 9 ] == c10 ) #define SKIP(val) do { \ - ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ + ctxt->input->cur += (val),ctxt->input->col+=(val); \ if (*ctxt->input->cur == 0) \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ } while (0) @@ -2066,7 +2070,6 @@ static int spacePop(xmlParserCtxtPtr ctxt) { if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ - ctxt->nbChars++; \ ctxt->input->cur++; \ } \ if (*ctxt->input->cur == 0) \ @@ -2119,7 +2122,6 @@ static void xmlGROW (xmlParserCtxtPtr ctxt) { #define NEXT1 { \ ctxt->input->col++; \ ctxt->input->cur++; \ - ctxt->nbChars++; \ if (*ctxt->input->cur == 0) \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ } @@ -2332,7 +2334,6 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->input->col++; - ctxt->nbChars ++; ctxt->input->cur++; } } else if ((RAW == '&') && (NXT(1) == '#')) { @@ -2361,7 +2362,6 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->input->col++; - ctxt->nbChars ++; ctxt->input->cur++; } } else { @@ 
-3333,7 +3333,6 @@ xmlParseName(xmlParserCtxtPtr ctxt) { } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; - ctxt->nbChars += count; ctxt->input->col += count; if (ret == NULL) xmlErrMemory(ctxt, NULL); @@ -3456,7 +3455,6 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) { } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; - ctxt->nbChars += count; ctxt->input->col += count; if (ret == NULL) { xmlErrMemory(ctxt, NULL); @@ -3493,10 +3491,10 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { while (*in != 0 && *in == *cmp) { ++in; ++cmp; - ctxt->input->col++; } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { /* success */ + ctxt->input->col += in - ctxt->input->cur; ctxt->input->cur = in; return (const xmlChar*) 1; } @@ -8826,6 +8824,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { /* success */ + ctxt->input->col += in - ctxt->input->cur; ctxt->input->cur = in; return((const xmlChar*) 1); } @@ -14696,7 +14695,12 @@ xmlCleanupParser(void) { static void ATTRIBUTE_DESTRUCTOR xmlDestructor(void) { - xmlCleanupParser(); + /* + * Calling custom deallocation functions in a destructor can cause + * problems, for example with Nokogiri. 
+ */ + if (xmlFree == free) + xmlCleanupParser(); } #endif @@ -14788,7 +14792,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->vctxt.warning = xmlParserValidityWarning; #endif ctxt->record_info = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; diff --git a/parserInternals.c b/parserInternals.c index d849c08b..b0629ef3 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -519,8 +519,6 @@ xmlNextChar(xmlParserCtxtPtr ctxt) } else /* 1-byte code */ ctxt->input->cur++; - - ctxt->nbChars++; } else { /* * Assume it's a fixed length encoding (1) with @@ -533,7 +531,6 @@ xmlNextChar(xmlParserCtxtPtr ctxt) } else ctxt->input->col++; ctxt->input->cur++; - ctxt->nbChars++; } if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); @@ -677,7 +674,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { } if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { - ctxt->nbChars++; ctxt->input->cur++; } return(0xA); @@ -693,7 +689,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { *len = 1; if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { - ctxt->nbChars++; ctxt->input->cur++; } return(0xA); @@ -1748,7 +1743,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->options |= XML_PARSE_NOENT; } ctxt->record_info = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; diff --git a/result/XInclude/fallback3.xml b/result/XInclude/fallback3.xml new file mode 100644 index 00000000..b4235514 --- /dev/null +++ b/result/XInclude/fallback3.xml @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<a> + <doc xml:base="../ents/something.xml"> +<p>something</p> +<p>really</p> +<p>simple</p> +</doc> +</a> diff --git a/result/XInclude/fallback3.xml.err b/result/XInclude/fallback3.xml.err new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/result/XInclude/fallback3.xml.err diff --git a/result/XInclude/fallback3.xml.rdr b/result/XInclude/fallback3.xml.rdr new file mode 
100644 index 00000000..aa2f1374 --- /dev/null +++ b/result/XInclude/fallback3.xml.rdr @@ -0,0 +1,25 @@ +0 1 a 0 0 +1 14 #text 0 1 + +1 1 doc 0 0 +2 14 #text 0 1 + +2 1 p 0 0 +3 3 #text 0 1 something +2 15 p 0 0 +2 14 #text 0 1 + +2 1 p 0 0 +3 3 #text 0 1 really +2 15 p 0 0 +2 14 #text 0 1 + +2 1 p 0 0 +3 3 #text 0 1 simple +2 15 p 0 0 +2 14 #text 0 1 + +1 15 doc 0 0 +1 14 #text 0 1 + +0 15 a 0 0 diff --git a/result/XInclude/fallback4.xml b/result/XInclude/fallback4.xml new file mode 100644 index 00000000..9883fd54 --- /dev/null +++ b/result/XInclude/fallback4.xml @@ -0,0 +1,10 @@ +<?xml version="1.0"?> +<a> + + <doc xml:base="../ents/something.xml"> +<p>something</p> +<p>really</p> +<p>simple</p> +</doc> + +</a> diff --git a/result/XInclude/fallback4.xml.err b/result/XInclude/fallback4.xml.err new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/result/XInclude/fallback4.xml.err diff --git a/result/XInclude/fallback4.xml.rdr b/result/XInclude/fallback4.xml.rdr new file mode 100644 index 00000000..628b9513 --- /dev/null +++ b/result/XInclude/fallback4.xml.rdr @@ -0,0 +1,29 @@ +0 1 a 0 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 doc 0 0 +2 14 #text 0 1 + +2 1 p 0 0 +3 3 #text 0 1 something +2 15 p 0 0 +2 14 #text 0 1 + +2 1 p 0 0 +3 3 #text 0 1 really +2 15 p 0 0 +2 14 #text 0 1 + +2 1 p 0 0 +3 3 #text 0 1 simple +2 15 p 0 0 +2 14 #text 0 1 + +1 15 doc 0 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +0 15 a 0 0 diff --git a/result/XInclude/fallback5.xml b/result/XInclude/fallback5.xml new file mode 100644 index 00000000..0ba503d9 --- /dev/null +++ b/result/XInclude/fallback5.xml @@ -0,0 +1,51 @@ +<?xml version="1.0"?> +<a> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + <elem/> + + + + + + + + + + + + + + + + +</a> diff --git a/result/XInclude/fallback5.xml.rdr b/result/XInclude/fallback5.xml.rdr new file mode 100644 index 
00000000..0e1dab71 --- /dev/null +++ b/result/XInclude/fallback5.xml.rdr @@ -0,0 +1,116 @@ +0 1 a 0 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +1 14 #text 0 1 + +0 15 a 0 0 diff --git a/result/XInclude/fallback6.xml b/result/XInclude/fallback6.xml new file mode 100644 index 00000000..2b5d4116 --- /dev/null +++ b/result/XInclude/fallback6.xml @@ -0,0 +1 @@ +<?xml version="1.0"?> diff --git a/result/XInclude/fallback6.xml.rdr b/result/XInclude/fallback6.xml.rdr new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/result/XInclude/fallback6.xml.rdr diff --git a/result/XInclude/ns1.xml b/result/XInclude/ns1.xml new file mode 100644 index 00000000..ab41fb7a --- /dev/null +++ b/result/XInclude/ns1.xml @@ -0,0 +1,10 @@ +<?xml version="1.0"?> +<doc xmlns:xi="http://www.w3.org/2001/XInclude"> + <ns:elem xmlns:ns="urn:foo" xml:id="a"/> + <elem xmlns:ns="urn:foo"> + <ns:elem xml:id="a"/> + </elem> + + <ns:elem xmlns:ns="urn:bar"/> + 
+</doc> diff --git a/result/XInclude/ns1.xml.rdr b/result/XInclude/ns1.xml.rdr new file mode 100644 index 00000000..f23702f5 --- /dev/null +++ b/result/XInclude/ns1.xml.rdr @@ -0,0 +1,23 @@ +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 ns:elem 1 0 +1 14 #text 0 1 + +1 1 elem 0 0 +2 14 #text 0 1 + +2 1 ns:elem 1 0 +2 14 #text 0 1 + +1 15 elem 0 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +1 1 ns:elem 1 0 +1 14 #text 0 1 + +1 14 #text 0 1 + +0 15 doc 0 0 @@ -2108,16 +2108,16 @@ errParseTest(const char *filename, const char *result, const char *err, xmlDocDumpMemory(doc, (xmlChar **) &base, &size); } res = compareFileMem(result, base, size); - if (res != 0) { - fprintf(stderr, "Result for %s failed in %s\n", filename, result); - return(-1); - } } if (doc != NULL) { if (base != NULL) xmlFree((char *)base); xmlFreeDoc(doc); } + if (res != 0) { + fprintf(stderr, "Result for %s failed in %s\n", filename, result); + return(-1); + } if (err != NULL) { res = compareFileMem(err, testErrors, testErrorsSize); if (res != 0) { diff --git a/test/XInclude/docs/fallback3.xml b/test/XInclude/docs/fallback3.xml new file mode 100644 index 00000000..0c8b6c9e --- /dev/null +++ b/test/XInclude/docs/fallback3.xml @@ -0,0 +1,9 @@ +<a> + <xi:include href="../ents/something.xml" xmlns:xi="http://www.w3.org/2001/XInclude"> + <xi:fallback> + <xi:include href="c.xml"> + <xi:fallback>There is no c.xml ... 
</xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> +</a> diff --git a/test/XInclude/docs/fallback4.xml b/test/XInclude/docs/fallback4.xml new file mode 100644 index 00000000..b500a635 --- /dev/null +++ b/test/XInclude/docs/fallback4.xml @@ -0,0 +1,7 @@ +<a> + <xi:include href="c.xml" xmlns:xi="http://www.w3.org/2001/XInclude"> + <xi:fallback> + <xi:include href="../ents/something.xml"/> + </xi:fallback> + </xi:include> +</a> diff --git a/test/XInclude/docs/fallback5.xml b/test/XInclude/docs/fallback5.xml new file mode 100644 index 00000000..d3ad4246 --- /dev/null +++ b/test/XInclude/docs/fallback5.xml @@ -0,0 +1,83 @@ +<a> + <xi:include href="a01.xml" xmlns:xi="http://www.w3.org/2001/XInclude"> + <xi:fallback> + <elem/> + <xi:include href="a02.xml"> + <xi:fallback> + <elem/> + <xi:include href="a03.xml"> + <xi:fallback> + <elem/> + <xi:include href="a04.xml"> + <xi:fallback> + <elem/> + <xi:include href="a05.xml"> + <xi:fallback> + <elem/> + <xi:include href="a06.xml"> + <xi:fallback> + <elem/> + <xi:include href="a07.xml"> + <xi:fallback> + <elem/> + <xi:include href="a08.xml"> + <xi:fallback> + <elem/> + <xi:include href="a09.xml"> + <xi:fallback> + <elem/> + <xi:include href="a10.xml"> + <xi:fallback> + <elem/> + <xi:include href="a11.xml"> + <xi:fallback> + <elem/> + <xi:include href="a12.xml"> + <xi:fallback> + <elem/> + <xi:include href="a13.xml"> + <xi:fallback> + <elem/> + <xi:include href="a14.xml"> + <xi:fallback> + <elem/> + <xi:include href="a15.xml"> + <xi:fallback> + <elem/> + <xi:include href="a16.xml"> + <xi:fallback> + <elem/> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + 
</xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> + </xi:fallback> + </xi:include> +</a> + diff --git a/test/XInclude/docs/fallback6.xml b/test/XInclude/docs/fallback6.xml new file mode 100644 index 00000000..fd00a03f --- /dev/null +++ b/test/XInclude/docs/fallback6.xml @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="b.xml"> + <xi:fallback><xi:include href="c.xml"> + <xi:fallback/> + </xi:include></xi:fallback> +</xi:include> diff --git a/test/XInclude/docs/ns1.xml b/test/XInclude/docs/ns1.xml new file mode 100644 index 00000000..7523f4a9 --- /dev/null +++ b/test/XInclude/docs/ns1.xml @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<doc xmlns:xi="http://www.w3.org/2001/XInclude"> + <xi:include href="#a"/> + <elem xmlns:ns="urn:foo"> + <ns:elem xml:id="a"/> + </elem> + <xi:include href="b.xml"> + <xi:fallback xmlns:ns="urn:bar"> + <ns:elem/> + </xi:fallback> + </xi:include> +</doc> @@ -59,7 +59,7 @@ struct _xmlXIncludeRef { xmlNodePtr inc; /* the included copy */ int xml; /* xml or txt */ int count; /* how many refs use that specific doc */ - xmlXPathObjectPtr xptr; /* the xpointer if needed */ + int fallback; /* fallback was loaded */ int emptyFb; /* flag to show fallback empty */ }; @@ -86,10 +86,13 @@ struct _xmlXIncludeCtxt { xmlChar * base; /* the current xml:base */ void *_private; /* application data */ + + unsigned long incTotal; /* total number of processed inclusions */ }; static int -xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree); +xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree, + int skipRoot); /************************************************************************ @@ -207,8 +210,6 @@ xmlXIncludeFreeRef(xmlXIncludeRefPtr ref) { xmlFree(ref->URI); if (ref->fragment != NULL) xmlFree(ref->fragment); - if (ref->xptr != NULL) - xmlXPathFreeObject(ref->xptr); xmlFree(ref); } @@ -626,8 +627,8 @@ 
xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) { xmlXIncludeErr(ctxt, cur, XML_XINCLUDE_RECURSION, "detected a local recursion with no xpointer in %s\n", URL); - if (fragment != NULL) - xmlFree(fragment); + xmlFree(URL); + xmlFree(fragment); return(-1); } @@ -639,12 +640,15 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) { if (xmlStrEqual(URL, ctxt->urlTab[i])) { xmlXIncludeErr(ctxt, cur, XML_XINCLUDE_RECURSION, "detected a recursion in %s\n", URL); + xmlFree(URL); + xmlFree(fragment); return(-1); } } } ref = xmlXIncludeNewRef(ctxt, URL, cur); + xmlFree(URL); if (ref == NULL) { return(-1); } @@ -652,7 +656,6 @@ xmlXIncludeAddNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr cur) { ref->doc = NULL; ref->xml = xml; ref->count = 1; - xmlFree(URL); return(0); } @@ -729,7 +732,9 @@ xmlXIncludeRecurseDoc(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, * (bug 132597) */ newctxt->parseFlags = ctxt->parseFlags; - xmlXIncludeDoProcess(newctxt, doc, xmlDocGetRootElement(doc)); + newctxt->incTotal = ctxt->incTotal; + xmlXIncludeDoProcess(newctxt, doc, xmlDocGetRootElement(doc), 0); + ctxt->incTotal = newctxt->incTotal; for (i = 0;i < ctxt->incNr;i++) { newctxt->incTab[i]->count--; newctxt->incTab[i] = NULL; @@ -1459,7 +1464,7 @@ xmlXIncludeLoadDoc(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) { */ if ((URL[0] == 0) || (URL[0] == '#') || ((ctxt->doc != NULL) && (xmlStrEqual(URL, ctxt->doc->URL)))) { - doc = NULL; + doc = ctxt->doc; goto loaded; } @@ -1551,15 +1556,8 @@ loaded: /* * Add the top children list as the replacement copy. 
*/ - if (doc == NULL) - { - /* Hopefully a DTD declaration won't be copied from - * the same document */ - ctxt->incTab[nr]->inc = xmlCopyNodeList(ctxt->doc->children); - } else { - ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc, - doc, doc->children); - } + ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc, + doc, doc->children); } #ifdef LIBXML_XPTR_ENABLED else { @@ -1571,12 +1569,7 @@ loaded: xmlXPathContextPtr xptrctxt; xmlNodeSetPtr set; - if (doc == NULL) { - xptrctxt = xmlXPtrNewContext(ctxt->doc, ctxt->incTab[nr]->ref, - NULL); - } else { - xptrctxt = xmlXPtrNewContext(doc, NULL, NULL); - } + xptrctxt = xmlXPtrNewContext(doc, NULL, NULL); if (xptrctxt == NULL) { xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, XML_XINCLUDE_XPTR_FAILED, @@ -1680,14 +1673,9 @@ loaded: } } } - if (doc == NULL) { - ctxt->incTab[nr]->xptr = xptr; - ctxt->incTab[nr]->inc = NULL; - } else { - ctxt->incTab[nr]->inc = - xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr); - xmlXPathFreeObject(xptr); - } + ctxt->incTab[nr]->inc = + xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr); + xmlXPathFreeObject(xptr); xmlXPathFreeContext(xptrctxt); xmlFree(fragment); } @@ -1990,19 +1978,23 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { newctxt->_private = ctxt->_private; newctxt->base = xmlStrdup(ctxt->base); /* Inherit the base from the existing context */ xmlXIncludeSetFlags(newctxt, ctxt->parseFlags); - ret = xmlXIncludeDoProcess(newctxt, ctxt->doc, fallback->children); + newctxt->incTotal = ctxt->incTotal; + if (xmlXIncludeDoProcess(newctxt, ctxt->doc, fallback, 1) < 0) + ret = -1; + ctxt->incTotal = newctxt->incTotal; if (ctxt->nbErrors > oldNbErrors) ret = -1; - else if (ret > 0) - ret = 0; /* xmlXIncludeDoProcess can return +ve number */ xmlXIncludeFreeContext(newctxt); ctxt->incTab[nr]->inc = xmlDocCopyNodeList(ctxt->doc, fallback->children); + if (ctxt->incTab[nr]->inc == NULL) + ctxt->incTab[nr]->emptyFb = 1; } else { 
ctxt->incTab[nr]->inc = NULL; ctxt->incTab[nr]->emptyFb = 1; /* flag empty callback */ } + ctxt->incTab[nr]->fallback = 1; return(ret); } @@ -2158,8 +2150,7 @@ xmlXIncludeLoadNode(xmlXIncludeCtxtPtr ctxt, int nr) { ((xmlStrEqual(children->ns->href, XINCLUDE_NS)) || (xmlStrEqual(children->ns->href, XINCLUDE_OLD_NS)))) { ret = xmlXIncludeLoadFallback(ctxt, children, nr); - if (ret == 0) - break; + break; } children = children->next; } @@ -2206,19 +2197,9 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL)) return(-1); - /* - * If we stored an XPointer a late computation may be needed - */ - if ((ctxt->incTab[nr]->inc == NULL) && - (ctxt->incTab[nr]->xptr != NULL)) { - ctxt->incTab[nr]->inc = - xmlXIncludeCopyXPointer(ctxt, ctxt->doc, ctxt->doc, - ctxt->incTab[nr]->xptr); - xmlXPathFreeObject(ctxt->incTab[nr]->xptr); - ctxt->incTab[nr]->xptr = NULL; - } list = ctxt->incTab[nr]->inc; ctxt->incTab[nr]->inc = NULL; + ctxt->incTab[nr]->emptyFb = 0; /* * Check against the risk of generating a multi-rooted document @@ -2238,6 +2219,7 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { XML_XINCLUDE_MULTIPLE_ROOT, "XInclude error: would result in multiple root nodes\n", NULL); + xmlFreeNodeList(list); return(-1); } } @@ -2255,16 +2237,27 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { xmlUnlinkNode(cur); xmlFreeNode(cur); } else { + xmlNodePtr child, next; + /* * Change the current node as an XInclude start one, and add an * XInclude end one */ + if (ctxt->incTab[nr]->fallback) + xmlUnsetProp(cur, BAD_CAST "href"); cur->type = XML_XINCLUDE_START; + /* Remove fallback children */ + for (child = cur->children; child != NULL; child = next) { + next = child->next; + xmlUnlinkNode(child); + xmlFreeNode(child); + } end = xmlNewDocNode(cur->doc, cur->ns, cur->name, NULL); if (end == NULL) { xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, XML_XINCLUDE_BUILD_FAILED, "failed to build node\n", NULL); + 
xmlFreeNodeList(list); return(-1); } end->type = XML_XINCLUDE_END; @@ -2366,6 +2359,7 @@ xmlXIncludeTestNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) { * @ctxt: the XInclude processing context * @doc: an XML document * @tree: the top of the tree to process + * @skipRoot: don't process the root node of the tree * * Implement the XInclude substitution on the XML document @doc * @@ -2373,13 +2367,16 @@ xmlXIncludeTestNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) { * or the number of substitutions done. */ static int -xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) { +xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree, + int skipRoot) { xmlNodePtr cur; int ret = 0; int i, start; if ((doc == NULL) || (tree == NULL) || (tree->type == XML_NAMESPACE_DECL)) return(-1); + if ((skipRoot) && (tree->children == NULL)) + return(-1); if (ctxt == NULL) return(-1); @@ -2391,40 +2388,59 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) { start = ctxt->incNr; /* + * TODO: The phases must run separately for recursive inclusions. + * + * - Phase 1 should start with top-level XInclude nodes, load documents, + * execute XPointer expressions, then process only the result nodes + * (not whole document, see bug #324081) and only for phase 1 + * recursively. We will need a backreference from xmlNodes to + * xmlIncludeRefs to detect references that were already visited. + * This can also be used for proper cycle detection, see bug #344240. + * + * - Phase 2 should visit all top-level XInclude nodes and expand + * possible subreferences in the replacement recursively. + * + * - Phase 3 should finally replace the top-level XInclude nodes. + * It could also be run together with phase 2. 
+ */ + + /* * First phase: lookup the elements in the document */ - cur = tree; - if (xmlXIncludeTestNode(ctxt, cur) == 1) - xmlXIncludePreProcessNode(ctxt, cur); - while ((cur != NULL) && (cur != tree->parent)) { + if (skipRoot) + cur = tree->children; + else + cur = tree; + do { /* TODO: need to work on entities -> stack */ - if ((cur->children != NULL) && - (cur->children->type != XML_ENTITY_DECL) && - (cur->children->type != XML_XINCLUDE_START) && - (cur->children->type != XML_XINCLUDE_END)) { - cur = cur->children; - if (xmlXIncludeTestNode(ctxt, cur)) - xmlXIncludePreProcessNode(ctxt, cur); - } else if (cur->next != NULL) { - cur = cur->next; - if (xmlXIncludeTestNode(ctxt, cur)) - xmlXIncludePreProcessNode(ctxt, cur); - } else { - if (cur == tree) - break; - do { - cur = cur->parent; - if ((cur == NULL) || (cur == tree->parent)) - break; /* do */ - if (cur->next != NULL) { - cur = cur->next; - if (xmlXIncludeTestNode(ctxt, cur)) - xmlXIncludePreProcessNode(ctxt, cur); - break; /* do */ - } - } while (cur != NULL); - } - } + if (xmlXIncludeTestNode(ctxt, cur) == 1) { +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* + * Avoid superlinear expansion by limiting the total number + * of replacements. 
+ */ + if (ctxt->incTotal >= 20) + return(-1); +#endif + ctxt->incTotal++; + xmlXIncludePreProcessNode(ctxt, cur); + } else if ((cur->children != NULL) && + (cur->children->type != XML_ENTITY_DECL) && + (cur->children->type != XML_XINCLUDE_START) && + (cur->children->type != XML_XINCLUDE_END)) { + cur = cur->children; + continue; + } + do { + if (cur == tree) + break; + if (cur->next != NULL) { + cur = cur->next; + break; + } + cur = cur->parent; + } while (cur != NULL); + } while ((cur != NULL) && (cur != tree)); /* * Second Phase : collect the infosets fragments @@ -2447,8 +2463,7 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree) { */ for (i = ctxt->incBase;i < ctxt->incNr; i++) { if ((ctxt->incTab[i]->inc != NULL) || - (ctxt->incTab[i]->xptr != NULL) || - (ctxt->incTab[i]->emptyFb != 0)) /* (empty fallback) */ + (ctxt->incTab[i]->emptyFb != 0)) /* (empty fallback) */ xmlXIncludeIncludeNode(ctxt, i); } @@ -2502,7 +2517,7 @@ xmlXIncludeProcessTreeFlagsData(xmlNodePtr tree, int flags, void *data) { ctxt->_private = data; ctxt->base = xmlStrdup((xmlChar *)tree->doc->URL); xmlXIncludeSetFlags(ctxt, flags); - ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree); + ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree, 0); if ((ret >= 0) && (ctxt->nbErrors > 0)) ret = -1; @@ -2586,7 +2601,7 @@ xmlXIncludeProcessTreeFlags(xmlNodePtr tree, int flags) { return(-1); ctxt->base = xmlNodeGetBase(tree->doc, tree); xmlXIncludeSetFlags(ctxt, flags); - ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree); + ret = xmlXIncludeDoProcess(ctxt, tree->doc, tree, 0); if ((ret >= 0) && (ctxt->nbErrors > 0)) ret = -1; @@ -2626,7 +2641,7 @@ xmlXIncludeProcessNode(xmlXIncludeCtxtPtr ctxt, xmlNodePtr node) { if ((node == NULL) || (node->type == XML_NAMESPACE_DECL) || (node->doc == NULL) || (ctxt == NULL)) return(-1); - ret = xmlXIncludeDoProcess(ctxt, node->doc, node); + ret = xmlXIncludeDoProcess(ctxt, node->doc, node, 0); if ((ret >= 0) && (ctxt->nbErrors > 0)) ret = 
-1; return(ret); @@ -528,6 +528,12 @@ static void xmlHTMLEncodeSend(void) { char *result; + /* + * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might + * end with a truncated UTF-8 sequence. This is a hack to at least avoid + * an out-of-bounds read. + */ + memset(&buffer[sizeof(buffer)-4], 0, 4); result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer); if (result) { xmlGenericError(xmlGenericErrorContext, "%s", result); @@ -2281,7 +2287,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) { doc = ctxt->myDoc; ret = ctxt->wellFormed; xmlFreeParserCtxt(ctxt); - if (!ret) { + if ((!ret) && (!recovery)) { xmlFreeDoc(doc); doc = NULL; } diff --git a/xmlreader.c b/xmlreader.c index 6ae6e922..a9b9ef93 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -48,6 +48,13 @@ #define MAX_ERR_MSG_SIZE 64000 +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +/* Keeping free objects can hide memory errors. */ +#define MAX_FREE_NODES 1 +#else +#define MAX_FREE_NODES 100 +#endif + /* * The following VA_COPY was coded following an example in * the Samba project. 
It may not be sufficient for some @@ -365,7 +372,7 @@ xmlTextReaderFreeProp(xmlTextReaderPtr reader, xmlAttrPtr cur) { DICT_FREE(cur->name); if ((reader != NULL) && (reader->ctxt != NULL) && - (reader->ctxt->freeAttrsNr < 100)) { + (reader->ctxt->freeAttrsNr < MAX_FREE_NODES)) { cur->next = reader->ctxt->freeAttrs; reader->ctxt->freeAttrs = cur; reader->ctxt->freeAttrsNr++; @@ -466,7 +473,7 @@ xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur) { if (((cur->type == XML_ELEMENT_NODE) || (cur->type == XML_TEXT_NODE)) && (reader != NULL) && (reader->ctxt != NULL) && - (reader->ctxt->freeElemsNr < 100)) { + (reader->ctxt->freeElemsNr < MAX_FREE_NODES)) { cur->next = reader->ctxt->freeElems; reader->ctxt->freeElems = cur; reader->ctxt->freeElemsNr++; @@ -554,7 +561,7 @@ xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur) { if (((cur->type == XML_ELEMENT_NODE) || (cur->type == XML_TEXT_NODE)) && (reader != NULL) && (reader->ctxt != NULL) && - (reader->ctxt->freeElemsNr < 100)) { + (reader->ctxt->freeElemsNr < MAX_FREE_NODES)) { cur->next = reader->ctxt->freeElems; reader->ctxt->freeElems = cur; reader->ctxt->freeElemsNr++; @@ -1491,6 +1498,8 @@ get_next_node: (reader->node->prev->type != XML_DTD_NODE)) { xmlNodePtr tmp = reader->node->prev; if ((tmp->extra & NODE_IS_PRESERVED) == 0) { + if (oldnode == tmp) + oldnode = NULL; xmlUnlinkNode(tmp); xmlTextReaderFreeNode(reader, tmp); } @@ -1049,7 +1049,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { while (1) { if (cur == root) return; - if (ctxt->format == 1) + if ((ctxt->format == 1) && + (cur->type != XML_XINCLUDE_START) && + (cur->type != XML_XINCLUDE_END)) xmlOutputBufferWrite(buf, 1, "\n"); if (cur->next != NULL) { cur = cur->next; @@ -1224,7 +1226,9 @@ xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur) { else #endif xmlNodeDumpOutputInternal(ctxt, child); - xmlOutputBufferWrite(buf, 1, "\n"); + if ((child->type != XML_XINCLUDE_START) && + (child->type != 
XML_XINCLUDE_END)) + xmlOutputBufferWrite(buf, 1, "\n"); child = child->next; } } diff --git a/xmlschemastypes.c b/xmlschemastypes.c index 4249d700..d6b9f924 100644 --- a/xmlschemastypes.c +++ b/xmlschemastypes.c @@ -3691,6 +3691,8 @@ xmlSchemaCompareDurations(xmlSchemaValPtr x, xmlSchemaValPtr y) minday = 0; maxday = 0; } else { + if (myear > LONG_MAX / 366) + return -2; /* FIXME: This doesn't take leap year exceptions every 100/400 years into account. */ maxday = 365 * myear + (myear + 3) / 4; @@ -4079,6 +4081,14 @@ xmlSchemaCompareDates (xmlSchemaValPtr x, xmlSchemaValPtr y) if ((x == NULL) || (y == NULL)) return -2; + if ((x->value.date.year > LONG_MAX / 366) || + (x->value.date.year < LONG_MIN / 366) || + (y->value.date.year > LONG_MAX / 366) || + (y->value.date.year < LONG_MIN / 366)) { + /* Possible overflow when converting to days. */ + return -2; + } + if (x->value.date.tz_flag) { if (!y->value.date.tz_flag) { diff --git a/xmlstring.c b/xmlstring.c index 8d2e06f6..e8a1e45d 100644 --- a/xmlstring.c +++ b/xmlstring.c @@ -130,16 +130,18 @@ xmlCharStrdup(const char *cur) { int xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { - register int tmp; - if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return(strcmp((const char *)str1, (const char *)str2)); +#else do { - tmp = *str1++ - *str2; + int tmp = *str1++ - *str2; if (tmp != 0) return(tmp); } while (*str2++ != 0); return 0; +#endif } /** @@ -158,10 +160,14 @@ xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { if (str1 == str2) return(1); if (str1 == NULL) return(0); if (str2 == NULL) return(0); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return(strcmp((const char *)str1, (const char *)str2) == 0); +#else do { if (*str1++ != *str2) return(0); } while (*str2++); return(1); +#endif } /** @@ -204,18 +210,15 @@ xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { int xmlStrncmp(const 
xmlChar *str1, const xmlChar *str2, int len) { - register int tmp; - if (len <= 0) return(0); if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); -#ifdef __GNUC__ - tmp = strncmp((const char *)str1, (const char *)str2, len); - return tmp; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return(strncmp((const char *)str1, (const char *)str2, len)); #else do { - tmp = *str1++ - *str2; + int tmp = *str1++ - *str2; if (tmp != 0 || --len == 0) return(tmp); } while (*str2++ != 0); return 0; @@ -136,6 +136,17 @@ #define XPATH_MAX_NODESET_LENGTH 10000000 /* + * XPATH_MAX_RECURSION_DEPTH: + * Maximum number of nested function calls when parsing or evaluating + * expressions + */ +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#define XPATH_MAX_RECURSION_DEPTH 500 +#else +#define XPATH_MAX_RECURSION_DEPTH 5000 +#endif + +/* * TODO: * There are a few spots where some tests are done which depend upon ascii * data. These should be enhanced for full UTF8 support (see particularly @@ -6118,9 +6129,6 @@ xmlXPathNewContext(xmlDocPtr doc) { ret->contextSize = -1; ret->proximityPosition = -1; - ret->maxDepth = INT_MAX; - ret->maxParserDepth = INT_MAX; - #ifdef XP_DEFAULT_CACHE_ON if (xmlXPathContextSetCache(ret, 1, -1, 0) == -1) { xmlXPathFreeContext(ret); @@ -10948,9 +10956,13 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) { xmlXPathContextPtr xpctxt = ctxt->context; if (xpctxt != NULL) { - if (xpctxt->depth >= xpctxt->maxParserDepth) + if (xpctxt->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR(XPATH_RECURSION_LIMIT_EXCEEDED); - xpctxt->depth += 1; + /* + * Parsing a single '(' pushes about 10 functions on the call stack + * before recursing! 
+ */ + xpctxt->depth += 10; } xmlXPathCompAndExpr(ctxt); @@ -11880,7 +11892,7 @@ xmlXPathCompOpEvalPredicate(xmlXPathParserContextPtr ctxt, "xmlXPathCompOpEvalPredicate: Expected a predicate\n"); XP_ERROR(XPATH_INVALID_OPERAND); } - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; xmlXPathCompOpEvalPredicate(ctxt, &comp->steps[op->ch1], set, @@ -12596,7 +12608,7 @@ xmlXPathCompOpEvalFirst(xmlXPathParserContextPtr ctxt, CHECK_ERROR0; if (OP_LIMIT_EXCEEDED(ctxt, 1)) return(0); - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; comp = ctxt->comp; @@ -12737,7 +12749,7 @@ xmlXPathCompOpEvalLast(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op, CHECK_ERROR0; if (OP_LIMIT_EXCEEDED(ctxt, 1)) return(0); - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; comp = ctxt->comp; @@ -12955,7 +12967,7 @@ xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op) CHECK_ERROR0; if (OP_LIMIT_EXCEEDED(ctxt, 1)) return(0); - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; comp = ctxt->comp; @@ -14189,7 +14201,7 @@ xmlXPathOptimizeExpression(xmlXPathParserContextPtr pctxt, /* Recurse */ ctxt = pctxt->context; if (ctxt != NULL) { - if (ctxt->depth >= ctxt->maxDepth) + if (ctxt->depth >= XPATH_MAX_RECURSION_DEPTH) return; ctxt->depth += 1; } |
