diff --git a/Makefile.am b/Makefile.am
index be1a883d..2a9d4709 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2,7 +2,7 @@
 
 ACLOCAL_AMFLAGS = -I m4
 
-SUBDIRS = include . doc example xstc $(PYTHON_SUBDIR)
+SUBDIRS = include . doc example fuzz xstc $(PYTHON_SUBDIR)
 
-DIST_SUBDIRS = include . doc example python xstc
+DIST_SUBDIRS = include . doc example fuzz python xstc
 
@@ -210,6 +210,7 @@ runtests: runtest$(EXEEXT) testrecurse$(EXEEXT) testapi$(EXEEXT) \
 	    $(CHECKER) ./runxmlconf$(EXEEXT)
 	@(if [ "$(PYTHON_SUBDIR)" != "" ] ; then cd python ; \
 	    $(MAKE) tests ; fi)
+	@cd fuzz && $(MAKE) tests
 
 check: all runtests
 
diff --git a/configure.ac b/configure.ac
index 5f95fee0..3a3d91d3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1714,7 +1714,7 @@ rm -f COPYING.LIB COPYING
 ln -s $srcdir/Copyright COPYING
 
 # keep on one line for cygwin c.f. #130896
-AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile doc/examples/Makefile doc/devhelp/Makefile example/Makefile python/Makefile python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake])
+AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile doc/examples/Makefile doc/devhelp/Makefile example/Makefile fuzz/Makefile python/Makefile python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake])
 AC_CONFIG_FILES([python/setup.py], [chmod +x python/setup.py])
 AC_CONFIG_FILES([xml2-config], [chmod +x xml2-config])
 AC_OUTPUT
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 00000000..28b71084
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,7 @@
+corpus/
+regexp
+seed/xml*
+testFuzzer
+uri
+xml
+xmlSeed
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
new file mode 100644
index 00000000..0e7391ba
--- /dev/null
+++ b/fuzz/Makefile.am
@@ -0,0 +1,75 @@
+EXTRA_PROGRAMS = regexp uri xml xmlSeed
+check_PROGRAMS = testFuzzer
+CLEANFILES = 
$(EXTRA_PROGRAMS)
+AM_CPPFLAGS = -I$(top_srcdir)/include
+DEPENDENCIES = $(top_builddir)/libxml2.la
+LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD)
+
+# These targets run commands; none of them names a file it creates.
+.PHONY: tests fuzz-xml fuzz-regexp fuzz-uri
+
+# Upper bound for XML seeds; also passed to the xml fuzzer as -max_len.
+PARSER_FUZZER_MAX_LEN = 100000
+# Shell globs, expanded by the loop in the seed/xml.stamp recipe.
+XML_SEED_CORPUS_SRC = \
+    $(top_srcdir)/test/* \
+    $(top_srcdir)/test/errors/*.xml \
+    $(top_srcdir)/test/errors10/*.xml \
+    $(top_srcdir)/test/namespaces/* \
+    $(top_srcdir)/test/valid/*.xml \
+    $(top_srcdir)/test/xmlid/* \
+    $(top_srcdir)/test/VC/* \
+    $(top_srcdir)/test/VCM/*
+
+xmlSeed_SOURCES = xmlSeed.c fuzz.c fuzz.h
+
+# Generate one seed per regular file matched by XML_SEED_CORPUS_SRC.
+# xmlSeed is run from inside the file's own directory with the bare file
+# name as its argument; seeds larger than PARSER_FUZZER_MAX_LEN bytes are
+# deleted again.  A subshell with cd replaces pushd/popd, which are not
+# available in a POSIX /bin/sh.
+seed/xml.stamp: xmlSeed$(EXEEXT)
+	@mkdir -p seed/xml
+	@for i in $(XML_SEED_CORPUS_SRC); do \
+	    if [ -f "$$i" ]; then \
+	        echo Processing seed $$i; \
+	        base=$$(basename "$$i"); \
+	        outfile="$(abs_builddir)/seed/xml/$$base"; \
+	        (cd "$$(dirname "$$i")" && \
+	            "$(abs_builddir)/xmlSeed$(EXEEXT)" "$$base") > "$$outfile"; \
+	        if [ "$$(wc -c < "$$outfile")" -gt $(PARSER_FUZZER_MAX_LEN) ]; then \
+	            rm -f "$$outfile"; \
+	        fi; \
+	    fi; \
+	done
+	@touch seed/xml.stamp
+
+testFuzzer_SOURCES = testFuzzer.c fuzz.c fuzz.h
+
+tests: testFuzzer$(EXEEXT)
+	@echo "## Running fuzzer tests"
+	@./testFuzzer$(EXEEXT)
+
+xml_SOURCES = xml.c fuzz.c fuzz.h
+xml_LDFLAGS = -fsanitize=fuzzer
+
+# The dictionary is a source file, so it is looked up in $(srcdir) to keep
+# out-of-tree builds working.
+fuzz-xml: xml$(EXEEXT) seed/xml.stamp
+	@mkdir -p corpus/xml
+	./xml$(EXEEXT) \
+	    -dict=$(srcdir)/xml.dict \
+	    -max_len=$(PARSER_FUZZER_MAX_LEN) \
+	    -timeout=20 \
+	    corpus/xml seed/xml
+
+regexp_SOURCES = regexp.c fuzz.c fuzz.h
+regexp_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-regexp: regexp$(EXEEXT)
+	@mkdir -p corpus/regexp
+	./regexp$(EXEEXT) \
+	    -dict=$(srcdir)/regexp.dict \
+	    -max_len=10000 \
+	    -timeout=20 \
+	    corpus/regexp $(srcdir)/seed/regexp
+
+uri_SOURCES = uri.c fuzz.c fuzz.h
+uri_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-uri: uri$(EXEEXT)
+	@mkdir -p corpus/uri
+	./uri$(EXEEXT) \
+	    -max_len=10000 \
+	    -timeout=2 \
+	    corpus/uri $(srcdir)/seed/uri
+
+# Remove the generated XML seeds; corpus/ holds fuzzing results and is kept.
+clean-local:
+	rm -rf seed/xml seed/xml.stamp
+
diff --git a/fuzz/README b/fuzz/README
new file mode 100644
index 00000000..f675ad82
--- /dev/null
+++ 
b/fuzz/README
@@ -0,0 +1,19 @@
+libFuzzer instructions for libxml2
+==================================
+
+Set compiler and options:
+
+    export CC=clang
+    export CFLAGS="-g -fsanitize=fuzzer-no-link,address,undefined \
+        -fno-sanitize-recover=all \
+        -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION"
+
+Build libxml2 with instrumentation:
+
+    ./configure --without-python
+    make
+
+Run fuzzers:
+
+    make -C fuzz fuzz-xml
+
diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c
new file mode 100644
index 00000000..6955f280
--- /dev/null
+++ b/fuzz/fuzz.c
@@ -0,0 +1,274 @@
+/*
+ * fuzz.c: Common functions for fuzzing.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <libxml/hash.h>
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/tree.h>
+#include <libxml/xmlIO.h>
+#include "fuzz.h"
+
+/*
+ * One entity read from the fuzz data. The struct does not own @data:
+ * it points into the shared buffer of unescaped strings.
+ */
+typedef struct {
+    const char *data;
+    size_t size;
+} xmlFuzzEntityInfo;
+
+/* Single static instance for now */
+static struct {
+    /* Original data */
+    const char *data;
+    size_t size;
+
+    /* Remaining data */
+    const char *ptr;
+    size_t remaining;
+
+    /* Buffer for unescaped strings */
+    char *outBuf;
+    char *outPtr; /* Free space at end of buffer */
+
+    xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
+
+    /* The first entity is the main entity. */
+    const char *mainUrl;
+    xmlFuzzEntityInfo *mainEntity;
+} fuzzData;
+
+/**
+ * xmlFuzzErrorFunc:
+ * @ctx: unused
+ * @msg: unused
+ *
+ * An error function that simply discards all errors.
+ */
+void
+xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
+                 ...) {
+}
+
+/**
+ * xmlFuzzDataInit:
+ *
+ * Initialize fuzz data provider.
+ */
+void
+xmlFuzzDataInit(const char *data, size_t size) {
+    fuzzData.data = data;
+    fuzzData.size = size;
+    fuzzData.ptr = data;
+    fuzzData.remaining = size;
+
+    /* Room for the unescaped input plus one byte for a terminator. */
+    fuzzData.outBuf = xmlMalloc(size + 1);
+    fuzzData.outPtr = fuzzData.outBuf;
+
+    fuzzData.entities = xmlHashCreate(8);
+    fuzzData.mainUrl = NULL;
+    fuzzData.mainEntity = NULL;
+}
+
+/*
+ * Hash deallocator for the entity table: frees the xmlFuzzEntityInfo
+ * payload. The entry name is not needed.
+ */
+static void
+xmlFreeEntityEntry(void *value, const xmlChar *name ATTRIBUTE_UNUSED) {
+    xmlFree(value);
+}
+
+/**
+ * xmlFuzzDataCleanup:
+ *
+ * Cleanup fuzz data provider. Frees the string buffer and the entity
+ * table together with its entries.
+ */
+void
+xmlFuzzDataCleanup(void) {
+    xmlFree(fuzzData.outBuf);
+    xmlHashFree(fuzzData.entities, xmlFreeEntityEntry);
+}
+
+/**
+ * xmlFuzzReadInt:
+ *
+ * Read an integer from the fuzz data. The bytes are copied as they are,
+ * so the value depends on the host byte order.
+ *
+ * Returns the integer, or 0 if fewer than sizeof(int) bytes remain (in
+ * which case nothing is consumed).
+ */
+int
+xmlFuzzReadInt(void) {
+    int ret;
+
+    if (fuzzData.remaining < sizeof(int))
+        return(0);
+    memcpy(&ret, fuzzData.ptr, sizeof(int));
+    fuzzData.ptr += sizeof(int);
+    fuzzData.remaining -= sizeof(int);
+
+    return(ret);
+}
+
+/**
+ * xmlFuzzReadString:
+ * @size: set to the size of the returned string in bytes, not counting
+ *        the terminator
+ *
+ * Read a random-length string from the fuzz data.
+ *
+ * The format is similar to libFuzzer's FuzzedDataProvider but treats
+ * backslash followed by newline as end of string. This makes the fuzz data
+ * more readable. A backslash character is escaped with another backslash.
+ *
+ * Returns a zero-terminated string or NULL if the fuzz data is exhausted.
+ */
+static const char *
+xmlFuzzReadString(size_t *size) {
+    const char *out = fuzzData.outPtr;
+
+    while (fuzzData.remaining > 0) {
+        int c = *fuzzData.ptr++;
+        fuzzData.remaining--;
+
+        if ((c == '\\') && (fuzzData.remaining > 0)) {
+            int c2 = *fuzzData.ptr;
+
+            /* Backslash-newline terminates the string. */
+            if (c2 == '\n') {
+                fuzzData.ptr++;
+                fuzzData.remaining--;
+                *size = fuzzData.outPtr - out;
+                *fuzzData.outPtr++ = '\0';
+                return(out);
+            }
+            /* Double backslash: skip the second one, emit a single one. */
+            if (c2 == '\\') {
+                fuzzData.ptr++;
+                fuzzData.remaining--;
+            }
+        }
+
+        *fuzzData.outPtr++ = c;
+    }
+
+    /* Input ended without a terminator: return what was collected. */
+    if (fuzzData.outPtr > out) {
+        *size = fuzzData.outPtr - out;
+        *fuzzData.outPtr++ = '\0';
+        return(out);
+    }
+
+    return(NULL);
+}
+
+/**
+ * xmlFuzzReadEntities:
+ *
+ * Read entities like the main XML file, external DTDs, external parsed
+ * entities from fuzz data.
+ *
+ * The data is a sequence of (URL, content) string pairs. Only the first
+ * content seen for a URL is stored, and the first stored pair becomes
+ * the main entity. Reading stops at the end of the data or when an entry
+ * cannot be allocated or stored.
+ */
+void
+xmlFuzzReadEntities(void) {
+    size_t num = 0;
+
+    while (1) {
+        const char *url, *entity;
+        size_t urlSize, entitySize;
+        xmlFuzzEntityInfo *entityInfo;
+
+        url = xmlFuzzReadString(&urlSize);
+        if (url == NULL) break;
+
+        entity = xmlFuzzReadString(&entitySize);
+        if (entity == NULL) break;
+
+        /* Duplicate URL: keep the entity that was registered first. */
+        if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) != NULL)
+            continue;
+
+        entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo));
+        if (entityInfo == NULL)
+            break;
+        entityInfo->data = entity;
+        entityInfo->size = entitySize;
+
+        /* On failure the table did not take ownership of the entry. */
+        if (xmlHashAddEntry(fuzzData.entities, (xmlChar *)url,
+                            entityInfo) != 0) {
+            xmlFree(entityInfo);
+            break;
+        }
+
+        if (num == 0) {
+            fuzzData.mainUrl = url;
+            fuzzData.mainEntity = entityInfo;
+        }
+
+        num++;
+    }
+}
+
+/**
+ * xmlFuzzMainEntity:
+ * @size: size of the main entity in bytes
+ *
+ * Returns the main entity, or NULL if no entity was read. @size is left
+ * untouched when NULL is returned.
+ */
+const char *
+xmlFuzzMainEntity(size_t *size) {
+    if (fuzzData.mainEntity == NULL)
+        return(NULL);
+    *size = fuzzData.mainEntity->size;
+    return(fuzzData.mainEntity->data);
+}
+
+/**
+ * xmlFuzzEntityLoader:
+ *
+ * The entity loader for fuzz data.
+ */
+xmlParserInputPtr
+xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
+                    xmlParserCtxtPtr ctxt) {
+    xmlParserInputPtr input;
+    xmlFuzzEntityInfo *entity;
+
+    /* Only URLs registered in the entity table can be loaded. */
+    if (URL == NULL)
+        return(NULL);
+    entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL);
+    if (entity == NULL)
+        return(NULL);
+
+    input = xmlNewInputStream(ctxt);
+    if (input == NULL)
+        return(NULL);
+    input->filename = NULL;
+    input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size,
+                                               XML_CHAR_ENCODING_NONE);
+    if (input->buf == NULL) {
+        xmlFreeInputStream(input);
+        return(NULL);
+    }
+    input->base = input->cur = xmlBufContent(input->buf->buffer);
+    input->end = input->base + entity->size;
+
+    return(input);
+}
+
+/**
+ * xmlFuzzExtractStrings:
+ * @data: input bytes
+ * @size: number of input bytes
+ * @strings: output array with room for @numStrings pointers
+ * @numStrings: maximum number of strings to extract
+ *
+ * Extract C strings from input data. Use exact-size allocations to detect
+ * potential memory errors.
+ *
+ * The input is split at zero bytes. Each extracted string is a separate
+ * allocation that the caller frees with xmlFree; unused slots of @strings
+ * are set to NULL.
+ *
+ * Returns the number of strings extracted.
+ */
+size_t
+xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
+                      size_t numStrings) {
+    const char *start = data;
+    const char *end = data + size;
+    size_t i = 0, ret;
+
+    while (i < numStrings) {
+        size_t strSize = end - start;
+        const char *zero = memchr(start, 0, strSize);
+
+        if (zero != NULL)
+            strSize = zero - start;
+
+        strings[i] = xmlMalloc(strSize + 1);
+        /* Out of memory: stop here, this slot is reported as unused. */
+        if (strings[i] == NULL)
+            break;
+        memcpy(strings[i], start, strSize);
+        strings[i][strSize] = '\0';
+
+        i++;
+        if (zero != NULL)
+            start = zero + 1;
+        else
+            break;
+    }
+
+    ret = i;
+
+    while (i < numStrings) {
+        strings[i] = NULL;
+        i++;
+    }
+
+    return(ret);
+}
+
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h
new file mode 100644
index 00000000..1093be14
--- /dev/null
+++ b/fuzz/fuzz.h
@@ -0,0 +1,55 @@
+/*
+ * fuzz.h: Common functions and macros for fuzzing.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#ifndef __XML_FUZZERCOMMON_H__
+#define __XML_FUZZERCOMMON_H__
+
+#include <stddef.h>
+#include <libxml/parser.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Entry points that each fuzz target defines. */
+int
+LLVMFuzzerInitialize(int *argc, char ***argv);
+
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size);
+
+/* Error callback that discards every message. */
+void
+xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
+                 ...);
+
+/* Set up and tear down the fuzz data provider for one input. */
+void
+xmlFuzzDataInit(const char *data, size_t size);
+
+void
+xmlFuzzDataCleanup(void);
+
+/* Readers that consume the input handed to xmlFuzzDataInit. */
+int
+xmlFuzzReadInt(void);
+
+void
+xmlFuzzReadEntities(void);
+
+const char *
+xmlFuzzMainEntity(size_t *size);
+
+/* External entity loader that serves the entities read from fuzz data. */
+xmlParserInputPtr
+xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
+                    xmlParserCtxtPtr ctxt);
+
+/* Split data at zero bytes into at most numStrings allocated strings. */
+size_t
+xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
+                      size_t numStrings);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __XML_FUZZERCOMMON_H__ */
+
diff --git a/fuzz/regexp.c b/fuzz/regexp.c
new file mode 100644
index 00000000..ed13f637
--- /dev/null
+++ b/fuzz/regexp.c
@@ -0,0 +1,40 @@
+/*
+ * regexp.c: a libFuzzer target to test the regexp module.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/xmlregexp.h>
+#include "fuzz.h"
+
+/* One-time setup: silence libxml2 error output. */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+
+    return 0;
+}
+
+/*
+ * Split the input at zero bytes into up to two strings and compile the
+ * first one as a regular expression. The second string is only used by
+ * the disabled matching code below.
+ */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    xmlRegexpPtr regexp = NULL;
+    char *str[2] = { NULL, NULL };
+    size_t numStrings;
+
+    numStrings = xmlFuzzExtractStrings(data, size, str, 2);
+    /* Only read inside the disabled block; avoids a set-but-unused warning. */
+    (void) numStrings;
+
+    if (str[0] != NULL)
+        regexp = xmlRegexpCompile(BAD_CAST str[0]);
+    /* xmlRegexpExec has pathological performance in too many cases. */
+#if 0
+    if ((regexp != NULL) && (numStrings >= 2)) {
+        xmlRegexpExec(regexp, BAD_CAST str[1]);
+    }
+#endif
+    xmlRegFreeRegexp(regexp);
+
+    xmlFree(str[0]);
+    xmlFree(str[1]);
+
+    return 0;
+}
+
diff --git a/fuzz/regexp.dict b/fuzz/regexp.dict
new file mode 100644
index 00000000..06b74a6c
--- /dev/null
+++ b/fuzz/regexp.dict
@@ -0,0 +1,16 @@
+quant_any="*"
+quant_opt="?"
+quant_some="+" +quant_num="{1,2}" + +branch="|a" +pos_group="[a]" +neg_group="[^a]" + +cat_letter="\\p{L}" +cat_mark="\\p{M}" +cat_number="\\p{N}" +cat_punct="\\p{P}" +cat_sym="\\p{S}" +cat_sep="\\p{Z}" +cat_other="\\p{C}" diff --git a/fuzz/seed/regexp/branch-1 b/fuzz/seed/regexp/branch-1 new file mode 100644 index 00000000..ded775ea Binary files /dev/null and b/fuzz/seed/regexp/branch-1 differ diff --git a/fuzz/seed/regexp/branch-10 b/fuzz/seed/regexp/branch-10 new file mode 100644 index 00000000..6700d775 Binary files /dev/null and b/fuzz/seed/regexp/branch-10 differ diff --git a/fuzz/seed/regexp/branch-11 b/fuzz/seed/regexp/branch-11 new file mode 100644 index 00000000..d83f9181 Binary files /dev/null and b/fuzz/seed/regexp/branch-11 differ diff --git a/fuzz/seed/regexp/branch-12 b/fuzz/seed/regexp/branch-12 new file mode 100644 index 00000000..b44dba57 Binary files /dev/null and b/fuzz/seed/regexp/branch-12 differ diff --git a/fuzz/seed/regexp/branch-13 b/fuzz/seed/regexp/branch-13 new file mode 100644 index 00000000..64e50a00 Binary files /dev/null and b/fuzz/seed/regexp/branch-13 differ diff --git a/fuzz/seed/regexp/branch-2 b/fuzz/seed/regexp/branch-2 new file mode 100644 index 00000000..8293d81d Binary files /dev/null and b/fuzz/seed/regexp/branch-2 differ diff --git a/fuzz/seed/regexp/branch-3 b/fuzz/seed/regexp/branch-3 new file mode 100644 index 00000000..696af9be Binary files /dev/null and b/fuzz/seed/regexp/branch-3 differ diff --git a/fuzz/seed/regexp/branch-4 b/fuzz/seed/regexp/branch-4 new file mode 100644 index 00000000..83179988 Binary files /dev/null and b/fuzz/seed/regexp/branch-4 differ diff --git a/fuzz/seed/regexp/branch-5 b/fuzz/seed/regexp/branch-5 new file mode 100644 index 00000000..6b6db8b9 Binary files /dev/null and b/fuzz/seed/regexp/branch-5 differ diff --git a/fuzz/seed/regexp/branch-6 b/fuzz/seed/regexp/branch-6 new file mode 100644 index 00000000..4f477902 Binary files /dev/null and b/fuzz/seed/regexp/branch-6 differ diff --git 
a/fuzz/seed/regexp/branch-7 b/fuzz/seed/regexp/branch-7 new file mode 100644 index 00000000..6334f725 Binary files /dev/null and b/fuzz/seed/regexp/branch-7 differ diff --git a/fuzz/seed/regexp/branch-8 b/fuzz/seed/regexp/branch-8 new file mode 100644 index 00000000..f77a8f4d Binary files /dev/null and b/fuzz/seed/regexp/branch-8 differ diff --git a/fuzz/seed/regexp/branch-9 b/fuzz/seed/regexp/branch-9 new file mode 100644 index 00000000..acd0eeca Binary files /dev/null and b/fuzz/seed/regexp/branch-9 differ diff --git a/fuzz/seed/regexp/bug316338-1 b/fuzz/seed/regexp/bug316338-1 new file mode 100644 index 00000000..9f0a504a Binary files /dev/null and b/fuzz/seed/regexp/bug316338-1 differ diff --git a/fuzz/seed/regexp/bug316338-10 b/fuzz/seed/regexp/bug316338-10 new file mode 100644 index 00000000..60685bbe Binary files /dev/null and b/fuzz/seed/regexp/bug316338-10 differ diff --git a/fuzz/seed/regexp/bug316338-11 b/fuzz/seed/regexp/bug316338-11 new file mode 100644 index 00000000..72a7956c Binary files /dev/null and b/fuzz/seed/regexp/bug316338-11 differ diff --git a/fuzz/seed/regexp/bug316338-12 b/fuzz/seed/regexp/bug316338-12 new file mode 100644 index 00000000..85416ee2 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-12 differ diff --git a/fuzz/seed/regexp/bug316338-13 b/fuzz/seed/regexp/bug316338-13 new file mode 100644 index 00000000..c91d4fea Binary files /dev/null and b/fuzz/seed/regexp/bug316338-13 differ diff --git a/fuzz/seed/regexp/bug316338-14 b/fuzz/seed/regexp/bug316338-14 new file mode 100644 index 00000000..a164b423 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-14 differ diff --git a/fuzz/seed/regexp/bug316338-15 b/fuzz/seed/regexp/bug316338-15 new file mode 100644 index 00000000..750c76de Binary files /dev/null and b/fuzz/seed/regexp/bug316338-15 differ diff --git a/fuzz/seed/regexp/bug316338-16 b/fuzz/seed/regexp/bug316338-16 new file mode 100644 index 00000000..23c5d230 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-16 
differ diff --git a/fuzz/seed/regexp/bug316338-2 b/fuzz/seed/regexp/bug316338-2 new file mode 100644 index 00000000..5468d06d Binary files /dev/null and b/fuzz/seed/regexp/bug316338-2 differ diff --git a/fuzz/seed/regexp/bug316338-3 b/fuzz/seed/regexp/bug316338-3 new file mode 100644 index 00000000..76e1e0bc Binary files /dev/null and b/fuzz/seed/regexp/bug316338-3 differ diff --git a/fuzz/seed/regexp/bug316338-4 b/fuzz/seed/regexp/bug316338-4 new file mode 100644 index 00000000..e0f65a42 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-4 differ diff --git a/fuzz/seed/regexp/bug316338-5 b/fuzz/seed/regexp/bug316338-5 new file mode 100644 index 00000000..fcfaa97a Binary files /dev/null and b/fuzz/seed/regexp/bug316338-5 differ diff --git a/fuzz/seed/regexp/bug316338-6 b/fuzz/seed/regexp/bug316338-6 new file mode 100644 index 00000000..ce00a15b Binary files /dev/null and b/fuzz/seed/regexp/bug316338-6 differ diff --git a/fuzz/seed/regexp/bug316338-7 b/fuzz/seed/regexp/bug316338-7 new file mode 100644 index 00000000..127fe1f6 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-7 differ diff --git a/fuzz/seed/regexp/bug316338-8 b/fuzz/seed/regexp/bug316338-8 new file mode 100644 index 00000000..fe8bb8b2 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-8 differ diff --git a/fuzz/seed/regexp/bug316338-9 b/fuzz/seed/regexp/bug316338-9 new file mode 100644 index 00000000..3d56e5d7 Binary files /dev/null and b/fuzz/seed/regexp/bug316338-9 differ diff --git a/fuzz/seed/regexp/bug420596-1 b/fuzz/seed/regexp/bug420596-1 new file mode 100644 index 00000000..4426933e Binary files /dev/null and b/fuzz/seed/regexp/bug420596-1 differ diff --git a/fuzz/seed/regexp/bug420596-2 b/fuzz/seed/regexp/bug420596-2 new file mode 100644 index 00000000..474d2b6e Binary files /dev/null and b/fuzz/seed/regexp/bug420596-2 differ diff --git a/fuzz/seed/regexp/bug420596-3 b/fuzz/seed/regexp/bug420596-3 new file mode 100644 index 00000000..09c75cb6 Binary files /dev/null and 
b/fuzz/seed/regexp/bug420596-3 differ diff --git a/fuzz/seed/regexp/bug420596-4 b/fuzz/seed/regexp/bug420596-4 new file mode 100644 index 00000000..65d561ea Binary files /dev/null and b/fuzz/seed/regexp/bug420596-4 differ diff --git a/fuzz/seed/regexp/bug420596-5 b/fuzz/seed/regexp/bug420596-5 new file mode 100644 index 00000000..b6785803 Binary files /dev/null and b/fuzz/seed/regexp/bug420596-5 differ diff --git a/fuzz/seed/regexp/bug420596-6 b/fuzz/seed/regexp/bug420596-6 new file mode 100644 index 00000000..3a05d82f Binary files /dev/null and b/fuzz/seed/regexp/bug420596-6 differ diff --git a/fuzz/seed/regexp/bug420596-7 b/fuzz/seed/regexp/bug420596-7 new file mode 100644 index 00000000..88e16605 Binary files /dev/null and b/fuzz/seed/regexp/bug420596-7 differ diff --git a/fuzz/seed/regexp/bug420596-8 b/fuzz/seed/regexp/bug420596-8 new file mode 100644 index 00000000..4575a925 Binary files /dev/null and b/fuzz/seed/regexp/bug420596-8 differ diff --git a/fuzz/seed/regexp/content-1 b/fuzz/seed/regexp/content-1 new file mode 100644 index 00000000..5acbf864 Binary files /dev/null and b/fuzz/seed/regexp/content-1 differ diff --git a/fuzz/seed/regexp/content-10 b/fuzz/seed/regexp/content-10 new file mode 100644 index 00000000..f131454c Binary files /dev/null and b/fuzz/seed/regexp/content-10 differ diff --git a/fuzz/seed/regexp/content-2 b/fuzz/seed/regexp/content-2 new file mode 100644 index 00000000..4e6b663e Binary files /dev/null and b/fuzz/seed/regexp/content-2 differ diff --git a/fuzz/seed/regexp/content-3 b/fuzz/seed/regexp/content-3 new file mode 100644 index 00000000..b13fc8db Binary files /dev/null and b/fuzz/seed/regexp/content-3 differ diff --git a/fuzz/seed/regexp/content-4 b/fuzz/seed/regexp/content-4 new file mode 100644 index 00000000..47c5d6de Binary files /dev/null and b/fuzz/seed/regexp/content-4 differ diff --git a/fuzz/seed/regexp/content-5 b/fuzz/seed/regexp/content-5 new file mode 100644 index 00000000..f93860eb Binary files /dev/null and 
b/fuzz/seed/regexp/content-5 differ diff --git a/fuzz/seed/regexp/content-6 b/fuzz/seed/regexp/content-6 new file mode 100644 index 00000000..e5c6e14b Binary files /dev/null and b/fuzz/seed/regexp/content-6 differ diff --git a/fuzz/seed/regexp/content-7 b/fuzz/seed/regexp/content-7 new file mode 100644 index 00000000..4868dd2f Binary files /dev/null and b/fuzz/seed/regexp/content-7 differ diff --git a/fuzz/seed/regexp/content-8 b/fuzz/seed/regexp/content-8 new file mode 100644 index 00000000..a3a87d0b Binary files /dev/null and b/fuzz/seed/regexp/content-8 differ diff --git a/fuzz/seed/regexp/content-9 b/fuzz/seed/regexp/content-9 new file mode 100644 index 00000000..91f0d9e9 Binary files /dev/null and b/fuzz/seed/regexp/content-9 differ diff --git a/fuzz/seed/regexp/hard-1 b/fuzz/seed/regexp/hard-1 new file mode 100644 index 00000000..ba00382e Binary files /dev/null and b/fuzz/seed/regexp/hard-1 differ diff --git a/fuzz/seed/regexp/hard-10 b/fuzz/seed/regexp/hard-10 new file mode 100644 index 00000000..7db28fa5 Binary files /dev/null and b/fuzz/seed/regexp/hard-10 differ diff --git a/fuzz/seed/regexp/hard-2 b/fuzz/seed/regexp/hard-2 new file mode 100644 index 00000000..ed38b91b Binary files /dev/null and b/fuzz/seed/regexp/hard-2 differ diff --git a/fuzz/seed/regexp/hard-3 b/fuzz/seed/regexp/hard-3 new file mode 100644 index 00000000..7b16da0c Binary files /dev/null and b/fuzz/seed/regexp/hard-3 differ diff --git a/fuzz/seed/regexp/hard-4 b/fuzz/seed/regexp/hard-4 new file mode 100644 index 00000000..2ece886a Binary files /dev/null and b/fuzz/seed/regexp/hard-4 differ diff --git a/fuzz/seed/regexp/hard-5 b/fuzz/seed/regexp/hard-5 new file mode 100644 index 00000000..870a3ec5 Binary files /dev/null and b/fuzz/seed/regexp/hard-5 differ diff --git a/fuzz/seed/regexp/hard-6 b/fuzz/seed/regexp/hard-6 new file mode 100644 index 00000000..06aa7d0d Binary files /dev/null and b/fuzz/seed/regexp/hard-6 differ diff --git a/fuzz/seed/regexp/hard-7 b/fuzz/seed/regexp/hard-7 
new file mode 100644 index 00000000..50a9ec39 Binary files /dev/null and b/fuzz/seed/regexp/hard-7 differ diff --git a/fuzz/seed/regexp/hard-8 b/fuzz/seed/regexp/hard-8 new file mode 100644 index 00000000..0991129f Binary files /dev/null and b/fuzz/seed/regexp/hard-8 differ diff --git a/fuzz/seed/regexp/hard-9 b/fuzz/seed/regexp/hard-9 new file mode 100644 index 00000000..5bd1d890 Binary files /dev/null and b/fuzz/seed/regexp/hard-9 differ diff --git a/fuzz/seed/regexp/ncname-1 b/fuzz/seed/regexp/ncname-1 new file mode 100644 index 00000000..608eb9a9 Binary files /dev/null and b/fuzz/seed/regexp/ncname-1 differ diff --git a/fuzz/seed/regexp/ncname-2 b/fuzz/seed/regexp/ncname-2 new file mode 100644 index 00000000..cfb9b960 Binary files /dev/null and b/fuzz/seed/regexp/ncname-2 differ diff --git a/fuzz/seed/regexp/ncname-3 b/fuzz/seed/regexp/ncname-3 new file mode 100644 index 00000000..07a6a081 Binary files /dev/null and b/fuzz/seed/regexp/ncname-3 differ diff --git a/fuzz/seed/regexp/ncname-4 b/fuzz/seed/regexp/ncname-4 new file mode 100644 index 00000000..87e937f4 Binary files /dev/null and b/fuzz/seed/regexp/ncname-4 differ diff --git a/fuzz/seed/regexp/ncname-5 b/fuzz/seed/regexp/ncname-5 new file mode 100644 index 00000000..ad294560 Binary files /dev/null and b/fuzz/seed/regexp/ncname-5 differ diff --git a/fuzz/seed/regexp/ranges-1 b/fuzz/seed/regexp/ranges-1 new file mode 100644 index 00000000..71448f23 Binary files /dev/null and b/fuzz/seed/regexp/ranges-1 differ diff --git a/fuzz/seed/regexp/ranges-10 b/fuzz/seed/regexp/ranges-10 new file mode 100644 index 00000000..91aed3cf Binary files /dev/null and b/fuzz/seed/regexp/ranges-10 differ diff --git a/fuzz/seed/regexp/ranges-11 b/fuzz/seed/regexp/ranges-11 new file mode 100644 index 00000000..76eb5deb Binary files /dev/null and b/fuzz/seed/regexp/ranges-11 differ diff --git a/fuzz/seed/regexp/ranges-12 b/fuzz/seed/regexp/ranges-12 new file mode 100644 index 00000000..9c3bc663 Binary files /dev/null and 
b/fuzz/seed/regexp/ranges-12 differ diff --git a/fuzz/seed/regexp/ranges-2 b/fuzz/seed/regexp/ranges-2 new file mode 100644 index 00000000..9369f7a5 Binary files /dev/null and b/fuzz/seed/regexp/ranges-2 differ diff --git a/fuzz/seed/regexp/ranges-3 b/fuzz/seed/regexp/ranges-3 new file mode 100644 index 00000000..58a3a081 Binary files /dev/null and b/fuzz/seed/regexp/ranges-3 differ diff --git a/fuzz/seed/regexp/ranges-4 b/fuzz/seed/regexp/ranges-4 new file mode 100644 index 00000000..da7e9dab Binary files /dev/null and b/fuzz/seed/regexp/ranges-4 differ diff --git a/fuzz/seed/regexp/ranges-5 b/fuzz/seed/regexp/ranges-5 new file mode 100644 index 00000000..83ad4a82 Binary files /dev/null and b/fuzz/seed/regexp/ranges-5 differ diff --git a/fuzz/seed/regexp/ranges-6 b/fuzz/seed/regexp/ranges-6 new file mode 100644 index 00000000..3bc9758f Binary files /dev/null and b/fuzz/seed/regexp/ranges-6 differ diff --git a/fuzz/seed/regexp/ranges-7 b/fuzz/seed/regexp/ranges-7 new file mode 100644 index 00000000..fa890384 Binary files /dev/null and b/fuzz/seed/regexp/ranges-7 differ diff --git a/fuzz/seed/regexp/ranges-8 b/fuzz/seed/regexp/ranges-8 new file mode 100644 index 00000000..96f0bb69 Binary files /dev/null and b/fuzz/seed/regexp/ranges-8 differ diff --git a/fuzz/seed/regexp/ranges-9 b/fuzz/seed/regexp/ranges-9 new file mode 100644 index 00000000..8e3fc43a Binary files /dev/null and b/fuzz/seed/regexp/ranges-9 differ diff --git a/fuzz/seed/regexp/ranges2-1 b/fuzz/seed/regexp/ranges2-1 new file mode 100644 index 00000000..044a8eb9 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-1 differ diff --git a/fuzz/seed/regexp/ranges2-10 b/fuzz/seed/regexp/ranges2-10 new file mode 100644 index 00000000..19e2aa2d Binary files /dev/null and b/fuzz/seed/regexp/ranges2-10 differ diff --git a/fuzz/seed/regexp/ranges2-11 b/fuzz/seed/regexp/ranges2-11 new file mode 100644 index 00000000..89be181d Binary files /dev/null and b/fuzz/seed/regexp/ranges2-11 differ diff --git 
a/fuzz/seed/regexp/ranges2-12 b/fuzz/seed/regexp/ranges2-12 new file mode 100644 index 00000000..42ebdd31 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-12 differ diff --git a/fuzz/seed/regexp/ranges2-2 b/fuzz/seed/regexp/ranges2-2 new file mode 100644 index 00000000..026f7b84 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-2 differ diff --git a/fuzz/seed/regexp/ranges2-3 b/fuzz/seed/regexp/ranges2-3 new file mode 100644 index 00000000..83e78a9c Binary files /dev/null and b/fuzz/seed/regexp/ranges2-3 differ diff --git a/fuzz/seed/regexp/ranges2-4 b/fuzz/seed/regexp/ranges2-4 new file mode 100644 index 00000000..847b4e84 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-4 differ diff --git a/fuzz/seed/regexp/ranges2-5 b/fuzz/seed/regexp/ranges2-5 new file mode 100644 index 00000000..349168d3 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-5 differ diff --git a/fuzz/seed/regexp/ranges2-6 b/fuzz/seed/regexp/ranges2-6 new file mode 100644 index 00000000..5d2a4076 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-6 differ diff --git a/fuzz/seed/regexp/ranges2-7 b/fuzz/seed/regexp/ranges2-7 new file mode 100644 index 00000000..74fbafb4 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-7 differ diff --git a/fuzz/seed/regexp/ranges2-8 b/fuzz/seed/regexp/ranges2-8 new file mode 100644 index 00000000..125bfa91 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-8 differ diff --git a/fuzz/seed/regexp/ranges2-9 b/fuzz/seed/regexp/ranges2-9 new file mode 100644 index 00000000..f2cf1288 Binary files /dev/null and b/fuzz/seed/regexp/ranges2-9 differ diff --git a/fuzz/seed/regexp/xpath-1 b/fuzz/seed/regexp/xpath-1 new file mode 100644 index 00000000..3bc17926 Binary files /dev/null and b/fuzz/seed/regexp/xpath-1 differ diff --git a/fuzz/seed/regexp/xpath-10 b/fuzz/seed/regexp/xpath-10 new file mode 100644 index 00000000..e4f4b0cd Binary files /dev/null and b/fuzz/seed/regexp/xpath-10 differ diff --git a/fuzz/seed/regexp/xpath-11 
b/fuzz/seed/regexp/xpath-11 new file mode 100644 index 00000000..318e0ccf Binary files /dev/null and b/fuzz/seed/regexp/xpath-11 differ diff --git a/fuzz/seed/regexp/xpath-12 b/fuzz/seed/regexp/xpath-12 new file mode 100644 index 00000000..f204295b Binary files /dev/null and b/fuzz/seed/regexp/xpath-12 differ diff --git a/fuzz/seed/regexp/xpath-13 b/fuzz/seed/regexp/xpath-13 new file mode 100644 index 00000000..70fccd59 Binary files /dev/null and b/fuzz/seed/regexp/xpath-13 differ diff --git a/fuzz/seed/regexp/xpath-14 b/fuzz/seed/regexp/xpath-14 new file mode 100644 index 00000000..357ce2b5 Binary files /dev/null and b/fuzz/seed/regexp/xpath-14 differ diff --git a/fuzz/seed/regexp/xpath-15 b/fuzz/seed/regexp/xpath-15 new file mode 100644 index 00000000..2a10a837 Binary files /dev/null and b/fuzz/seed/regexp/xpath-15 differ diff --git a/fuzz/seed/regexp/xpath-16 b/fuzz/seed/regexp/xpath-16 new file mode 100644 index 00000000..1f3089fb Binary files /dev/null and b/fuzz/seed/regexp/xpath-16 differ diff --git a/fuzz/seed/regexp/xpath-17 b/fuzz/seed/regexp/xpath-17 new file mode 100644 index 00000000..a9d542fb Binary files /dev/null and b/fuzz/seed/regexp/xpath-17 differ diff --git a/fuzz/seed/regexp/xpath-18 b/fuzz/seed/regexp/xpath-18 new file mode 100644 index 00000000..651eb9d4 Binary files /dev/null and b/fuzz/seed/regexp/xpath-18 differ diff --git a/fuzz/seed/regexp/xpath-19 b/fuzz/seed/regexp/xpath-19 new file mode 100644 index 00000000..fefea8f1 Binary files /dev/null and b/fuzz/seed/regexp/xpath-19 differ diff --git a/fuzz/seed/regexp/xpath-2 b/fuzz/seed/regexp/xpath-2 new file mode 100644 index 00000000..81e5fba0 Binary files /dev/null and b/fuzz/seed/regexp/xpath-2 differ diff --git a/fuzz/seed/regexp/xpath-20 b/fuzz/seed/regexp/xpath-20 new file mode 100644 index 00000000..1f3089fb Binary files /dev/null and b/fuzz/seed/regexp/xpath-20 differ diff --git a/fuzz/seed/regexp/xpath-21 b/fuzz/seed/regexp/xpath-21 new file mode 100644 index 00000000..706a7025 
Binary files /dev/null and b/fuzz/seed/regexp/xpath-21 differ diff --git a/fuzz/seed/regexp/xpath-22 b/fuzz/seed/regexp/xpath-22 new file mode 100644 index 00000000..a246f84c Binary files /dev/null and b/fuzz/seed/regexp/xpath-22 differ diff --git a/fuzz/seed/regexp/xpath-23 b/fuzz/seed/regexp/xpath-23 new file mode 100644 index 00000000..02753beb Binary files /dev/null and b/fuzz/seed/regexp/xpath-23 differ diff --git a/fuzz/seed/regexp/xpath-24 b/fuzz/seed/regexp/xpath-24 new file mode 100644 index 00000000..331105cd Binary files /dev/null and b/fuzz/seed/regexp/xpath-24 differ diff --git a/fuzz/seed/regexp/xpath-25 b/fuzz/seed/regexp/xpath-25 new file mode 100644 index 00000000..ce3da443 Binary files /dev/null and b/fuzz/seed/regexp/xpath-25 differ diff --git a/fuzz/seed/regexp/xpath-26 b/fuzz/seed/regexp/xpath-26 new file mode 100644 index 00000000..b3bf8c23 Binary files /dev/null and b/fuzz/seed/regexp/xpath-26 differ diff --git a/fuzz/seed/regexp/xpath-27 b/fuzz/seed/regexp/xpath-27 new file mode 100644 index 00000000..74bbe468 Binary files /dev/null and b/fuzz/seed/regexp/xpath-27 differ diff --git a/fuzz/seed/regexp/xpath-28 b/fuzz/seed/regexp/xpath-28 new file mode 100644 index 00000000..b38a709e Binary files /dev/null and b/fuzz/seed/regexp/xpath-28 differ diff --git a/fuzz/seed/regexp/xpath-29 b/fuzz/seed/regexp/xpath-29 new file mode 100644 index 00000000..104d4e54 Binary files /dev/null and b/fuzz/seed/regexp/xpath-29 differ diff --git a/fuzz/seed/regexp/xpath-3 b/fuzz/seed/regexp/xpath-3 new file mode 100644 index 00000000..6d7be85f Binary files /dev/null and b/fuzz/seed/regexp/xpath-3 differ diff --git a/fuzz/seed/regexp/xpath-30 b/fuzz/seed/regexp/xpath-30 new file mode 100644 index 00000000..b681ff14 Binary files /dev/null and b/fuzz/seed/regexp/xpath-30 differ diff --git a/fuzz/seed/regexp/xpath-31 b/fuzz/seed/regexp/xpath-31 new file mode 100644 index 00000000..cd87b0e8 Binary files /dev/null and b/fuzz/seed/regexp/xpath-31 differ diff --git 
a/fuzz/seed/regexp/xpath-32 b/fuzz/seed/regexp/xpath-32 new file mode 100644 index 00000000..c5cac32a Binary files /dev/null and b/fuzz/seed/regexp/xpath-32 differ diff --git a/fuzz/seed/regexp/xpath-33 b/fuzz/seed/regexp/xpath-33 new file mode 100644 index 00000000..89e3fcdc Binary files /dev/null and b/fuzz/seed/regexp/xpath-33 differ diff --git a/fuzz/seed/regexp/xpath-34 b/fuzz/seed/regexp/xpath-34 new file mode 100644 index 00000000..b65a3d6f Binary files /dev/null and b/fuzz/seed/regexp/xpath-34 differ diff --git a/fuzz/seed/regexp/xpath-35 b/fuzz/seed/regexp/xpath-35 new file mode 100644 index 00000000..252a70c2 Binary files /dev/null and b/fuzz/seed/regexp/xpath-35 differ diff --git a/fuzz/seed/regexp/xpath-4 b/fuzz/seed/regexp/xpath-4 new file mode 100644 index 00000000..30718c57 Binary files /dev/null and b/fuzz/seed/regexp/xpath-4 differ diff --git a/fuzz/seed/regexp/xpath-5 b/fuzz/seed/regexp/xpath-5 new file mode 100644 index 00000000..06ad88ef Binary files /dev/null and b/fuzz/seed/regexp/xpath-5 differ diff --git a/fuzz/seed/regexp/xpath-6 b/fuzz/seed/regexp/xpath-6 new file mode 100644 index 00000000..66787728 Binary files /dev/null and b/fuzz/seed/regexp/xpath-6 differ diff --git a/fuzz/seed/regexp/xpath-7 b/fuzz/seed/regexp/xpath-7 new file mode 100644 index 00000000..e69ad856 Binary files /dev/null and b/fuzz/seed/regexp/xpath-7 differ diff --git a/fuzz/seed/regexp/xpath-8 b/fuzz/seed/regexp/xpath-8 new file mode 100644 index 00000000..a8120ccd Binary files /dev/null and b/fuzz/seed/regexp/xpath-8 differ diff --git a/fuzz/seed/regexp/xpath-9 b/fuzz/seed/regexp/xpath-9 new file mode 100644 index 00000000..c037ce7f Binary files /dev/null and b/fuzz/seed/regexp/xpath-9 differ diff --git a/fuzz/seed/uri/dot b/fuzz/seed/uri/dot new file mode 100644 index 00000000..945c9b46 --- /dev/null +++ b/fuzz/seed/uri/dot @@ -0,0 +1 @@ +. 
\ No newline at end of file diff --git a/fuzz/seed/uri/full b/fuzz/seed/uri/full new file mode 100644 index 00000000..808e58a1 Binary files /dev/null and b/fuzz/seed/uri/full differ diff --git a/fuzz/testFuzzer.c b/fuzz/testFuzzer.c new file mode 100644 index 00000000..f6be7b8f --- /dev/null +++ b/fuzz/testFuzzer.c @@ -0,0 +1,55 @@ +/* + * testFuzzer.c: Test program for the custom entity loader used to fuzz + * with multiple inputs. + * + * See Copyright for the status of this software. + */ + +#include +#include +#include +#include +#include "fuzz.h" + +int +main() { + static const char data[] = + "doc.xml\\\n" + "\n" + "&ent;\\\n" + "doc.dtd\\\n" + "\n" + "\\\n" + "ent.txt\\\n" + "Hello, world!\\\n"; + static xmlChar expected[] = + "\n" + "\n" + "Hello, world!\n"; + const char *docBuffer; + size_t docSize; + xmlDocPtr doc; + xmlChar *out; + int ret = 0; + + xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + + xmlFuzzDataInit(data, sizeof(data) - 1); + xmlFuzzReadEntities(); + docBuffer = xmlFuzzMainEntity(&docSize); + doc = xmlReadMemory(docBuffer, docSize, NULL, NULL, + XML_PARSE_NOENT | XML_PARSE_DTDLOAD); + + xmlDocDumpMemory(doc, &out, NULL); + if (xmlStrcmp(out, expected) != 0) { + fprintf(stderr, "Expected:\n%sGot:\n%s", expected, out); + ret = 1; + } + + xmlFree(out); + xmlFreeDoc(doc); + xmlFuzzDataCleanup(); + + return(ret); +} + diff --git a/fuzz/uri.c b/fuzz/uri.c new file mode 100644 index 00000000..69d0439f --- /dev/null +++ b/fuzz/uri.c @@ -0,0 +1,45 @@ +/* + * uri.c: a libFuzzer target to test the URI module. + * + * See Copyright for the status of this software. 
+ */ + +#include +#include "fuzz.h" + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlURIPtr uri; + char *str[2] = { NULL, NULL }; + size_t numStrings; + + numStrings = xmlFuzzExtractStrings(data, size, str, 2); + + uri = xmlParseURI(str[0]); + xmlFree(xmlSaveUri(uri)); + xmlFreeURI(uri); + + uri = xmlParseURIRaw(str[0], 1); + xmlFree(xmlSaveUri(uri)); + xmlFreeURI(uri); + + xmlFree(xmlURIUnescapeString(str[0], -1, NULL)); + xmlFree(xmlURIEscape(BAD_CAST str[0])); + xmlFree(xmlCanonicPath(BAD_CAST str[0])); + xmlFree(xmlPathToURI(BAD_CAST str[0])); + + if (numStrings >= 2) { + xmlFree(xmlBuildURI(BAD_CAST str[1], BAD_CAST str[0])); + xmlFree(xmlBuildRelativeURI(BAD_CAST str[1], BAD_CAST str[0])); + xmlFree(xmlURIEscapeStr(BAD_CAST str[0], BAD_CAST str[1])); + } + + /* Modifies string, so must come last. */ + xmlNormalizeURIPath(str[0]); + + xmlFree(str[0]); + xmlFree(str[1]); + + return 0; +} + diff --git a/fuzz/xml.c b/fuzz/xml.c new file mode 100644 index 00000000..50dd967d --- /dev/null +++ b/fuzz/xml.c @@ -0,0 +1,90 @@ +/* + * xml.c: a libFuzzer target to test several XML parser interfaces. + * + * See Copyright for the status of this software. + */ + +#include +#include +#include +#include +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlInitParser(); + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + static const size_t maxChunkSize = 128; + xmlDocPtr doc; + xmlParserCtxtPtr ctxt; + xmlTextReaderPtr reader; + xmlChar *out; + const char *docBuffer; + size_t docSize, consumed, chunkSize; + int opts, outSize; + + xmlFuzzDataInit(data, size); + opts = xmlFuzzReadInt(); + /* XML_PARSE_HUGE still causes timeouts. 
*/ + opts &= ~XML_PARSE_HUGE; + + xmlFuzzReadEntities(); + docBuffer = xmlFuzzMainEntity(&docSize); + if (docBuffer == NULL) { + xmlFuzzDataCleanup(); + return(0); + } + + /* Pull parser */ + + doc = xmlReadMemory(docBuffer, docSize, NULL, NULL, opts); + /* Also test the serializer. */ + xmlDocDumpMemory(doc, &out, &outSize); + xmlFree(out); + xmlFreeDoc(doc); + + /* Push parser */ + + ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL); + xmlCtxtUseOptions(ctxt, opts); + + for (consumed = 0; consumed < docSize; consumed += chunkSize) { + chunkSize = docSize - consumed; + if (chunkSize > maxChunkSize) + chunkSize = maxChunkSize; + xmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0); + } + + xmlParseChunk(ctxt, NULL, 0, 1); + xmlFreeDoc(ctxt->myDoc); + xmlFreeParserCtxt(ctxt); + + /* Reader */ + + reader = xmlReaderForMemory(docBuffer, docSize, NULL, NULL, opts); + while (xmlTextReaderRead(reader) == 1) { + if (xmlTextReaderNodeType(reader) == XML_ELEMENT_NODE) { + int i, n = xmlTextReaderAttributeCount(reader); + for (i=0; i +#include +#include +#include +#include +#include +#include +#include "fuzz.h" + +static xmlHashTablePtr entities; + +static void +errorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, ...) { + /* Discard error messages. */ +} + +/* + * Write a random-length string in a format similar to FuzzedDataProvider. + * Backslash followed by newline marks the end of the string. Two + * backslashes are used to escape a backslash. + */ +static void +writeEscaped(const char *str) { + for (; *str; str++) { + int c = (unsigned char) *str; + putchar(c); + if (c == '\\') + putchar(c); + } + putchar('\\'); + putchar('\n'); +} + +/* + * A custom entity loader that writes all external DTDs or entities to a + * single file in the format expected by xmlFuzzEntityLoader. 
+ */ +static xmlParserInputPtr +entityLoader(const char *URL, const char *ID, xmlParserCtxtPtr context) { + xmlParserInputPtr in; + static const int chunkSize = 16384; + int len; + + in = xmlNoNetExternalEntityLoader(URL, ID, context); + if (in == NULL) + return(NULL); + + if (xmlHashLookup(entities, (const xmlChar *) URL) != NULL) + return(in); + + do { + len = xmlParserInputBufferGrow(in->buf, chunkSize); + if (len < 0) { + fprintf(stderr, "Error reading %s\n", URL); + xmlFreeInputStream(in); + return(NULL); + } + } while (len > 0); + + writeEscaped(URL); + writeEscaped((char *) xmlBufContent(in->buf->buffer)); + + xmlFreeInputStream(in); + + xmlHashAddEntry(entities, (const xmlChar *) URL, "seen"); + + return(xmlNoNetExternalEntityLoader(URL, ID, context)); +} + +int +main(int argc, char **argv) { + int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD; + + if (argc != 2) { + fprintf(stderr, "Usage: xmlSeed [FILE]\n"); + } + + fwrite(&opts, sizeof(opts), 1, stdout); + + entities = xmlHashCreate(4); + xmlSetGenericErrorFunc(NULL, errorFunc); + xmlSetExternalEntityLoader(entityLoader); + xmlFreeDoc(xmlReadFile(argv[1], NULL, opts)); + xmlHashFree(entities, NULL); + + return(0); +} +