diff --git a/fuzz/.gitignore b/fuzz/.gitignore index eecb4aea..92e29135 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -5,9 +5,12 @@ regexp schema schemaSeed seed/html* -seed/xml* seed/schema* +seed/xml* +seed/xpath* testFuzzer uri xml xmlSeed +xpath +xpathSeed diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am index 9a1225db..6d31c227 100644 --- a/fuzz/Makefile.am +++ b/fuzz/Makefile.am @@ -1,4 +1,5 @@ -EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed +EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed \ + xpath xpathSeed check_PROGRAMS = testFuzzer CLEANFILES = $(EXTRA_PROGRAMS) AM_CPPFLAGS = -I$(top_srcdir)/include @@ -133,3 +134,23 @@ fuzz-schema: schema$(EXEEXT) seed/schema.stamp -timeout=20 \ corpus/schema seed/schema +# XPath fuzzer + +xpathSeed_SOURCES = xpathSeed.c fuzz.c + +seed/xpath.stamp: xpathSeed$(EXEEXT) + @mkdir -p seed/xpath + @./xpathSeed$(EXEEXT) "$(top_builddir)/test/XPath" + @touch seed/xpath.stamp + +xpath_SOURCES = xpath.c fuzz.c +xpath_LDFLAGS = -fsanitize=fuzzer + +fuzz-xpath: xpath$(EXEEXT) seed/xpath.stamp + @mkdir -p corpus/xpath + ./xpath$(EXEEXT) \ + -dict=xpath.dict \ + -max_len=10000 \ + -timeout=20 \ + corpus/xpath seed/xpath + diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c index ba7c9cad..0155efe5 100644 --- a/fuzz/fuzz.c +++ b/fuzz/fuzz.c @@ -122,20 +122,24 @@ xmlFuzzReadRemaining(size_t *size) { } /* - * Write a random-length string to stdout in a format similar to + * xmlFuzzWriteString: + * @out: output file + * @str: string to write + * + * Write a random-length string to file in a format similar to * FuzzedDataProvider. Backslash followed by newline marks the end of the * string. Two backslashes are used to escape a backslash. 
*/ -static void -xmlFuzzWriteString(const char *str) { +void +xmlFuzzWriteString(FILE *out, const char *str) { for (; *str; str++) { int c = (unsigned char) *str; - putchar(c); + putc(c, out); if (c == '\\') - putchar(c); + putc(c, out); } - putchar('\\'); - putchar('\n'); + putc('\\', out); + putc('\n', out); } /** @@ -150,7 +154,7 @@ xmlFuzzWriteString(const char *str) { * * Returns a zero-terminated string or NULL if the fuzz data is exhausted. */ -static const char * +const char * xmlFuzzReadString(size_t *size) { const char *out = fuzzData.outPtr; @@ -217,8 +221,8 @@ xmlFuzzEntityRecorder(const char *URL, const char *ID, } } while (len > 0); - xmlFuzzWriteString(URL); - xmlFuzzWriteString((char *) xmlBufContent(in->buf->buffer)); + xmlFuzzWriteString(stdout, URL); + xmlFuzzWriteString(stdout, (char *) xmlBufContent(in->buf->buffer)); xmlFreeInputStream(in); diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h index 7e7fc29c..1cb788f5 100644 --- a/fuzz/fuzz.h +++ b/fuzz/fuzz.h @@ -36,6 +36,12 @@ xmlFuzzReadInt(void); const char * xmlFuzzReadRemaining(size_t *size); +void +xmlFuzzWriteString(FILE *out, const char *str); + +const char * +xmlFuzzReadString(size_t *size); + xmlParserInputPtr xmlFuzzEntityRecorder(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); diff --git a/fuzz/xpath.c b/fuzz/xpath.c new file mode 100644 index 00000000..1017adaa --- /dev/null +++ b/fuzz/xpath.c @@ -0,0 +1,49 @@ +/* + * xpath.c: a libFuzzer target to test XPath and XPointer expressions. + * + * See Copyright for the status of this software. 
+ */
+
+#include <libxml/parser.h>
+#include <libxml/xpointer.h>
+#include "fuzz.h"
+/* libFuzzer start-up hook: initialise the parser and install xmlFuzzErrorFunc. */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    xmlInitParser();
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+
+    return 0;
+}
+/* Evaluate one input: an expression string followed by an XML document string. */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    xmlDocPtr doc;
+    const char *expr, *xml;
+    size_t exprSize, xmlSize;
+
+    xmlFuzzDataInit(data, size);
+
+    expr = xmlFuzzReadString(&exprSize);
+    xml = xmlFuzzReadString(&xmlSize);
+    /* xml is NULL once the input is exhausted; xmlSize is only read otherwise. */
+    doc = (xml != NULL) ? xmlParseMemory(xml, xmlSize) : NULL;
+    if (doc != NULL) {
+        xmlXPathContextPtr xpctxt = xmlXPathNewContext(doc);
+        if (xpctxt != NULL) { /* skip evaluation if no context was created */
+            /* Resource limits to avoid timeouts and call stack overflows */
+            xpctxt->maxParserDepth = 15;
+            xpctxt->maxDepth = 100;
+            xpctxt->opLimit = 500000;
+            xmlXPathFreeObject(xmlXPtrEval(BAD_CAST expr, xpctxt));
+            xmlXPathFreeContext(xpctxt);
+        }
+    }
+    xmlFreeDoc(doc);
+
+    xmlFuzzDataCleanup();
+
+    return(0);
+}
+
diff --git a/fuzz/xpath.dict b/fuzz/xpath.dict
new file mode 100644
index 00000000..4fe375fb
--- /dev/null
+++ b/fuzz/xpath.dict
@@ -0,0 +1,94 @@
+# XML
+
+elem_a=""
+elem_b=""
+elem_c=""
+elem_d=""
+elem_empty=""
+elem_ns_a=""
+elem_ns_b=""
+
+attr_a=" a='a'"
+attr_b=" b='b'"
+
+ns_decl=" xmlns:a='a'"
+ns_default=" xmlns='a'"
+ns_prefix_a="a:"
+ns_prefix_b="b:"
+
+cdata_section=""
+
+comment=""
+
+pi=""
+
+# XPath
+
+axis_ancestor="ancestor::"
+axis_ancestor_or_self="ancestor-or-self::"
+axis_attribute="attribute::"
+axis_attribute_abbrev="@"
+axis_child="child::"
+axis_descendant="descendant::"
+axis_descendant_or_self="descendant-or-self::"
+axis_following="following::"
+axis_following_sibling="following-sibling::"
+axis_namespace="namespace::"
+axis_parent="parent::"
+axis_preceding="preceding::"
+axis_preceding_siblings="preceding-sibling::"
+axis_self="self::"
+
+node_test_ns="a:"
+
+val_num="=(1.0)"
+val_str_sq="=('a')"
+val_str_dq="=(\"a\")"
+val_node_set="=(*)"
+val_elem="=(b)"
+
+step_root="/"
+step_descendant="//"
+step_any="//*"
+step_any_l="*//"
+step_elem="//b"
+step_ns_elem="//a:a" +step_comment="//comment()" +step_node="//node()" +step_node_l="node()//" +step_pi="//processing-instruction()" +step_text="//text()" +step_parent="../" + +op_plus="+1" +op_minus=" - 1" +op_neg="-" +op_mul="*1" +op_div=" div 1" +op_mod=" mod 1" +op_and=" and 1" +op_or=" or 1" +op_ne="!=1" +op_lt="<1" +op_gt=">1" +op_le="<=1" +op_ge=">=1" +op_predicate_num="[1]" +op_predicate_last="[last()]" +op_predicate_str="['a']" +op_predicate="[1=1]" +op_arg_num=",1" +op_arg_str=",'a'" +op_arg_node=",*" +op_union="|//b" + +var_num="=$f" +var_bool="=$b" +var_str="=$s" +var_node_set="=$n" + +# Unicode + +utf8_2="\xC3\x84" +utf8_3="\xE2\x80\x9C" +utf8_4="\xF0\x9F\x98\x80" diff --git a/fuzz/xpath.options b/fuzz/xpath.options new file mode 100644 index 00000000..02d5e976 --- /dev/null +++ b/fuzz/xpath.options @@ -0,0 +1,3 @@ +[libfuzzer] +max_len = 10000 +timeout = 20 diff --git a/fuzz/xpathSeed.c b/fuzz/xpathSeed.c new file mode 100644 index 00000000..3b612984 --- /dev/null +++ b/fuzz/xpathSeed.c @@ -0,0 +1,171 @@ +/* + * xpathSeed.c: Generate the XPath and XPointer seed corpus for fuzzing. + * + * See Copyright for the status of this software. 
+ */
+
+#include <glob.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include "fuzz.h"
+
+#define PATH_SIZE 256
+#define EXPR_SIZE 4500
+/* An XML document together with the naming state for its seed files. */
+typedef struct {
+    const char *name;   /* stem of the seed file name */
+    const char *prefix; /* test files are matched as TESTDIR/SUBDIR/PREFIX* */
+    char *data;         /* document text written after the expression */
+    int counter;        /* numeric suffix of the next seed file */
+} xpathTestXml;
+/* Writes one seed file per expression line; defined after main(). */
+static int
+processXml(const char *testDir, xpathTestXml *xml, const char *subdir,
+           int xptr);
+/* Fill seed/xpath/ from the test tree at argv[1]; returns 1 if any step failed. */
+int
+main(int argc, char **argv) {
+    xpathTestXml xml;
+    char pattern[PATH_SIZE];
+    glob_t globbuf;
+    size_t i, size;
+    int ret = 0;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: xpathSeed [TESTDIR]\n");
+        return(1);
+    }
+    /* Every file under TESTDIR/expr is paired with one fixed document. */
+    xml.name = "expr";
+    xml.prefix = "";
+    xml.data = ""; /* NOTE(review): literal is empty here; confirm no markup was lost */
+    xml.counter = 1;
+    if (processXml(argv[1], &xml, "expr", 0) != 0)
+        ret = 1;
+
+    size = snprintf(pattern, sizeof(pattern), "%s/docs/*", argv[1]);
+    if (size >= PATH_SIZE)
+        return(1);
+    if (glob(pattern, 0, NULL, &globbuf) != 0)
+        return(1);
+    /* Each regular file in TESTDIR/docs is paired with the tests named after it. */
+    for (i = 0; i < globbuf.gl_pathc; i++) {
+        char *path = globbuf.gl_pathv[i];
+        FILE *xmlFile;
+        struct stat statbuf;
+
+        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
+            continue;
+        size = statbuf.st_size;
+        xmlFile = fopen(path, "rb");
+        if (xmlFile == NULL) {
+            ret = 1;
+            continue;
+        }
+        xml.data = xmlMalloc(size + 1); /* one extra byte for the terminator */
+        if (xml.data == NULL) {
+            ret = 1;
+            goto close;
+        }
+        if (fread(xml.data, 1, size, xmlFile) != size) {
+            ret = 1;
+            goto free;
+        }
+        xml.data[size] = 0;
+        xml.name = basename(path);
+        xml.prefix = xml.name;
+        xml.counter = 1;
+
+        if (processXml(argv[1], &xml, "tests", 0) != 0)
+            ret = 1;
+        if (processXml(argv[1], &xml, "xptr", 1) != 0)
+            ret = 1;
+
+free:
+        xmlFree(xml.data);
+close:
+        fclose(xmlFile);
+    }
+
+    globfree(&globbuf);
+
+    return(ret);
+}
+/* Returns 0, or -1 if a path is too long, glob() fails or a file cannot be opened. */
+static int
+processXml(const char *testDir, xpathTestXml *xml, const char *subdir,
+           int xptr) {
+    char pattern[PATH_SIZE];
+    glob_t globbuf;
+    size_t i, size;
+    int ret = 0;
+
+    size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
+                    testDir, subdir, xml->prefix);
+    if (size >= PATH_SIZE)
+        return(-1);
+    if (glob(pattern, 0, NULL, &globbuf) != 0)
+        return(-1);
+
+    for (i = 0; i < globbuf.gl_pathc; i++) {
+        char *path = globbuf.gl_pathv[i];
+        struct stat statbuf;
+        FILE *in;
+        char expr[EXPR_SIZE];
+
+        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
+            continue;
+
+        printf("## Processing %s\n", path);
+        in = fopen(path, "rb");
+        if (in == NULL) {
+            ret = -1;
+            continue;
+        }
+        /* One expression per line; fgets() returns NULL at EOF or on error. */
+        while (fgets(expr, EXPR_SIZE, in) != NULL) {
+            char outPath[PATH_SIZE];
+            FILE *out;
+            int j;
+            /* Cut the line at the first CR or LF. */
+            for (j = 0; expr[j] != 0; j++)
+                if (expr[j] == '\r' || expr[j] == '\n')
+                    break;
+            expr[j] = 0;
+
+            size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
+                            xml->name, xml->counter);
+            if (size >= PATH_SIZE) {
+                ret = -1;
+                continue;
+            }
+            out = fopen(outPath, "wb");
+            if (out == NULL) {
+                ret = -1;
+                continue;
+            }
+            /* Unless xptr is set, wrap the expression in "xpointer(...)". */
+            if (xptr) {
+                xmlFuzzWriteString(out, expr);
+            } else {
+                char xptrExpr[EXPR_SIZE+100];
+
+                snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
+                xmlFuzzWriteString(out, xptrExpr);
+            }
+
+            xmlFuzzWriteString(out, xml->data);
+
+            fclose(out);
+            xml->counter++;
+        }
+
+        fclose(in);
+    }
+
+    globfree(&globbuf);
+
+    return(ret);
+}
+