mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2026-01-26 21:41:34 +03:00
Also serialize the result of push-parsing and compare whether pull and push parser produce the same result (differential fuzzing). We lose the ability to inject IO errors when serializing for now, but this isn't too important. Use variable chunk size for push parser. Fixes #849.
522 lines
13 KiB
C
522 lines
13 KiB
C
/*
|
|
* xmlSeed.c: Generate the XML seed corpus for fuzzing.
|
|
*
|
|
* See Copyright for the status of this software.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <glob.h>
|
|
#include <libgen.h>
|
|
#include <sys/stat.h>
|
|
|
|
#ifdef _WIN32
|
|
#include <direct.h>
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include <libxml/parser.h>
|
|
#include <libxml/parserInternals.h>
|
|
#include <libxml/HTMLparser.h>
|
|
#include <libxml/xinclude.h>
|
|
#include <libxml/xmlschemas.h>
|
|
#include "fuzz.h"
|
|
|
|
#define PATH_SIZE 500
|
|
#define SEED_BUF_SIZE 16384
|
|
#define EXPR_SIZE 4500
|
|
|
|
#define FLAG_READER (1 << 0)
|
|
#define FLAG_LINT (1 << 1)
|
|
#define FLAG_XML (1 << 2)
|
|
|
|
typedef int
|
|
(*fileFunc)(const char *base, FILE *out);
|
|
|
|
typedef int
|
|
(*mainFunc)(const char *arg);
|
|
|
|
static struct {
|
|
FILE *out;
|
|
xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
|
|
xmlExternalEntityLoader oldLoader;
|
|
fileFunc processFile;
|
|
const char *fuzzer;
|
|
int counter;
|
|
char cwd[PATH_SIZE];
|
|
int flags;
|
|
} globalData;
|
|
|
|
#if defined(HAVE_SCHEMA_FUZZER) || \
|
|
defined(HAVE_XML_FUZZER)
|
|
/*
|
|
* A custom resource loader that writes all external DTDs or entities to a
|
|
* single file in the format expected by xmlFuzzResourceLoader.
|
|
*/
|
|
static int
|
|
fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
|
|
const char *ID ATTRIBUTE_UNUSED,
|
|
xmlResourceType type ATTRIBUTE_UNUSED, int flags,
|
|
xmlParserInputPtr *out) {
|
|
xmlParserInputPtr in;
|
|
static const int chunkSize = 16384;
|
|
int code, len;
|
|
|
|
*out = NULL;
|
|
|
|
code = xmlNewInputFromUrl(URL, flags, &in);
|
|
if (code != XML_ERR_OK)
|
|
return(code);
|
|
|
|
if (globalData.entities == NULL) {
|
|
globalData.entities = xmlHashCreate(4);
|
|
} else if (xmlHashLookup(globalData.entities,
|
|
(const xmlChar *) URL) != NULL) {
|
|
*out = in;
|
|
return(XML_ERR_OK);
|
|
}
|
|
|
|
do {
|
|
len = xmlParserInputGrow(in, chunkSize);
|
|
if (len < 0) {
|
|
fprintf(stderr, "Error reading %s\n", URL);
|
|
xmlFreeInputStream(in);
|
|
return(in->buf->error);
|
|
}
|
|
} while (len > 0);
|
|
|
|
xmlFuzzWriteString(globalData.out, URL);
|
|
xmlFuzzWriteString(globalData.out,
|
|
(char *) xmlBufContent(in->buf->buffer));
|
|
|
|
xmlFreeInputStream(in);
|
|
|
|
xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
|
|
globalData.entities);
|
|
|
|
return(xmlNewInputFromUrl(URL, flags, out));
|
|
}
|
|
|
|
static void
|
|
fuzzRecorderInit(FILE *out) {
|
|
globalData.out = out;
|
|
globalData.entities = xmlHashCreate(8);
|
|
globalData.oldLoader = xmlGetExternalEntityLoader();
|
|
}
|
|
|
|
static void
|
|
fuzzRecorderCleanup(void) {
|
|
xmlHashFree(globalData.entities, NULL);
|
|
globalData.out = NULL;
|
|
globalData.entities = NULL;
|
|
globalData.oldLoader = NULL;
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_XML_FUZZER
|
|
static int
|
|
processXml(const char *docFile, FILE *out) {
|
|
int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
|
|
xmlParserCtxtPtr ctxt;
|
|
xmlDocPtr doc;
|
|
|
|
if (globalData.flags & FLAG_LINT) {
|
|
/* Switches */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
/* maxmem */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
/* max-ampl */
|
|
xmlFuzzWriteInt(out, 0, 1);
|
|
/* pretty */
|
|
xmlFuzzWriteInt(out, 0, 1);
|
|
/* encode */
|
|
xmlFuzzWriteString(out, "");
|
|
/* pattern */
|
|
xmlFuzzWriteString(out, "");
|
|
/* xpath */
|
|
xmlFuzzWriteString(out, "");
|
|
} else {
|
|
/* Parser options. */
|
|
xmlFuzzWriteInt(out, opts, 4);
|
|
/* Max allocations. */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
|
|
if (globalData.flags & FLAG_XML) {
|
|
/* Push chunk size. */
|
|
xmlFuzzWriteInt(out, 256, 4);
|
|
}
|
|
|
|
if (globalData.flags & FLAG_READER) {
|
|
/* Initial reader program with a couple of OP_READs */
|
|
xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
|
|
}
|
|
}
|
|
|
|
fuzzRecorderInit(out);
|
|
|
|
ctxt = xmlNewParserCtxt();
|
|
xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
|
|
xmlCtxtSetResourceLoader(ctxt, fuzzResourceRecorder, NULL);
|
|
doc = xmlCtxtReadFile(ctxt, docFile, NULL, opts);
|
|
#ifdef LIBXML_XINCLUDE_ENABLED
|
|
{
|
|
xmlXIncludeCtxtPtr xinc = xmlXIncludeNewContext(doc);
|
|
|
|
xmlXIncludeSetErrorHandler(xinc, xmlFuzzSErrorFunc, NULL);
|
|
xmlXIncludeSetResourceLoader(xinc, fuzzResourceRecorder, NULL);
|
|
xmlXIncludeSetFlags(xinc, opts);
|
|
xmlXIncludeProcessNode(xinc, (xmlNodePtr) doc);
|
|
xmlXIncludeFreeContext(xinc);
|
|
}
|
|
#endif
|
|
xmlFreeDoc(doc);
|
|
xmlFreeParserCtxt(ctxt);
|
|
|
|
fuzzRecorderCleanup();
|
|
|
|
return(0);
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_HTML_FUZZER
|
|
static int
|
|
processHtml(const char *docFile, FILE *out) {
|
|
char buf[SEED_BUF_SIZE];
|
|
FILE *file;
|
|
size_t size;
|
|
|
|
/* Parser options. */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
/* Max allocations. */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
|
|
/* Copy file */
|
|
file = fopen(docFile, "rb");
|
|
if (file == NULL) {
|
|
fprintf(stderr, "couldn't open %s\n", docFile);
|
|
return(0);
|
|
}
|
|
do {
|
|
size = fread(buf, 1, SEED_BUF_SIZE, file);
|
|
if (size > 0)
|
|
fwrite(buf, 1, size, out);
|
|
} while (size == SEED_BUF_SIZE);
|
|
fclose(file);
|
|
|
|
return(0);
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_SCHEMA_FUZZER
|
|
static int
|
|
processSchema(const char *docFile, FILE *out) {
|
|
xmlSchemaPtr schema;
|
|
xmlSchemaParserCtxtPtr pctxt;
|
|
|
|
/* Max allocations. */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
|
|
fuzzRecorderInit(out);
|
|
|
|
pctxt = xmlSchemaNewParserCtxt(docFile);
|
|
xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
|
|
xmlSchemaSetResourceLoader(pctxt, fuzzResourceRecorder, NULL);
|
|
schema = xmlSchemaParse(pctxt);
|
|
xmlSchemaFreeParserCtxt(pctxt);
|
|
xmlSchemaFree(schema);
|
|
|
|
fuzzRecorderCleanup();
|
|
|
|
return(0);
|
|
}
|
|
#endif
|
|
|
|
#if defined(HAVE_HTML_FUZZER) || \
|
|
defined(HAVE_SCHEMA_FUZZER) || \
|
|
defined(HAVE_XML_FUZZER)
|
|
static int
|
|
processPattern(const char *pattern) {
|
|
glob_t globbuf;
|
|
int ret = 0;
|
|
int res;
|
|
size_t i;
|
|
|
|
res = glob(pattern, 0, NULL, &globbuf);
|
|
if (res == GLOB_NOMATCH)
|
|
return(0);
|
|
if (res != 0) {
|
|
fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
|
return(-1);
|
|
}
|
|
|
|
for (i = 0; i < globbuf.gl_pathc; i++) {
|
|
struct stat statbuf;
|
|
char outPath[PATH_SIZE];
|
|
char *dirBuf = NULL;
|
|
char *baseBuf = NULL;
|
|
const char *path, *dir, *base;
|
|
FILE *out = NULL;
|
|
int dirChanged = 0;
|
|
size_t size;
|
|
|
|
path = globbuf.gl_pathv[i];
|
|
|
|
if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
|
continue;
|
|
|
|
dirBuf = (char *) xmlCharStrdup(path);
|
|
baseBuf = (char *) xmlCharStrdup(path);
|
|
if ((dirBuf == NULL) || (baseBuf == NULL)) {
|
|
fprintf(stderr, "memory allocation failed\n");
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
dir = dirname(dirBuf);
|
|
base = basename(baseBuf);
|
|
|
|
size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
|
|
globalData.fuzzer, base);
|
|
if (size >= PATH_SIZE) {
|
|
fprintf(stderr, "creating path failed\n");
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
out = fopen(outPath, "wb");
|
|
if (out == NULL) {
|
|
fprintf(stderr, "couldn't open %s for writing\n", outPath);
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
if (chdir(dir) != 0) {
|
|
fprintf(stderr, "couldn't chdir to %s\n", dir);
|
|
ret = -1;
|
|
goto error;
|
|
}
|
|
dirChanged = 1;
|
|
if (globalData.processFile(base, out) != 0)
|
|
ret = -1;
|
|
|
|
error:
|
|
if (out != NULL)
|
|
fclose(out);
|
|
xmlFree(dirBuf);
|
|
xmlFree(baseBuf);
|
|
if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
|
|
fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
|
|
ret = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
globfree(&globbuf);
|
|
return(ret);
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_XPATH_FUZZER
|
|
static int
|
|
processXPath(const char *testDir, const char *prefix, const char *name,
|
|
const char *data, const char *subdir, int xptr) {
|
|
char pattern[PATH_SIZE];
|
|
glob_t globbuf;
|
|
size_t i, size;
|
|
int ret = 0, res;
|
|
|
|
size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
|
|
testDir, subdir, prefix);
|
|
if (size >= PATH_SIZE)
|
|
return(-1);
|
|
res = glob(pattern, 0, NULL, &globbuf);
|
|
if (res == GLOB_NOMATCH)
|
|
return(0);
|
|
if (res != 0) {
|
|
fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
|
return(-1);
|
|
}
|
|
|
|
for (i = 0; i < globbuf.gl_pathc; i++) {
|
|
char *path = globbuf.gl_pathv[i];
|
|
struct stat statbuf;
|
|
FILE *in;
|
|
char expr[EXPR_SIZE];
|
|
|
|
if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
|
continue;
|
|
|
|
in = fopen(path, "rb");
|
|
if (in == NULL) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
|
|
while (fgets(expr, EXPR_SIZE, in) != NULL) {
|
|
char outPath[PATH_SIZE];
|
|
FILE *out;
|
|
int j;
|
|
|
|
for (j = 0; expr[j] != 0; j++)
|
|
if (expr[j] == '\r' || expr[j] == '\n')
|
|
break;
|
|
expr[j] = 0;
|
|
|
|
size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
|
|
name, globalData.counter);
|
|
if (size >= PATH_SIZE) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
out = fopen(outPath, "wb");
|
|
if (out == NULL) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
|
|
/* Max allocations. */
|
|
xmlFuzzWriteInt(out, 0, 4);
|
|
|
|
if (xptr) {
|
|
xmlFuzzWriteString(out, expr);
|
|
} else {
|
|
char xptrExpr[EXPR_SIZE+100];
|
|
|
|
/* Wrap XPath expressions as XPointer */
|
|
snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
|
|
xmlFuzzWriteString(out, xptrExpr);
|
|
}
|
|
|
|
xmlFuzzWriteString(out, data);
|
|
|
|
fclose(out);
|
|
globalData.counter++;
|
|
}
|
|
|
|
fclose(in);
|
|
}
|
|
|
|
globfree(&globbuf);
|
|
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
processXPathDir(const char *testDir) {
|
|
char pattern[PATH_SIZE];
|
|
glob_t globbuf;
|
|
size_t i, size;
|
|
int ret = 0;
|
|
|
|
globalData.counter = 1;
|
|
if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
|
|
ret = -1;
|
|
|
|
size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
|
|
if (size >= PATH_SIZE)
|
|
return(1);
|
|
if (glob(pattern, 0, NULL, &globbuf) != 0)
|
|
return(1);
|
|
|
|
for (i = 0; i < globbuf.gl_pathc; i++) {
|
|
char *path = globbuf.gl_pathv[i];
|
|
char *data;
|
|
const char *docFile;
|
|
|
|
data = xmlSlurpFile(path, NULL);
|
|
if (data == NULL) {
|
|
ret = -1;
|
|
continue;
|
|
}
|
|
docFile = basename(path);
|
|
|
|
globalData.counter = 1;
|
|
if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
|
|
ret = -1;
|
|
if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
|
|
ret = -1;
|
|
if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
|
|
ret = -1;
|
|
|
|
xmlFree(data);
|
|
}
|
|
|
|
globfree(&globbuf);
|
|
|
|
return(ret);
|
|
}
|
|
#endif
|
|
|
|
int
|
|
main(int argc, const char **argv) {
|
|
mainFunc processArg = NULL;
|
|
const char *fuzzer;
|
|
int ret = 0;
|
|
int i;
|
|
|
|
if (argc < 3) {
|
|
fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
|
|
return(1);
|
|
}
|
|
|
|
fuzzer = argv[1];
|
|
if (strcmp(fuzzer, "html") == 0) {
|
|
#ifdef HAVE_HTML_FUZZER
|
|
processArg = processPattern;
|
|
globalData.processFile = processHtml;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "lint") == 0) {
|
|
#ifdef HAVE_LINT_FUZZER
|
|
processArg = processPattern;
|
|
globalData.flags |= FLAG_LINT;
|
|
globalData.processFile = processXml;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "reader") == 0) {
|
|
#ifdef HAVE_READER_FUZZER
|
|
processArg = processPattern;
|
|
globalData.flags |= FLAG_READER;
|
|
globalData.processFile = processXml;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "schema") == 0) {
|
|
#ifdef HAVE_SCHEMA_FUZZER
|
|
processArg = processPattern;
|
|
globalData.processFile = processSchema;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "valid") == 0) {
|
|
#ifdef HAVE_VALID_FUZZER
|
|
processArg = processPattern;
|
|
globalData.processFile = processXml;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "xinclude") == 0) {
|
|
#ifdef HAVE_XINCLUDE_FUZZER
|
|
processArg = processPattern;
|
|
globalData.processFile = processXml;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "xml") == 0) {
|
|
#ifdef HAVE_XML_FUZZER
|
|
processArg = processPattern;
|
|
globalData.flags |= FLAG_XML;
|
|
globalData.processFile = processXml;
|
|
#endif
|
|
} else if (strcmp(fuzzer, "xpath") == 0) {
|
|
#ifdef HAVE_XPATH_FUZZER
|
|
processArg = processXPathDir;
|
|
#endif
|
|
} else {
|
|
fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
|
|
return(1);
|
|
}
|
|
globalData.fuzzer = fuzzer;
|
|
|
|
if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
|
|
fprintf(stderr, "couldn't get current directory\n");
|
|
return(1);
|
|
}
|
|
|
|
if (processArg != NULL)
|
|
for (i = 2; i < argc; i++)
|
|
processArg(argv[i]);
|
|
|
|
return(ret);
|
|
}
|
|
|