1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2026-01-26 21:41:34 +03:00
Files
libxml2/fuzz/genSeed.c
Nick Wellnhofer 40e423d6c2 fuzz: Improve fuzzing of push parser
Also serialize the result of push-parsing and compare whether pull and
push parser produce the same result (differential fuzzing).

We lose the ability to inject IO errors when serializing for now, but
this isn't too important.

Use variable chunk size for push parser.

Fixes #849.
2025-01-31 15:50:00 +01:00

522 lines
13 KiB
C

/*
* genSeed.c: Generate the seed corpus for the fuzzers.
*
* See Copyright for the status of this software.
*/
#include <stdio.h>
#include <string.h>
#include <glob.h>
#include <libgen.h>
#include <sys/stat.h>
#ifdef _WIN32
#include <direct.h>
#else
#include <unistd.h>
#endif
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/HTMLparser.h>
#include <libxml/xinclude.h>
#include <libxml/xmlschemas.h>
#include "fuzz.h"
/* Size of the fixed buffers holding file paths, glob patterns and the cwd. */
#define PATH_SIZE 500
/* Size of the buffer processHtml uses to copy a document into its seed. */
#define SEED_BUF_SIZE 16384
/* Size of the line buffer used when reading XPath expressions. */
#define EXPR_SIZE 4500
/* Bits of globalData.flags; main sets one depending on the fuzzer name. */
#define FLAG_READER (1 << 0)
#define FLAG_LINT (1 << 1)
#define FLAG_XML (1 << 2)
/*
 * Per-file callback invoked by processPattern with the base name of the
 * input file and the opened seed file. A non-zero result counts as failure.
 */
typedef int
(*fileFunc)(const char *base, FILE *out);
/*
 * Per-argument callback invoked by main for every command-line argument
 * following the fuzzer name.
 */
typedef int
(*mainFunc)(const char *arg);
/*
 * State shared between main, the per-file callbacks and the resource
 * recorder.
 */
static struct {
FILE *out; /* Seed file the resource recorder writes to */
/*
 * URLs already written by fuzzResourceRecorder. Only the keys matter: the
 * recorder stores the table pointer itself as a non-NULL dummy value.
 */
xmlHashTablePtr entities;
/*
 * Result of xmlGetExternalEntityLoader() taken in fuzzRecorderInit.
 * NOTE(review): never read back in the visible code.
 */
xmlExternalEntityLoader oldLoader;
fileFunc processFile; /* Per-file callback used by processPattern */
const char *fuzzer; /* Fuzzer name (argv[1]); part of the seed output path */
int counter; /* Running number appended to XPath seed file names */
char cwd[PATH_SIZE]; /* Directory at startup; processPattern chdirs back to it */
int flags; /* Combination of the FLAG_* bits */
} globalData;
#if defined(HAVE_SCHEMA_FUZZER) || \
defined(HAVE_XML_FUZZER)
/*
 * A custom resource loader that writes all external DTDs or entities to a
 * single file in the format expected by xmlFuzzResourceLoader.
 *
 * Each URL is recorded once: the resource is read completely, its URL and
 * content are appended to globalData.out, and the URL is remembered in
 * globalData.entities. Since recording consumes the input, the URL is opened
 * a second time to produce the input handed back to the caller.
 *
 * URL:    location of the resource to load
 * flags:  passed through to xmlNewInputFromUrl
 * out:    receives the new parser input, or NULL on failure
 *
 * Returns XML_ERR_OK on success, otherwise the error code reported while
 * opening or reading the resource.
 */
static int
fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
                     const char *ID ATTRIBUTE_UNUSED,
                     xmlResourceType type ATTRIBUTE_UNUSED, int flags,
                     xmlParserInputPtr *out) {
    xmlParserInputPtr in;
    static const int chunkSize = 16384;
    int code, len;

    *out = NULL;

    code = xmlNewInputFromUrl(URL, flags, &in);
    if (code != XML_ERR_OK)
        return(code);

    if (globalData.entities == NULL) {
        globalData.entities = xmlHashCreate(4);
    } else if (xmlHashLookup(globalData.entities,
                             (const xmlChar *) URL) != NULL) {
        /* Already recorded, hand the fresh input straight to the caller. */
        *out = in;
        return(XML_ERR_OK);
    }

    /* Pull the whole resource into the input buffer. */
    do {
        len = xmlParserInputGrow(in, chunkSize);
        if (len < 0) {
            /*
             * Fetch the error code before the input is destroyed. Reading
             * it after xmlFreeInputStream would touch freed memory.
             */
            code = in->buf->error;
            fprintf(stderr, "Error reading %s\n", URL);
            xmlFreeInputStream(in);
            return(code);
        }
    } while (len > 0);

    xmlFuzzWriteString(globalData.out, URL);
    xmlFuzzWriteString(globalData.out,
                       (char *) xmlBufContent(in->buf->buffer));

    xmlFreeInputStream(in);

    /* Only the key matters, any non-NULL pointer serves as the value. */
    xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
                    globalData.entities);

    /* The first input was consumed above, so open the resource again. */
    return(xmlNewInputFromUrl(URL, flags, out));
}
/*
 * Set up the recorder state for one seed file: save the current external
 * entity loader, start with an empty table of recorded URLs and remember
 * the stream the recorder writes to.
 */
static void
fuzzRecorderInit(FILE *out) {
    globalData.oldLoader = xmlGetExternalEntityLoader();
    globalData.entities = xmlHashCreate(8);
    globalData.out = out;
}
static void
fuzzRecorderCleanup(void) {
xmlHashFree(globalData.entities, NULL);
globalData.out = NULL;
globalData.entities = NULL;
globalData.oldLoader = NULL;
}
#endif
#ifdef HAVE_XML_FUZZER
/*
 * Create the seed for one XML document.
 *
 * First the fuzzer-specific header is written to `out`; its layout depends
 * on globalData.flags. Then `docFile` is parsed (and, if available,
 * XInclude-processed) with fuzzResourceRecorder installed as resource
 * loader, which appends every loaded resource to `out`.
 *
 * Always returns 0.
 */
static int
processXml(const char *docFile, FILE *out) {
    const int parseOpts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
    xmlParserCtxtPtr parser;
    xmlDocPtr tree;

    if ((globalData.flags & FLAG_LINT) == 0) {
        /* Parser options, followed by max allocations. */
        xmlFuzzWriteInt(out, parseOpts, 4);
        xmlFuzzWriteInt(out, 0, 4);

        /* Push chunk size. */
        if (globalData.flags & FLAG_XML)
            xmlFuzzWriteInt(out, 256, 4);

        /* Initial reader program with a couple of OP_READs */
        if (globalData.flags & FLAG_READER)
            xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
    } else {
        /* Two words of switches, then maxmem. */
        xmlFuzzWriteInt(out, 0, 4);
        xmlFuzzWriteInt(out, 0, 4);
        xmlFuzzWriteInt(out, 0, 4);

        /* max-ampl and pretty, one byte each. */
        xmlFuzzWriteInt(out, 0, 1);
        xmlFuzzWriteInt(out, 0, 1);

        /* Empty encode, pattern and xpath strings. */
        xmlFuzzWriteString(out, "");
        xmlFuzzWriteString(out, "");
        xmlFuzzWriteString(out, "");
    }

    fuzzRecorderInit(out);

    parser = xmlNewParserCtxt();
    xmlCtxtSetErrorHandler(parser, xmlFuzzSErrorFunc, NULL);
    xmlCtxtSetResourceLoader(parser, fuzzResourceRecorder, NULL);
    tree = xmlCtxtReadFile(parser, docFile, NULL, parseOpts);

#ifdef LIBXML_XINCLUDE_ENABLED
    {
        /* Run XInclude as well so that included resources get recorded. */
        xmlXIncludeCtxtPtr xiCtxt = xmlXIncludeNewContext(tree);

        xmlXIncludeSetErrorHandler(xiCtxt, xmlFuzzSErrorFunc, NULL);
        xmlXIncludeSetResourceLoader(xiCtxt, fuzzResourceRecorder, NULL);
        xmlXIncludeSetFlags(xiCtxt, parseOpts);
        xmlXIncludeProcessNode(xiCtxt, (xmlNodePtr) tree);
        xmlXIncludeFreeContext(xiCtxt);
    }
#endif

    xmlFreeDoc(tree);
    xmlFreeParserCtxt(parser);

    fuzzRecorderCleanup();

    return(0);
}
#endif
#ifdef HAVE_HTML_FUZZER
/*
 * Create the seed for one HTML document: a header consisting of the parser
 * options and the allocation limit (both zero), followed by a verbatim copy
 * of `docFile`.
 *
 * Returns 0 on success, -1 if the document can't be opened or read, or if
 * the seed can't be written.
 */
static int
processHtml(const char *docFile, FILE *out) {
    char buf[SEED_BUF_SIZE];
    FILE *file;
    size_t size;
    int ret = 0;

    /* Parser options. */
    xmlFuzzWriteInt(out, 0, 4);
    /* Max allocations. */
    xmlFuzzWriteInt(out, 0, 4);

    /* Copy file */
    file = fopen(docFile, "rb");
    if (file == NULL) {
        fprintf(stderr, "couldn't open %s\n", docFile);
        return(-1);
    }

    do {
        size = fread(buf, 1, sizeof(buf), file);
        if ((size > 0) && (fwrite(buf, 1, size, out) != size)) {
            fprintf(stderr, "couldn't write seed for %s\n", docFile);
            ret = -1;
            break;
        }
    } while (size == sizeof(buf));

    /* A short read ends the loop, so tell a read error apart from EOF. */
    if (ferror(file)) {
        fprintf(stderr, "error reading %s\n", docFile);
        ret = -1;
    }

    fclose(file);

    return(ret);
}
#endif
#ifdef HAVE_SCHEMA_FUZZER
/*
 * Create the seed for one XML Schema: write the allocation limit, then
 * parse `docFile` as a schema with fuzzResourceRecorder installed, which
 * appends every loaded resource to `out`.
 *
 * Always returns 0.
 */
static int
processSchema(const char *docFile, FILE *out) {
    xmlSchemaParserCtxtPtr parser;
    xmlSchemaPtr compiled;

    /* Max allocations. */
    xmlFuzzWriteInt(out, 0, 4);

    fuzzRecorderInit(out);

    parser = xmlSchemaNewParserCtxt(docFile);
    xmlSchemaSetResourceLoader(parser, fuzzResourceRecorder, NULL);
    xmlSchemaSetParserStructuredErrors(parser, xmlFuzzSErrorFunc, NULL);

    compiled = xmlSchemaParse(parser);

    xmlSchemaFreeParserCtxt(parser);
    xmlSchemaFree(compiled);

    fuzzRecorderCleanup();

    return(0);
}
#endif
#if defined(HAVE_HTML_FUZZER) || \
defined(HAVE_SCHEMA_FUZZER) || \
defined(HAVE_XML_FUZZER)
/*
 * Generate one seed file per regular file matching the glob `pattern`.
 *
 * For each match the seed "seed/<fuzzer>/<basename>" is opened, the working
 * directory is switched to the directory of the input file and
 * globalData.processFile is called with the base name. Afterwards the
 * working directory is restored to globalData.cwd.
 *
 * Returns 0 if every file was processed (or nothing matched), -1 otherwise.
 * Failures for a single file don't stop the loop; only a failure to return
 * to the original directory does.
 */
static int
processPattern(const char *pattern) {
    glob_t matches;
    int status = 0;
    int globRes;
    size_t idx;

    globRes = glob(pattern, 0, NULL, &matches);
    if (globRes == GLOB_NOMATCH)
        return(0);
    if (globRes != 0) {
        fprintf(stderr, "couldn't match pattern %s\n", pattern);
        return(-1);
    }

    for (idx = 0; idx < matches.gl_pathc; idx++) {
        const char *srcPath = matches.gl_pathv[idx];
        struct stat info;
        char seedPath[PATH_SIZE];
        char *dirCopy;
        char *baseCopy;
        FILE *seed = NULL;
        int moved = 0;

        /* Only regular files are turned into seeds. */
        if ((stat(srcPath, &info) != 0) || (!S_ISREG(info.st_mode)))
            continue;

        /* dirname and basename get private copies of the path. */
        dirCopy = (char *) xmlCharStrdup(srcPath);
        baseCopy = (char *) xmlCharStrdup(srcPath);

        if ((dirCopy == NULL) || (baseCopy == NULL)) {
            fprintf(stderr, "memory allocation failed\n");
            status = -1;
        } else {
            const char *dirName = dirname(dirCopy);
            const char *fileName = basename(baseCopy);
            size_t needed;

            needed = snprintf(seedPath, sizeof(seedPath), "seed/%s/%s",
                              globalData.fuzzer, fileName);

            if (needed >= PATH_SIZE) {
                fprintf(stderr, "creating path failed\n");
                status = -1;
            } else if ((seed = fopen(seedPath, "wb")) == NULL) {
                fprintf(stderr, "couldn't open %s for writing\n", seedPath);
                status = -1;
            } else if (chdir(dirName) != 0) {
                fprintf(stderr, "couldn't chdir to %s\n", dirName);
                status = -1;
            } else {
                moved = 1;
                if (globalData.processFile(fileName, seed) != 0)
                    status = -1;
            }
        }

        /* Per-file cleanup, reached on success and on every failure. */
        if (seed != NULL)
            fclose(seed);
        xmlFree(dirCopy);
        xmlFree(baseCopy);

        /* Without the original directory later paths can't be resolved. */
        if ((moved) && (chdir(globalData.cwd) != 0)) {
            fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
            status = -1;
            break;
        }
    }

    globfree(&matches);
    return(status);
}
#endif
#ifdef HAVE_XPATH_FUZZER
/*
 * Generate XPath seeds from the expression files matching
 * "<testDir>/<subdir>/<prefix>*".
 *
 * Every line of every matching regular file becomes one seed named
 * "seed/xpath/<name>-<counter>", with globalData.counter incremented after
 * each seed. A seed holds the allocation limit, the expression and `data`.
 * If `xptr` is zero the expression is wrapped as "xpointer(...)", otherwise
 * it is written unchanged.
 *
 * Returns 0 on success or if nothing matched, -1 if any file couldn't be
 * processed.
 */
static int
processXPath(const char *testDir, const char *prefix, const char *name,
             const char *data, const char *subdir, int xptr) {
    char globPattern[PATH_SIZE];
    glob_t matches;
    size_t idx, len;
    int status = 0;
    int globRes;

    len = snprintf(globPattern, sizeof(globPattern), "%s/%s/%s*",
                   testDir, subdir, prefix);
    if (len >= PATH_SIZE)
        return(-1);

    globRes = glob(globPattern, 0, NULL, &matches);
    if (globRes == GLOB_NOMATCH)
        return(0);
    if (globRes != 0) {
        fprintf(stderr, "couldn't match pattern %s\n", globPattern);
        return(-1);
    }

    for (idx = 0; idx < matches.gl_pathc; idx++) {
        char *srcPath = matches.gl_pathv[idx];
        struct stat info;
        char line[EXPR_SIZE];
        FILE *src;

        if ((stat(srcPath, &info) != 0) || (!S_ISREG(info.st_mode)))
            continue;

        src = fopen(srcPath, "rb");
        if (src == NULL) {
            status = -1;
            continue;
        }

        while (fgets(line, EXPR_SIZE, src) != NULL) {
            char seedPath[PATH_SIZE];
            FILE *seed;

            /* Cut the expression at the first CR or LF. */
            line[strcspn(line, "\r\n")] = 0;

            len = snprintf(seedPath, sizeof(seedPath), "seed/xpath/%s-%d",
                           name, globalData.counter);
            if (len >= PATH_SIZE) {
                status = -1;
                continue;
            }

            seed = fopen(seedPath, "wb");
            if (seed == NULL) {
                status = -1;
                continue;
            }

            /* Max allocations. */
            xmlFuzzWriteInt(seed, 0, 4);

            if (!xptr) {
                char wrapped[EXPR_SIZE+100];

                /* Wrap XPath expressions as XPointer */
                snprintf(wrapped, sizeof(wrapped), "xpointer(%s)", line);
                xmlFuzzWriteString(seed, wrapped);
            } else {
                xmlFuzzWriteString(seed, line);
            }

            xmlFuzzWriteString(seed, data);

            fclose(seed);
            globalData.counter++;
        }

        fclose(src);
    }

    globfree(&matches);
    return(status);
}
/*
 * Generate all XPath seeds from the test directory `testDir`.
 *
 * The expressions below "<testDir>/expr" are paired with a minimal
 * document. Then, for every regular file in "<testDir>/docs", the
 * expression files starting with the document's base name in the "tests",
 * "xptr" and "xptr-xp1" subdirectories are paired with that document.
 *
 * Returns 0 on success and -1 if anything failed. As in processPattern and
 * processXPath, a pattern without matches is not an error.
 */
static int
processXPathDir(const char *testDir) {
    char pattern[PATH_SIZE];
    glob_t globbuf;
    size_t i, size;
    int ret = 0;
    int res;

    globalData.counter = 1;
    if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
        ret = -1;

    size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
    if (size >= PATH_SIZE) {
        fprintf(stderr, "creating path failed\n");
        return(-1);
    }

    res = glob(pattern, 0, NULL, &globbuf);
    if (res == GLOB_NOMATCH)
        return(ret);
    if (res != 0) {
        fprintf(stderr, "couldn't match pattern %s\n", pattern);
        return(-1);
    }

    for (i = 0; i < globbuf.gl_pathc; i++) {
        char *path = globbuf.gl_pathv[i];
        struct stat statbuf;
        char *data;
        const char *docFile;

        /* Skip directories and other non-regular entries. */
        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
            continue;

        data = xmlSlurpFile(path, NULL);
        if (data == NULL) {
            fprintf(stderr, "couldn't read %s\n", path);
            ret = -1;
            continue;
        }

        docFile = basename(path);

        /* Seed numbering restarts for every document. */
        globalData.counter = 1;
        if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
            ret = -1;
        if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
            ret = -1;
        if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
            ret = -1;

        xmlFree(data);
    }

    globfree(&globbuf);
    return(ret);
}
#endif
/*
 * Entry point: "seed FUZZER PATTERN...".
 *
 * argv[1] selects the fuzzer, which determines how the remaining arguments
 * are handled and which header layout the seeds get. Each remaining
 * argument is handed to the selected handler.
 *
 * A fuzzer that is known but disabled at build time is accepted and no
 * seeds are generated for it.
 *
 * Returns 0 on success and 1 on usage errors, an unknown fuzzer name, or
 * if processing any argument failed.
 */
int
main(int argc, const char **argv) {
    mainFunc processArg = NULL;
    const char *fuzzer;
    int ret = 0;
    int i;

    if (argc < 3) {
        fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
        return(1);
    }

    fuzzer = argv[1];
    if (strcmp(fuzzer, "html") == 0) {
#ifdef HAVE_HTML_FUZZER
        processArg = processPattern;
        globalData.processFile = processHtml;
#endif
    } else if (strcmp(fuzzer, "lint") == 0) {
#ifdef HAVE_LINT_FUZZER
        processArg = processPattern;
        globalData.flags |= FLAG_LINT;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "reader") == 0) {
#ifdef HAVE_READER_FUZZER
        processArg = processPattern;
        globalData.flags |= FLAG_READER;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "schema") == 0) {
#ifdef HAVE_SCHEMA_FUZZER
        processArg = processPattern;
        globalData.processFile = processSchema;
#endif
    } else if (strcmp(fuzzer, "valid") == 0) {
#ifdef HAVE_VALID_FUZZER
        processArg = processPattern;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "xinclude") == 0) {
#ifdef HAVE_XINCLUDE_FUZZER
        processArg = processPattern;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "xml") == 0) {
#ifdef HAVE_XML_FUZZER
        processArg = processPattern;
        globalData.flags |= FLAG_XML;
        globalData.processFile = processXml;
#endif
    } else if (strcmp(fuzzer, "xpath") == 0) {
#ifdef HAVE_XPATH_FUZZER
        processArg = processXPathDir;
#endif
    } else {
        fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
        return(1);
    }
    globalData.fuzzer = fuzzer;

    /* processPattern changes directory and needs a way back. */
    if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
        fprintf(stderr, "couldn't get current directory\n");
        return(1);
    }

    if (processArg != NULL) {
        for (i = 2; i < argc; i++) {
            /* Keep going after a failure, but report it in the exit code. */
            if (processArg(argv[i]) != 0)
                ret = 1;
        }
    }

    return(ret);
}