1
0
mirror of https://gitlab.gnome.org/GNOME/libxslt synced 2025-08-07 10:42:55 +03:00
Files
libxslt/tests/fuzz/fuzz.c
Nick Wellnhofer 77c26bad04 Don't set maxDepth in XPath contexts
The maximum recursion depth is hardcoded in libxml2 now.
2020-08-26 00:34:38 +02:00

378 lines
12 KiB
C

/*
* fuzz.c: Fuzz targets for libxslt
*
* See Copyright for the status of this software.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fuzz.h"
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxslt/extensions.h>
#include <libxslt/functions.h>
#include <libxslt/security.h>
#include <libxslt/transform.h>
#include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/xsltutils.h>
#include <libexslt/exslt.h>
#if defined(_WIN32)
#define DIR_SEP '\\'
#else
#define DIR_SEP '/'
#endif
/* Static source document; assigned from xsltFuzzLoadDoc() by the init functions. */
static xmlDocPtr doc;
/* Security preferences created in xsltFuzzInit() that forbid file and network I/O. */
static xsltSecurityPrefsPtr sec;
/* Transform context of the XPath fuzz target, created in xsltFuzzXPathInit(). */
static xsltTransformContextPtr tctxt;
/*
* Extension data registered for SAXON_NAMESPACE. xsltFuzzXPath() checks its
* size to decide whether the saxon:expression cache must be cleaned.
*/
static xmlHashTablePtr saxonExtHash;
/*
 * Error callback installed with xmlSetGenericErrorFunc().
 *
 * The message is discarded. The callback context is treated as a parser
 * context and handed to xmlStopParser().
 */
static void
xsltFuzzXmlErrorFunc(void *vctxt, const char *msg ATTRIBUTE_UNUSED, ...) {
    /*
     * Halting the parser on the first error should be slightly faster and
     * might catch some issues related to recent libxml2 changes.
     */
    xmlStopParser((xmlParserCtxtPtr) vctxt);
}
/*
* Error callback installed with xsltSetGenericErrorFunc(). It ignores all
* of its arguments, so libxslt error messages are silently dropped.
*/
static void
xsltFuzzXsltErrorFunc(void *vctxt ATTRIBUTE_UNUSED,
const char *msg ATTRIBUTE_UNUSED, ...) {
/* Intentionally empty */
}
/*
 * Common setup shared by both fuzz targets: initializes libxml2, libxslt
 * and libexslt, installs the quiet error callbacks and creates the global
 * security preferences with every I/O option forbidden.
 */
static void
xsltFuzzInit(void) {
    /* Options to forbid, applied in this order */
    static const int forbidden[] = {
        XSLT_SECPREF_READ_FILE,
        XSLT_SECPREF_WRITE_FILE,
        XSLT_SECPREF_CREATE_DIRECTORY,
        XSLT_SECPREF_READ_NETWORK,
        XSLT_SECPREF_WRITE_NETWORK
    };
    const size_t numForbidden = sizeof(forbidden) / sizeof(forbidden[0]);
    size_t i;

    /* Init libxml2, libxslt and libexslt */
    xmlInitParser();
    xmlXPathInit();
    xsltInit();
    exsltRegisterAll();

    /* Suppress error messages */
    xmlSetGenericErrorFunc(NULL, xsltFuzzXmlErrorFunc);
    xsltSetGenericErrorFunc(NULL, xsltFuzzXsltErrorFunc);

    /* Disallow I/O */
    sec = xsltNewSecurityPrefs();
    for (i = 0; i < numForbidden; i++)
        xsltSetSecurityPrefs(sec, forbidden[i], xsltSecurityForbid);
}
/*
 * Parse the static XML document used by a fuzz target.
 *
 * argv0:    path of the running executable; only used when dir is NULL
 * dir:      directory containing the file, or NULL
 * filename: name of the XML file to load
 *
 * If dir is given, "<dir>/<filename>" is parsed. Otherwise the file is
 * looked up in the directory part of argv0 and, if that fails, in the
 * parent of that directory.
 *
 * Returns the parsed document, or NULL if memory for the path couldn't be
 * allocated or the file couldn't be parsed. A message is printed to stderr
 * in both failure cases. The caller owns the returned document.
 */
static xmlDocPtr
xsltFuzzLoadDoc(const char *argv0, const char *dir, const char *filename) {
    xmlDocPtr loaded = NULL;
    char *path;

    if (dir != NULL) {
        size_t pathSize = strlen(dir) + 1 + strlen(filename) + 1;

        path = malloc(pathSize);
        if (path == NULL) {
            fprintf(stderr, "Error: out of memory\n");
            return NULL;
        }
        snprintf(path, pathSize, "%s/%s", dir, filename);

        loaded = xmlReadFile(path, NULL, 0);
        if (loaded == NULL)
            fprintf(stderr, "Error: unable to parse file '%s' in '%s'\n",
                    filename, dir);
    } else {
        /* Size of filename including the terminating NUL */
        size_t nameSize = strlen(filename) + 1;
        const char *end;
        size_t dirLen;

        /* Keep everything up to and including the last separator */
        end = strrchr(argv0, DIR_SEP);
        dirLen = (end == NULL) ? 0 : (size_t) (end - argv0) + 1;

        path = malloc(dirLen + nameSize);
        if (path == NULL) {
            fprintf(stderr, "Error: out of memory\n");
            return NULL;
        }
        memcpy(path, argv0, dirLen);
        memcpy(path + dirLen, filename, nameSize);

        loaded = xmlReadFile(path, NULL, 0);

        if (loaded == NULL && dirLen > 0) {
            /*
             * Binary might be in .libs, try parent directory. The new
             * prefix is never longer than the old one, so the buffer
             * is still large enough.
             */
            path[dirLen - 1] = '\0';
            end = strrchr(path, DIR_SEP);
            dirLen = (end == NULL) ? 0 : (size_t) (end - path) + 1;
            memcpy(path + dirLen, filename, nameSize);

            loaded = xmlReadFile(path, NULL, 0);
        }

        if (loaded == NULL)
            fprintf(stderr, "Error: unable to parse file '%s'\n", filename);
    }

    free(path);

    return loaded;
}
/* XPath fuzzer
*
* This fuzz target parses and evaluates XPath expressions in an (E)XSLT
* context using a static XML document. It heavily exercises the libxml2
* XPath engine (xpath.c), a few other parts of libxml2, and most of
* libexslt.
*
* Some EXSLT functions need the transform context to create RVTs for
* node-sets. A couple of functions also access the stylesheet. The
* XPath context from the transform context is used to parse and
* evaluate expressions.
*
* All these objects are created once at startup. After fuzzing each input,
* they're reset as cheaply as possible.
*
* TODO
*
* - Some expressions can create lots of temporary node sets (RVTs) which
* aren't freed until the whole expression has been evaluated, leading to
* excessive memory usage. Cleaning them up earlier would require
* callbacks from the XPath engine, for example after evaluating a
* predicate expression, which doesn't seem feasible. Terminating the
* evaluation after creating a certain number of RVTs is a simple
* workaround.
* - Register a custom xsl:decimal-format declaration for format-number().
* - Some functions add strings to the stylesheet or transform context
* dictionary, for example via xsltGetQName, requiring a clean up of the
* dicts after fuzzing each input. This behavior seems questionable.
* Extension functions shouldn't needlessly modify the transform context
* or stylesheet.
* - Register xsl:keys and fuzz the key() function.
* - Add a few custom func:functions.
* - Fuzz the document() function with external documents.
*/
/*
 * Set up the XPath fuzz target.
 *
 * argc_p: unused
 * argv_p: pointer to the argument vector; (*argv_p)[0] is passed to
 *         xsltFuzzLoadDoc() to locate the document when dir is NULL
 * dir:    directory containing xpath.xml, or NULL
 *
 * Loads xpath.xml, creates an empty stylesheet and a transform context
 * for it, and prepares the transform context's XPath context with an
 * operation limit, namespaces and variables.
 *
 * Returns 0 on success, or -1 if the document couldn't be loaded or the
 * stylesheet or transform context couldn't be created.
 */
int
xsltFuzzXPathInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
                  const char *dir) {
    const char *xmlFilename = "xpath.xml";
    xsltStylesheetPtr style;
    xmlXPathContextPtr xpctxt;

    xsltFuzzInit();

    /* Load XML document */
    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
    if (doc == NULL)
        return -1;

    style = xsltNewStylesheet();
    if (style == NULL)
        goto error;
    tctxt = xsltNewTransformContext(style, doc);
    if (tctxt == NULL) {
        xsltFreeStylesheet(style);
        goto error;
    }
    xsltSetCtxtSecurityPrefs(sec, tctxt);

    /*
     * Some extension functions need the current instruction.
     *
     * - format-number() for namespaces.
     * - document() for the base URL.
     * - maybe others?
     *
     * For fuzzing, it's enough to use the source document's root element.
     */
    tctxt->inst = xmlDocGetRootElement(doc);

    saxonExtHash = (xmlHashTablePtr)
        xsltStyleGetExtData(style, SAXON_NAMESPACE);

    /* Set up XPath context */
    xpctxt = tctxt->xpathCtxt;

    /* Operation limit to avoid timeouts */
    xpctxt->opLimit = 500000;

    /* Test namespaces used in xpath.xml */
    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");

    /* EXSLT namespaces */
    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);

    /* Register variables: $f (number), $b (boolean), $s (string), $n (nodes) */
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
                             xmlXPathNewString(BAD_CAST "var"));
    xmlXPathRegisterVariable(
        xpctxt, BAD_CAST "n",
        xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));

    return 0;

error:
    /* Release the document so that no half-initialized state is left */
    xmlFreeDoc(doc);
    doc = NULL;
    return -1;
}
/*
 * Run one XPath fuzz input.
 *
 * data: expression bytes, not necessarily NUL-terminated
 * size: number of bytes in data
 *
 * Compiles and evaluates the expression with the document node as context
 * node, then resets the XPath object cache, the dictionaries and the
 * saxon:expression cache if they were used.
 *
 * Returns the value of xmlXPathCompiledEval(), or NULL if the input
 * couldn't be copied or compiled. The result is meant to be released with
 * xsltFuzzXPathFreeObject().
 */
xmlXPathObjectPtr
xsltFuzzXPath(const char *data, size_t size) {
    xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
    xmlXPathCompExprPtr compExpr;
    xmlXPathObjectPtr xpathObj;
    xmlChar *xpathExpr;

    /* Null-terminate */
    xpathExpr = malloc(size + 1);
    if (xpathExpr == NULL)
        return NULL;
    /* data may be NULL for an empty input, which memcpy must not see */
    if (size > 0)
        memcpy(xpathExpr, data, size);
    xpathExpr[size] = 0;

    /* Compile and return early if the expression is invalid */
    compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
    free(xpathExpr);
    if (compExpr == NULL)
        return NULL;

    /* Initialize XPath evaluation context and evaluate */
    xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
    xpctxt->contextSize = 1;
    xpctxt->proximityPosition = 1;
    xpctxt->opCount = 0;
    xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
    xmlXPathFreeCompExpr(compExpr);

    /* Clean object cache by switching it off and on again */
    xmlXPathContextSetCache(xpctxt, 0, 0, 0);
    xmlXPathContextSetCache(xpctxt, 1, -1, 0);

    /* Clean dictionaries */
    if (xmlDictSize(tctxt->dict) > 0) {
        xmlDictFree(tctxt->dict);
        xmlDictFree(tctxt->style->dict);
        tctxt->style->dict = xmlDictCreate();
        tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
    }

    /* Clean saxon:expression cache */
    if (xmlHashSize(saxonExtHash) > 0) {
        /* There doesn't seem to be a cheaper way with the public API. */
        xsltShutdownCtxtExts(tctxt);
        xsltInitCtxtExts(tctxt);
        saxonExtHash = (xmlHashTablePtr)
            xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
    }

    return xpathObj;
}
/*
* Release a result returned by xsltFuzzXPath().
*
* obj: XPath object to free
*
* Also frees the RVTs held by the global transform context.
*/
void
xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
xmlXPathFreeObject(obj);
/* Some XSLT extension functions create RVTs. */
xsltFreeRVTs(tctxt);
}
void
xsltFuzzXPathCleanup(void) {
xsltStylesheetPtr style = tctxt->style;
xmlXPathRegisteredNsCleanup(tctxt->xpathCtxt);
xsltFreeSecurityPrefs(sec);
sec = NULL;
xsltFreeTransformContext(tctxt);
tctxt = NULL;
xsltFreeStylesheet(style);
style = NULL;
xmlFreeDoc(doc);
doc = NULL;
}
/*
* XSLT fuzzer
*
* This is a rather naive fuzz target using a static XML document.
*
* TODO
*
* - Improve seed corpus
* - Mutate multiple input documents: source, xsl:import, xsl:include
* - format-number() with xsl:decimal-format
* - Better coverage for xsl:key and key() function
* - EXSLT func:function
* - xsl:document
*/
/*
 * Set up the XSLT fuzz target.
 *
 * argc_p: unused
 * argv_p: pointer to the argument vector; (*argv_p)[0] is passed to
 *         xsltFuzzLoadDoc()
 * dir:    directory containing xslt.xml, or NULL
 *
 * Returns 0 if xslt.xml was loaded, -1 otherwise.
 */
int
xsltFuzzXsltInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
                 const char *dir) {
    const char *argv0 = (*argv_p)[0];

    xsltFuzzInit();

    /* The source document is loaded once and kept in the global */
    doc = xsltFuzzLoadDoc(argv0, dir, "xslt.xml");

    return (doc != NULL) ? 0 : -1;
}
xmlChar *
xsltFuzzXslt(const char *data, size_t size) {
xmlDocPtr xsltDoc;
xmlDocPtr result;
xmlNodePtr xsltRoot;
xsltStylesheetPtr sheet;
xsltTransformContextPtr ctxt;
xmlChar *ret = NULL;
int retLen;
xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
if (xsltDoc == NULL)
return NULL;
xsltRoot = xmlDocGetRootElement(xsltDoc);
xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");
sheet = xsltNewStylesheet();
if (sheet == NULL) {
xmlFreeDoc(xsltDoc);
return NULL;
}
sheet->xpathCtxt->opLimit = 100000;
sheet->xpathCtxt->opCount = 0;
if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
xsltFreeStylesheet(sheet);
xmlFreeDoc(xsltDoc);
return NULL;
}
ctxt = xsltNewTransformContext(sheet, doc);
xsltSetCtxtSecurityPrefs(sec, ctxt);
ctxt->maxTemplateDepth = 100;
ctxt->opLimit = 20000;
ctxt->xpathCtxt->opLimit = 100000;
ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
if (result != NULL)
xsltSaveResultToString(&ret, &retLen, result, sheet);
xmlFreeDoc(result);
xsltFreeTransformContext(ctxt);
xsltFreeStylesheet(sheet);
return ret;
}
/*
 * Tear down the XSLT fuzz target: frees the static source document and the
 * security preferences and resets both globals to NULL.
 */
void
xsltFuzzXsltCleanup(void) {
    xmlFreeDoc(doc);
    doc = NULL;

    xsltFreeSecurityPrefs(sec);
    sec = NULL;
}