mirror of
https://github.com/facebook/zstd.git
synced 2025-07-29 11:21:22 +03:00
modify sequence compression api fuzzer
This commit is contained in:
@ -22,6 +22,7 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/mman.h>
|
||||
#include "fuzz_helpers.h"
|
||||
#include "zstd_helpers.h"
|
||||
#include "fuzz_data_producer.h"
|
||||
@ -32,11 +33,17 @@ static void* literalsBuffer = NULL;
|
||||
static void* generatedSrc = NULL;
|
||||
static ZSTD_Sequence* generatedSequences = NULL;
|
||||
|
||||
static void* dictBuffer = NULL;
|
||||
static ZSTD_CDict* cdict = NULL;
|
||||
static ZSTD_DDict* ddict = NULL;
|
||||
|
||||
#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
|
||||
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
|
||||
#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
|
||||
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */
|
||||
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */
|
||||
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
|
||||
#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
|
||||
#define ZSTD_FUZZ_DICT_FILE "sequence_fuzz_dictionary"
|
||||
|
||||
|
||||
/* Deterministic random number generator */
|
||||
#define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
@ -55,9 +62,9 @@ static uint32_t FUZZ_RDG_rand(uint32_t* src)
|
||||
/* Make a pseudorandom string - this simple function exists to avoid
|
||||
* taking a dependency on datagen.h to have RDG_genBuffer().
|
||||
*/
|
||||
static char* generatePseudoRandomString(char* str, size_t size) {
|
||||
static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
|
||||
const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
|
||||
uint32_t seed = 0;
|
||||
uint32_t seed = FUZZ_dataProducer_uint32(producer);
|
||||
if (size) {
|
||||
for (size_t n = 0; n < size; n++) {
|
||||
int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
|
||||
@ -67,6 +74,26 @@ static char* generatePseudoRandomString(char* str, size_t size) {
|
||||
return str;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create large dictionary file
|
||||
*/
|
||||
static void generateDictFile(size_t size, FUZZ_dataProducer_t* producer) {
|
||||
char c;
|
||||
FILE *dictFile;
|
||||
const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
|
||||
uint32_t seed = FUZZ_dataProducer_uint32(producer);
|
||||
|
||||
dictFile = fopen(ZSTD_FUZZ_DICT_FILE, "w");
|
||||
FUZZ_ASSERT(dictFile);
|
||||
|
||||
while (size) {
|
||||
c = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
|
||||
fputc(c, dictFile);
|
||||
size--;
|
||||
}
|
||||
fclose(dictFile);
|
||||
}
|
||||
|
||||
/* Returns size of source buffer */
|
||||
static size_t decodeSequences(void* dst, size_t nbSequences,
|
||||
size_t literalsSize,
|
||||
@ -100,14 +127,14 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
|
||||
size_t j = 0;
|
||||
size_t k = 0;
|
||||
if (dictSize != 0) {
|
||||
if (generatedSequences[i].offset > bytesWritten) {
|
||||
/* Offset goes into the dictionary */
|
||||
size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten;
|
||||
for (; k < offsetFromEndOfDict && k < matchLength; ++k) {
|
||||
op[k] = dictPtr[dictSize - offsetFromEndOfDict + k];
|
||||
if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
|
||||
size_t dictOffset = generatedSequences[i].offset - bytesWritten;
|
||||
size_t matchInDict = MIN(matchLength, dictOffset);
|
||||
for (; k < matchInDict; ++k) {
|
||||
op[k] = dictPtr[dictSize - dictOffset + k];
|
||||
}
|
||||
matchLength -= k;
|
||||
op += k;
|
||||
matchLength -= matchInDict;
|
||||
op += matchInDict;
|
||||
}
|
||||
}
|
||||
for (; j < matchLength; ++j) {
|
||||
@ -138,9 +165,9 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
|
||||
size_t literalsSizeLimit, size_t dictSize,
|
||||
size_t windowLog, ZSTD_sequenceFormat_e mode)
|
||||
{
|
||||
const uint32_t repCode = 0; /* not used by sequence ingestion api */
|
||||
const uint32_t windowSize = 1 << windowLog;
|
||||
const uint32_t blockSizeMax = MIN(128 << 10, 1 << windowLog);
|
||||
const uint32_t repCode = 0; /* Not used by sequence ingestion api */
|
||||
size_t windowSize = 1ULL << windowLog;
|
||||
size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
|
||||
uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
|
||||
uint32_t bytesGenerated = 0;
|
||||
uint32_t nbSeqGenerated = 0;
|
||||
@ -148,12 +175,12 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
|
||||
uint32_t blockSize = 0;
|
||||
|
||||
if (mode == ZSTD_sf_explicitBlockDelimiters) {
|
||||
/* ensure that no sequence can be larger than one block */
|
||||
/* Ensure that no sequence can be larger than one block */
|
||||
literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
|
||||
matchLengthMax = MIN(matchLengthMax, blockSizeMax/2);
|
||||
}
|
||||
|
||||
while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
|
||||
while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* Extra room for explicit delimiters */
|
||||
&& bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
|
||||
&& !FUZZ_dataProducer_empty(producer)) {
|
||||
uint32_t matchLength;
|
||||
@ -210,38 +237,31 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
|
||||
}
|
||||
generatedSequences[nbSeqGenerated++] = seq;
|
||||
isFirstSequence = 0;
|
||||
} }
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == ZSTD_sf_explicitBlockDelimiters) {
|
||||
/* always end sequences with a block delimiter */
|
||||
const ZSTD_Sequence endBlock = {0, 0, 0, 0};
|
||||
assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
|
||||
generatedSequences[nbSeqGenerated++] = endBlock;
|
||||
}
|
||||
|
||||
return nbSeqGenerated;
|
||||
}
|
||||
|
||||
static size_t roundTripTest(void* result, size_t resultCapacity,
|
||||
void* compressed, size_t compressedCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
||||
const ZSTD_Sequence* seqs, size_t seqSize,
|
||||
int wLog, int cLevel, unsigned hasDict,
|
||||
unsigned hasDict,
|
||||
ZSTD_sequenceFormat_e mode)
|
||||
{
|
||||
size_t cSize;
|
||||
size_t dSize;
|
||||
|
||||
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
|
||||
if (hasDict) {
|
||||
FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize));
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize));
|
||||
FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
|
||||
FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
|
||||
}
|
||||
|
||||
cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
|
||||
@ -272,7 +292,6 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
|
||||
size_t cBufSize;
|
||||
size_t generatedSrcSize;
|
||||
size_t nbSequences;
|
||||
void* dictBuffer = NULL;
|
||||
size_t dictSize = 0;
|
||||
unsigned hasDict;
|
||||
unsigned wLog;
|
||||
@ -281,23 +300,66 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
|
||||
|
||||
FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
|
||||
FUZZ_ASSERT(producer);
|
||||
if (literalsBuffer == NULL) {
|
||||
|
||||
if (!cctx) {
|
||||
cctx = ZSTD_createCCtx();
|
||||
FUZZ_ASSERT(cctx);
|
||||
}
|
||||
if (!dctx) {
|
||||
dctx = ZSTD_createDCtx();
|
||||
FUZZ_ASSERT(dctx);
|
||||
}
|
||||
|
||||
/* Generate window log first so we don't generate offsets too large */
|
||||
wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
|
||||
cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
|
||||
mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
|
||||
|
||||
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
|
||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
|
||||
|
||||
if (!literalsBuffer) {
|
||||
literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
|
||||
FUZZ_ASSERT(literalsBuffer);
|
||||
literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
|
||||
literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
|
||||
}
|
||||
|
||||
if (!dictBuffer) { /* Generate global dictionary buffer */
|
||||
FILE* dictFile;
|
||||
ZSTD_compressionParameters cParams;
|
||||
|
||||
/* Generate a large dictionary file and mmap to buffer */
|
||||
generateDictFile(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, producer);
|
||||
dictFile = fopen(ZSTD_FUZZ_DICT_FILE, "r");
|
||||
dictBuffer = mmap(NULL, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, PROT_READ, MAP_PRIVATE, fileno(dictFile), 0);
|
||||
FUZZ_ASSERT(dictBuffer);
|
||||
fclose(dictFile);
|
||||
|
||||
/* Create global cdict and ddict*/
|
||||
cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
|
||||
cParams.minMatch = ZSTD_MINMATCH_MIN;
|
||||
cParams.hashLog = ZSTD_HASHLOG_MIN;
|
||||
cParams.chainLog = ZSTD_CHAINLOG_MIN;
|
||||
|
||||
cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
|
||||
ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
|
||||
FUZZ_ASSERT(cdict);
|
||||
FUZZ_ASSERT(ddict);
|
||||
}
|
||||
|
||||
FUZZ_ASSERT(cdict);
|
||||
FUZZ_ASSERT(ddict);
|
||||
|
||||
hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
|
||||
if (hasDict) {
|
||||
dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
|
||||
dictBuffer = FUZZ_malloc(dictSize);
|
||||
FUZZ_ASSERT(dictBuffer);
|
||||
dictBuffer = generatePseudoRandomString(dictBuffer, dictSize);
|
||||
dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
|
||||
}
|
||||
/* Generate window log first so we don't generate offsets too large */
|
||||
wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32);
|
||||
cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
|
||||
mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
|
||||
|
||||
if (!generatedSequences) {
|
||||
generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
|
||||
@ -305,8 +367,10 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
|
||||
if (!generatedSrc) {
|
||||
generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
|
||||
}
|
||||
|
||||
nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
|
||||
generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
|
||||
|
||||
/* Note : in explicit block delimiters mode,
|
||||
* the fuzzer might generate a lot of small blocks.
|
||||
* In which case, the final compressed size might be > ZSTD_compressBound().
|
||||
@ -318,30 +382,17 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
|
||||
rBufSize = generatedSrcSize;
|
||||
rBuf = FUZZ_malloc(rBufSize);
|
||||
|
||||
if (!cctx) {
|
||||
cctx = ZSTD_createCCtx();
|
||||
FUZZ_ASSERT(cctx);
|
||||
}
|
||||
if (!dctx) {
|
||||
dctx = ZSTD_createDCtx();
|
||||
FUZZ_ASSERT(dctx);
|
||||
}
|
||||
|
||||
{ const size_t result = roundTripTest(rBuf, rBufSize,
|
||||
cBuf, cBufSize,
|
||||
generatedSrc, generatedSrcSize,
|
||||
dictBuffer, dictSize,
|
||||
generatedSequences, nbSequences,
|
||||
(int)wLog, cLevel, hasDict, mode);
|
||||
hasDict, mode);
|
||||
FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */
|
||||
}
|
||||
|
||||
free(rBuf);
|
||||
free(cBuf);
|
||||
FUZZ_dataProducer_free(producer);
|
||||
if (hasDict) {
|
||||
free(dictBuffer);
|
||||
}
|
||||
#ifndef STATEFUL_FUZZING
|
||||
ZSTD_freeCCtx(cctx); cctx = NULL;
|
||||
ZSTD_freeDCtx(dctx); dctx = NULL;
|
||||
|
Reference in New Issue
Block a user