diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 36232a8cf..b309fa9d9 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -28,7 +28,7 @@ PRGDIR = ../../programs FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ - -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS) + -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 -DDEBUGLEVEL=5 $(CPPFLAGS) FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef \ diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index c2ec44a27..b44bf147a 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -26,22 +26,39 @@ static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; +static void* literalsBuffer = NULL; +static void* generatedSrc = NULL; +static ZSTD_Sequence* generatedSequences = NULL; #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 25) /* Allow up to 32MB generated data */ #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ -#define ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE (1 << 19) /* Allow up to 512KB literals buffer */ #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */ -#define ZSTD_FUZZ_GENERATE_REPCODES 0 /* Disabled repcode fuzzing for now */ +#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */ +#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */ + +#define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static uint32_t FUZZ_RDG_rand(uint32_t* src) +{ + static const uint32_t prime1 = 2654435761U; + static const uint32_t prime2 = 2246822519U; + uint32_t rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = FUZZ_RDG_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} /* Make a pseudorandom string - this simple function exists to avoid * taking a dependency on datagen.h to have RDG_genBuffer(). We don't need anything fancy. */ static char *generatePseudoRandomString(char *str, size_t size) { const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; + uint32_t seed = 0; if (size) { --size; for (size_t n = 0; n < size; n++) { - int key = rand() % (int) (sizeof charset - 1); + int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1); str[n] = charset[key]; } } @@ -49,26 +66,31 @@ static char *generatePseudoRandomString(char *str, size_t size) { } /* Returns size of source buffer */ -static size_t decodeSequences(void* dst, const ZSTD_Sequence* generatedSequences, size_t nbSequences, - const void* literals, size_t literalsSize, const void* dict, size_t dictSize) { - const uint8_t* ip = literals; +static size_t decodeSequences(void* dst, size_t nbSequences, + size_t literalsSize, const void* dict, size_t dictSize) { + const uint8_t* litPtr = literalsBuffer; + const uint8_t* const litBegin = literalsBuffer; + const uint8_t* const litEnd = literalsBuffer + literalsSize; const uint8_t* dictPtr = dict; uint8_t* op = dst; + const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; size_t generatedSrcBufferSize = 0; size_t bytesWritten = 0; + uint32_t lastLLSize; - /* Note that src is a literals buffer */ for (size_t i = 0; i < nbSequences; ++i) { - assert(generatedSequences[i].matchLength != 0); - assert(generatedSequences[i].offset != 0); + FUZZ_ASSERT(generatedSequences[i].matchLength != 0); + FUZZ_ASSERT(generatedSequences[i].offset != 0); - ZSTD_memcpy(op, ip, generatedSequences[i].litLength); + if (litPtr + generatedSequences[i].litLength > litEnd) { + litPtr = litBegin; + } + ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength); bytesWritten += generatedSequences[i].litLength; op += generatedSequences[i].litLength; - ip += generatedSequences[i].litLength; - literalsSize -= generatedSequences[i].litLength; + litPtr += generatedSequences[i].litLength; - assert(generatedSequences[i].offset != 0); + FUZZ_ASSERT(generatedSequences[i].offset != 0); /* Copy over the match */ { size_t matchLength = generatedSequences[i].matchLength; size_t j = 0; @@ -88,21 +110,25 @@ static size_t decodeSequences(void* dst, const ZSTD_Sequence* generatedSequences op[j] = op[j-(int)generatedSequences[i].offset]; } op += j; - assert(generatedSequences[i].matchLength == j + k); + FUZZ_ASSERT(generatedSequences[i].matchLength == j + k); bytesWritten += generatedSequences[i].matchLength; } } generatedSrcBufferSize = bytesWritten; - assert(ip <= literals + literalsSize); - ZSTD_memcpy(op, ip, literalsSize); + FUZZ_ASSERT(litPtr <= litEnd); + lastLLSize = (uint32_t)(litEnd - litPtr); + if (lastLLSize <= oend - op) { + ZSTD_memcpy(op, litPtr, lastLLSize); + generatedSrcBufferSize += lastLLSize; + } return generatedSrcBufferSize; } /* Returns nb sequences generated * TODO: Add repcode fuzzing once we support repcode match splits */ -static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_dataProducer_t* producer, - size_t literalsSize, size_t dictSize, +static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, + size_t literalsSizeLimit, size_t dictSize, size_t windowLog) { uint32_t bytesGenerated = 0; uint32_t nbSeqGenerated = 0; @@ -114,10 +140,11 @@ static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_da uint32_t isFirstSequence = 1; uint32_t windowSize = 1 << windowLog; - while (bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { - litLength = isFirstSequence ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSize) - : FUZZ_dataProducer_uint32Range(producer, 0, literalsSize); - literalsSize -= litLength; + while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ + && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE + && !FUZZ_dataProducer_empty(producer)) { + litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit) + : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit); bytesGenerated += litLength; if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; @@ -139,9 +166,9 @@ static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_da static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, - const void *src, size_t srcSize, + size_t srcSize, const void *dict, size_t dictSize, - const ZSTD_Sequence* generatedSequences, size_t generatedSequencesSize, + size_t generatedSequencesSize, size_t wLog, unsigned cLevel, unsigned hasDict) { size_t cSize; @@ -156,18 +183,13 @@ static size_t roundTripTest(void *result, size_t resultCapacity, /* TODO: Add block delim mode fuzzing */ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); if (hasDict) { - cdict = ZSTD_createCDict(dict, dictSize, cLevel); - FUZZ_ASSERT(cdict); - ZSTD_CCtx_refCDict(cctx, cdict); - - ddict = ZSTD_createDDict(dict, dictSize); - FUZZ_ASSERT(ddict); - ZSTD_DCtx_refDDict(dctx, ddict); + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize)); + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize)); } cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, generatedSequences, generatedSequencesSize, - src, srcSize); + generatedSrc, srcSize); FUZZ_ZASSERT(cSize); dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); FUZZ_ZASSERT(dSize); @@ -187,12 +209,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t rBufSize; void* cBuf; size_t cBufSize; - void* generatedSrc; size_t generatedSrcSize; - ZSTD_Sequence* generatedSequences; size_t nbSequences; - void* literalsBuffer; - size_t literalsSize; void* dictBuffer; size_t dictSize = 0; unsigned hasDict; @@ -200,9 +218,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) int cLevel; FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); - literalsSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE); - literalsBuffer = FUZZ_malloc(literalsSize); - literalsBuffer = generatePseudoRandomString(literalsBuffer, literalsSize); + if (literalsBuffer == NULL) { + literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE); + literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE); + } hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); if (hasDict) { @@ -210,14 +229,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) dictBuffer = FUZZ_malloc(dictSize); dictBuffer = generatePseudoRandomString(dictBuffer, dictSize); } - // Generate window log first so we dont generate offsets too large + /* Generate window log first so we dont generate offsets too large */ wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); cLevel = FUZZ_dataProducer_int32Range(producer, (int)ZSTD_minCLevel, (int)ZSTD_maxCLevel); - generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); - generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); - nbSequences = generateRandomSequences(generatedSequences, producer, literalsSize, dictSize, wLog); - generatedSrcSize = decodeSequences(generatedSrc, generatedSequences, nbSequences, literalsBuffer, literalsSize, dictBuffer, dictSize); + if (!generatedSequences) { + generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); + } + if (!generatedSrc) { + generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); + } + nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog); + generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize); cBufSize = ZSTD_compressBound(generatedSrcSize); cBuf = FUZZ_malloc(cBufSize); @@ -236,9 +259,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t const result = roundTripTest(rBuf, rBufSize, cBuf, cBufSize, - generatedSrc, generatedSrcSize, + generatedSrcSize, dictBuffer, dictSize, - generatedSequences, nbSequences, + nbSequences, wLog, cLevel, hasDict); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size"); @@ -246,13 +269,16 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) free(rBuf); free(cBuf); - free(generatedSequences); - free(generatedSrc); - free(literalsBuffer); FUZZ_dataProducer_free(producer); + if (hasDict) { + free(dictBuffer); + } #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; + free(generatedSequences); generatedSequences = NULL; + free(generatedSrc); generatedSrc = NULL; + free(literalsBuffer); literalsBuffer = NULL; #endif return 0; -} \ No newline at end of file +}