From ed575963c5c0c47e769af83fa8238cbae3a1df9f Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 16 Nov 2020 10:54:19 -0500 Subject: [PATCH 1/7] Implement new fuzzer for sequence compression --- tests/fuzz/sequence_compression_api.c | 258 ++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 tests/fuzz/sequence_compression_api.c diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c new file mode 100644 index 000000000..c2ec44a27 --- /dev/null +++ b/tests/fuzz/sequence_compression_api.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2016-2020, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include +#include +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; + +#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 25) /* Allow up to 32MB generated data */ +#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ +#define ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE (1 << 19) /* Allow up to 512KB literals buffer */ +#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */ +#define ZSTD_FUZZ_GENERATE_REPCODES 0 /* Disabled repcode fuzzing for now */ + +/* Make a pseudorandom string - this simple function exists to avoid + * taking a dependency on datagen.h to have RDG_genBuffer(). We don't need anything fancy. + */ +static char *generatePseudoRandomString(char *str, size_t size) { + const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; + if (size) { + --size; + for (size_t n = 0; n < size; n++) { + int key = rand() % (int) (sizeof charset - 1); + str[n] = charset[key]; + } + } + return str; +} + +/* Returns size of source buffer */ +static size_t decodeSequences(void* dst, const ZSTD_Sequence* generatedSequences, size_t nbSequences, + const void* literals, size_t literalsSize, const void* dict, size_t dictSize) { + const uint8_t* ip = literals; + const uint8_t* dictPtr = dict; + uint8_t* op = dst; + size_t generatedSrcBufferSize = 0; + size_t bytesWritten = 0; + + /* Note that src is a literals buffer */ + for (size_t i = 0; i < nbSequences; ++i) { + assert(generatedSequences[i].matchLength != 0); + assert(generatedSequences[i].offset != 0); + + ZSTD_memcpy(op, ip, generatedSequences[i].litLength); + bytesWritten += generatedSequences[i].litLength; + op += generatedSequences[i].litLength; + ip += generatedSequences[i].litLength; + literalsSize -= generatedSequences[i].litLength; + + assert(generatedSequences[i].offset != 0); + /* Copy over the match */ + { size_t matchLength = generatedSequences[i].matchLength; + size_t j = 0; + size_t k = 0; + if (dictSize != 0) { + if (generatedSequences[i].offset > bytesWritten) { + /* Offset goes into the dictionary */ + size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten; + for (; k < offsetFromEndOfDict && k < matchLength; ++k) { + op[k] = dictPtr[dictSize - offsetFromEndOfDict + k]; + } + matchLength -= k; + op += k; + } + } + for (; j < matchLength; ++j) { + op[j] = op[j-(int)generatedSequences[i].offset]; + } + op += j; + assert(generatedSequences[i].matchLength == j + k); + bytesWritten += generatedSequences[i].matchLength; + } + } + generatedSrcBufferSize = bytesWritten; + assert(ip <= literals + literalsSize); + ZSTD_memcpy(op, ip, literalsSize); + return generatedSrcBufferSize; +} + +/* Returns nb sequences generated + * TODO: Add repcode fuzzing once we support repcode match splits + */ +static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_dataProducer_t* producer, + size_t literalsSize, size_t dictSize, + size_t windowLog) { + uint32_t bytesGenerated = 0; + uint32_t nbSeqGenerated = 0; + uint32_t litLength; + uint32_t matchLength; + uint32_t offset; + uint32_t offsetBound; + uint32_t repCode = 0; + uint32_t isFirstSequence = 1; + uint32_t windowSize = 1 << windowLog; + + while (bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { + litLength = isFirstSequence ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSize) + : FUZZ_dataProducer_uint32Range(producer, 0, literalsSize); + literalsSize -= litLength; + bytesGenerated += litLength; + if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { + break; + } + offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize; + offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); + matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, ZSTD_FUZZ_MATCHLENGTH_MAXSIZE); + bytesGenerated += matchLength; + if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { + break; + } + ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; + generatedSequences[nbSeqGenerated++] = seq; + isFirstSequence = 0; + } + + return nbSeqGenerated; +} + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize, + const void *dict, size_t dictSize, + const ZSTD_Sequence* generatedSequences, size_t generatedSequencesSize, + size_t wLog, unsigned cLevel, unsigned hasDict) +{ + size_t cSize; + size_t dSize; + ZSTD_CDict* cdict = NULL; + ZSTD_DDict* ddict = NULL; + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog); + /* TODO: Add block delim mode fuzzing */ + ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); + if (hasDict) { + cdict = ZSTD_createCDict(dict, dictSize, cLevel); + FUZZ_ASSERT(cdict); + ZSTD_CCtx_refCDict(cctx, cdict); + + ddict = ZSTD_createDDict(dict, dictSize); + FUZZ_ASSERT(ddict); + ZSTD_DCtx_refDDict(dctx, ddict); + } + + cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, + generatedSequences, generatedSequencesSize, + src, srcSize); + FUZZ_ZASSERT(cSize); + dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); + FUZZ_ZASSERT(dSize); + + if (cdict) { + ZSTD_freeCDict(cdict); + } + if (ddict) { + ZSTD_freeDDict(ddict); + } + return dSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + void* rBuf; + size_t rBufSize; + void* cBuf; + size_t cBufSize; + void* generatedSrc; + size_t generatedSrcSize; + ZSTD_Sequence* generatedSequences; + size_t nbSequences; + void* literalsBuffer; + size_t literalsSize; + void* dictBuffer; + size_t dictSize = 0; + unsigned hasDict; + unsigned wLog; + int cLevel; + + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + literalsSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE); + literalsBuffer = FUZZ_malloc(literalsSize); + literalsBuffer = generatePseudoRandomString(literalsBuffer, literalsSize); + + hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); + if (hasDict) { + dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE); + dictBuffer = FUZZ_malloc(dictSize); + dictBuffer = generatePseudoRandomString(dictBuffer, dictSize); + } + // Generate window log first so we dont generate offsets too large + wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + cLevel = FUZZ_dataProducer_int32Range(producer, (int)ZSTD_minCLevel, (int)ZSTD_maxCLevel); + + generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); + generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); + nbSequences = generateRandomSequences(generatedSequences, producer, literalsSize, dictSize, wLog); + generatedSrcSize = decodeSequences(generatedSrc, generatedSequences, nbSequences, literalsBuffer, literalsSize, dictBuffer, dictSize); + + cBufSize = ZSTD_compressBound(generatedSrcSize); + cBuf = FUZZ_malloc(cBufSize); + + rBufSize = generatedSrcSize; + rBuf = FUZZ_malloc(rBufSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + size_t const result = roundTripTest(rBuf, rBufSize, + cBuf, cBufSize, + generatedSrc, generatedSrcSize, + dictBuffer, dictSize, + generatedSequences, nbSequences, + wLog, cLevel, hasDict); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!"); + + free(rBuf); + free(cBuf); + free(generatedSequences); + free(generatedSrc); + free(literalsBuffer); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} \ No newline at end of file From 26bc0bfdf62a6c4140b530fce6402f8e9835a451 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 16 Nov 2020 10:54:55 -0500 Subject: [PATCH 2/7] Add new fuzzer to build targets --- tests/fuzz/Makefile | 6 +++++- tests/fuzz/fuzz.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index c88a60a33..36232a8cf 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -97,7 +97,8 @@ FUZZ_TARGETS := \ raw_dictionary_round_trip \ dictionary_stream_round_trip \ decompress_dstSize_tooSmall \ - fse_read_ncount + fse_read_ncount \ + sequence_compression_api all: libregression.a $(FUZZ_TARGETS) @@ -188,6 +189,9 @@ decompress_dstSize_tooSmall: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decom fse_read_ncount: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o $(LIB_FUZZING_ENGINE) -o $@ +sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o $(LIB_FUZZING_ENGINE) -o $@ + libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 24430a228..ef94a53b4 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -61,6 +61,7 @@ TARGET_INFO = { 'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA), 'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA), 'fse_read_ncount': TargetInfo(InputType.RAW_DATA), + 'sequence_compression_api': TargetInfo(InputType.RAW_DATA), } TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all'] From 59c021f501367296c25391d75281a430c3479711 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 16 Nov 2020 10:55:07 -0500 Subject: [PATCH 3/7] Add built binary to .gitignore --- tests/fuzz/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore index aab4760f5..9bd280c08 100644 --- a/tests/fuzz/.gitignore +++ b/tests/fuzz/.gitignore @@ -15,6 +15,7 @@ stream_round_trip zstd_frame_info decompress_dstSize_tooSmall fse_read_ncount +sequence_compression_api fuzz-*.log rt_lib_* d_lib_* From 5c68c5e31e8e5c67bcfc7cf8e357f56ae0a9568e Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 16 Nov 2020 17:53:38 -0500 Subject: [PATCH 4/7] Variety of minor fixups, reduce allocation, make deterministic --- tests/fuzz/Makefile | 2 +- tests/fuzz/sequence_compression_api.c | 126 ++++++++++++++++---------- 2 files changed, 77 insertions(+), 51 deletions(-) diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 36232a8cf..b309fa9d9 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -28,7 +28,7 @@ PRGDIR = ../../programs FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ - -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS) + -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 -DDEBUGLEVEL=5 $(CPPFLAGS) FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef \ diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index c2ec44a27..b44bf147a 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -26,22 +26,39 @@ static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; +static void* literalsBuffer = NULL; +static void* generatedSrc = NULL; +static ZSTD_Sequence* generatedSequences = NULL; #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 25) /* Allow up to 32MB generated data */ #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ -#define ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE (1 << 19) /* Allow up to 512KB literals buffer */ #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */ -#define ZSTD_FUZZ_GENERATE_REPCODES 0 /* Disabled repcode fuzzing for now */ +#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */ +#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */ + +#define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static uint32_t FUZZ_RDG_rand(uint32_t* src) +{ + static const uint32_t prime1 = 2654435761U; + static const uint32_t prime2 = 2246822519U; + uint32_t rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = FUZZ_RDG_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} /* Make a pseudorandom string - this simple function exists to avoid * taking a dependency on datagen.h to have RDG_genBuffer(). We don't need anything fancy. */ static char *generatePseudoRandomString(char *str, size_t size) { const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; + uint32_t seed = 0; if (size) { --size; for (size_t n = 0; n < size; n++) { - int key = rand() % (int) (sizeof charset - 1); + int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1); str[n] = charset[key]; } } @@ -49,26 +66,31 @@ static char *generatePseudoRandomString(char *str, size_t size) { } /* Returns size of source buffer */ -static size_t decodeSequences(void* dst, const ZSTD_Sequence* generatedSequences, size_t nbSequences, - const void* literals, size_t literalsSize, const void* dict, size_t dictSize) { - const uint8_t* ip = literals; +static size_t decodeSequences(void* dst, size_t nbSequences, + size_t literalsSize, const void* dict, size_t dictSize) { + const uint8_t* litPtr = literalsBuffer; + const uint8_t* const litBegin = literalsBuffer; + const uint8_t* const litEnd = literalsBuffer + literalsSize; const uint8_t* dictPtr = dict; uint8_t* op = dst; + const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; size_t generatedSrcBufferSize = 0; size_t bytesWritten = 0; + uint32_t lastLLSize; - /* Note that src is a literals buffer */ for (size_t i = 0; i < nbSequences; ++i) { - assert(generatedSequences[i].matchLength != 0); - assert(generatedSequences[i].offset != 0); + FUZZ_ASSERT(generatedSequences[i].matchLength != 0); + FUZZ_ASSERT(generatedSequences[i].offset != 0); - ZSTD_memcpy(op, ip, generatedSequences[i].litLength); + if (litPtr + generatedSequences[i].litLength > litEnd) { + litPtr = litBegin; + } + ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength); bytesWritten += generatedSequences[i].litLength; op += generatedSequences[i].litLength; - ip += generatedSequences[i].litLength; - literalsSize -= generatedSequences[i].litLength; + litPtr += generatedSequences[i].litLength; - assert(generatedSequences[i].offset != 0); + FUZZ_ASSERT(generatedSequences[i].offset != 0); /* Copy over the match */ { size_t matchLength = generatedSequences[i].matchLength; size_t j = 0; @@ -88,21 +110,25 @@ static size_t decodeSequences(void* dst, const ZSTD_Sequence* generatedSequences op[j] = op[j-(int)generatedSequences[i].offset]; } op += j; - assert(generatedSequences[i].matchLength == j + k); + FUZZ_ASSERT(generatedSequences[i].matchLength == j + k); bytesWritten += generatedSequences[i].matchLength; } } generatedSrcBufferSize = bytesWritten; - assert(ip <= literals + literalsSize); - ZSTD_memcpy(op, ip, literalsSize); + FUZZ_ASSERT(litPtr <= litEnd); + lastLLSize = (uint32_t)(litEnd - litPtr); + if (lastLLSize <= oend - op) { + ZSTD_memcpy(op, litPtr, lastLLSize); + generatedSrcBufferSize += lastLLSize; + } return generatedSrcBufferSize; } /* Returns nb sequences generated * TODO: Add repcode fuzzing once we support repcode match splits */ -static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_dataProducer_t* producer, - size_t literalsSize, size_t dictSize, +static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, + size_t literalsSizeLimit, size_t dictSize, size_t windowLog) { uint32_t bytesGenerated = 0; uint32_t nbSeqGenerated = 0; @@ -114,10 +140,11 @@ static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_da uint32_t isFirstSequence = 1; uint32_t windowSize = 1 << windowLog; - while (bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { - litLength = isFirstSequence ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSize) - : FUZZ_dataProducer_uint32Range(producer, 0, literalsSize); - literalsSize -= litLength; + while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ + && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE + && !FUZZ_dataProducer_empty(producer)) { + litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit) + : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit); bytesGenerated += litLength; if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; @@ -139,9 +166,9 @@ static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_da static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, - const void *src, size_t srcSize, + size_t srcSize, const void *dict, size_t dictSize, - const ZSTD_Sequence* generatedSequences, size_t generatedSequencesSize, + size_t generatedSequencesSize, size_t wLog, unsigned cLevel, unsigned hasDict) { size_t cSize; @@ -156,18 +183,13 @@ static size_t roundTripTest(void *result, size_t resultCapacity, /* TODO: Add block delim mode fuzzing */ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); if (hasDict) { - cdict = ZSTD_createCDict(dict, dictSize, cLevel); - FUZZ_ASSERT(cdict); - ZSTD_CCtx_refCDict(cctx, cdict); - - ddict = ZSTD_createDDict(dict, dictSize); - FUZZ_ASSERT(ddict); - ZSTD_DCtx_refDDict(dctx, ddict); + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize)); + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize)); } cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, generatedSequences, generatedSequencesSize, - src, srcSize); + generatedSrc, srcSize); FUZZ_ZASSERT(cSize); dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); FUZZ_ZASSERT(dSize); @@ -187,12 +209,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t rBufSize; void* cBuf; size_t cBufSize; - void* generatedSrc; size_t generatedSrcSize; - ZSTD_Sequence* generatedSequences; size_t nbSequences; - void* literalsBuffer; - size_t literalsSize; void* dictBuffer; size_t dictSize = 0; unsigned hasDict; @@ -200,9 +218,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) int cLevel; FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); - literalsSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE); - literalsBuffer = FUZZ_malloc(literalsSize); - literalsBuffer = generatePseudoRandomString(literalsBuffer, literalsSize); + if (literalsBuffer == NULL) { + literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE); + literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE); + } hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); if (hasDict) { @@ -210,14 +229,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) dictBuffer = FUZZ_malloc(dictSize); dictBuffer = generatePseudoRandomString(dictBuffer, dictSize); } - // Generate window log first so we dont generate offsets too large + /* Generate window log first so we dont generate offsets too large */ wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); cLevel = FUZZ_dataProducer_int32Range(producer, (int)ZSTD_minCLevel, (int)ZSTD_maxCLevel); - generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); - generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); - nbSequences = generateRandomSequences(generatedSequences, producer, literalsSize, dictSize, wLog); - generatedSrcSize = decodeSequences(generatedSrc, generatedSequences, nbSequences, literalsBuffer, literalsSize, dictBuffer, dictSize); + if (!generatedSequences) { + generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); + } + if (!generatedSrc) { + generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); + } + nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog); + generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize); cBufSize = ZSTD_compressBound(generatedSrcSize); cBuf = FUZZ_malloc(cBufSize); @@ -236,9 +259,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t const result = roundTripTest(rBuf, rBufSize, cBuf, cBufSize, - generatedSrc, generatedSrcSize, + generatedSrcSize, dictBuffer, dictSize, - generatedSequences, nbSequences, + nbSequences, wLog, cLevel, hasDict); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size"); @@ -246,13 +269,16 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) free(rBuf); free(cBuf); - free(generatedSequences); - free(generatedSrc); - free(literalsBuffer); FUZZ_dataProducer_free(producer); + if (hasDict) { + free(dictBuffer); + } #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; + free(generatedSequences); generatedSequences = NULL; + free(generatedSrc); generatedSrc = NULL; + free(literalsBuffer); literalsBuffer = NULL; #endif return 0; -} \ No newline at end of file +} From a73a07b189c1eef5a53c3f1588088b663197210e Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Tue, 17 Nov 2020 09:57:10 -0500 Subject: [PATCH 5/7] Add a bound for matchlength dependent on window size --- tests/fuzz/Makefile | 2 +- tests/fuzz/sequence_compression_api.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index b309fa9d9..36232a8cf 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -28,7 +28,7 @@ PRGDIR = ../../programs FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ - -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 -DDEBUGLEVEL=5 $(CPPFLAGS) + -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS) FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef \ diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index b44bf147a..97667ef5c 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -134,6 +134,7 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, uint32_t nbSeqGenerated = 0; uint32_t litLength; uint32_t matchLength; + uint32_t matchBound; uint32_t offset; uint32_t offsetBound; uint32_t repCode = 0; @@ -143,6 +144,7 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { + matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit) : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit); bytesGenerated += litLength; @@ -151,7 +153,16 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, } offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize; offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); - matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, ZSTD_FUZZ_MATCHLENGTH_MAXSIZE); + if (dictSize > 0 && bytesGenerated <= windowSize) { + uint32_t bytesToReachWindowSize = windowSize - bytesGenerated; + if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) { + offset = FUZZ_dataProducer_uint32Range(producer, 1, windowSize); + } else { + matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ? + ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize; + } + } + matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound); bytesGenerated += matchLength; if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; From 5b0c8f0a7c0db3c449451eeded7cb86562e42000 Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Wed, 18 Nov 2020 10:25:48 -0500 Subject: [PATCH 6/7] Add appropriate bound to matchlengths, and reduce srcSize max --- tests/fuzz/sequence_compression_api.c | 29 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index 97667ef5c..bc34b3915 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -9,8 +9,10 @@ */ /** - * This fuzz target performs a zstd round-trip test (compress & decompress), - * compares the result with the original, and calls abort() on corruption. + * This fuzz target performs a zstd round-trip test by generating an arbitrary + * array of sequences, generating the associated source buffer, calling + * ZSTD_compressSequences(), and then decompresses and compares the result with + * the original generated source buffer. */ #define ZSTD_STATIC_LINKING_ONLY @@ -30,12 +32,13 @@ static void* literalsBuffer = NULL; static void* generatedSrc = NULL; static ZSTD_Sequence* generatedSequences = NULL; -#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 25) /* Allow up to 32MB generated data */ +#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */ #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */ #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */ #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */ +/* Deterministic random number generator */ #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) static uint32_t FUZZ_RDG_rand(uint32_t* src) { @@ -50,7 +53,7 @@ static uint32_t FUZZ_RDG_rand(uint32_t* src) } /* Make a pseudorandom string - this simple function exists to avoid - * taking a dependency on datagen.h to have RDG_genBuffer(). We don't need anything fancy. + * taking a dependency on datagen.h to have RDG_genBuffer(). */ static char *generatePseudoRandomString(char *str, size_t size) { const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; @@ -124,7 +127,7 @@ static size_t decodeSequences(void* dst, size_t nbSequences, return generatedSrcBufferSize; } -/* Returns nb sequences generated +/* Returns nb sequences generated * TODO: Add repcode fuzzing once we support repcode match splits */ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, @@ -141,7 +144,7 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, uint32_t isFirstSequence = 1; uint32_t windowSize = 1 << windowLog; - while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ + while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; @@ -154,9 +157,14 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize; offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); if (dictSize > 0 && bytesGenerated <= windowSize) { - uint32_t bytesToReachWindowSize = windowSize - bytesGenerated; + /* Prevent match length from being such that it would be associated with an offset too large + * from the decoder's perspective. If not possible (match would be too small), + * then reduce the offset if necessary. + */ + size_t bytesToReachWindowSize = windowSize - bytesGenerated; if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) { - offset = FUZZ_dataProducer_uint32Range(producer, 1, windowSize); + uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound; + offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound); } else { matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ? ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize; @@ -241,8 +249,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) dictBuffer = generatePseudoRandomString(dictBuffer, dictSize); } /* Generate window log first so we dont generate offsets too large */ - wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - cLevel = FUZZ_dataProducer_int32Range(producer, (int)ZSTD_minCLevel, (int)ZSTD_maxCLevel); + wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32); + cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22); if (!generatedSequences) { generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); @@ -252,7 +260,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog); generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize); - cBufSize = ZSTD_compressBound(generatedSrcSize); cBuf = FUZZ_malloc(cBufSize); From c502cd33e56e4d8bd9788d10915443260001026b Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Thu, 19 Nov 2020 17:34:39 -0500 Subject: [PATCH 7/7] Fix generating 1 too few characters in random string generator --- tests/fuzz/sequence_compression_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index bc34b3915..8d225b1d7 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -59,7 +59,6 @@ static char *generatePseudoRandomString(char *str, size_t size) { const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; uint32_t seed = 0; if (size) { - --size; for (size_t n = 0; n < size; n++) { int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1); str[n] = charset[key];