From bfcd5b81d7a1f3612503e0e6ef0c6fe275e7017a Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 8 Apr 2019 19:57:41 -0700 Subject: [PATCH 01/10] [libzstd] Don't check the dictID in fuzzing mode When `FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` is defined don't check the dictID. This check makes the fuzzers job harder, and it is at the very beginning. --- lib/decompress/zstd_decompress.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index aa7f6f580..14cc12a41 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -427,8 +427,13 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), dictionary_wrong); +#endif if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); return 0; } From e649fad7aacebb4df62257fa430f301b4a00ccdd Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 8 Apr 2019 20:00:18 -0700 Subject: [PATCH 02/10] [dictBuilder] Fix displayLevel for corpus warning Pass the displaylevel into the corpus warning, because it is used in fast cover and cover, so it needs to respect the local level. --- lib/dictBuilder/cover.c | 21 +++++++++++---------- lib/dictBuilder/cover.h | 2 +- lib/dictBuilder/fastcover.c | 4 ++-- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 180a68ae8..ed5a02f98 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -627,19 +627,20 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, return 1; } -void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers) +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) { const double ratio = (double)nbDmers / maxDictSize; if (ratio >= 10) { return; } - DISPLAYLEVEL(1, "WARNING: The maximum dictionary size %u is too large " - "compared to the source size %u! " - "size(source)/size(dictionary) = %f, but it should be >= " - "10! This may lead to a subpar dictionary! We recommend " - "training on sources at least 10x, and up to 100x the " - "size of the dictionary!\n", (U32)maxDictSize, - (U32)nbDmers, ratio); + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and up to 100x the " + "size of the dictionary!\n", (U32)maxDictSize, + (U32)nbDmers, ratio); } COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, @@ -744,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( parameters.d, parameters.splitPoint)) { return ERROR(GENERIC); } - COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize); + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); COVER_ctx_destroy(&ctx); @@ -1060,7 +1061,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( return ERROR(GENERIC); } if (!warned) { - COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize); + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); warned = 1; } /* Loop through k reusing the same context */ diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h index 71c520e9e..27e6fb7a3 100644 --- a/lib/dictBuilder/cover.h +++ b/lib/dictBuilder/cover.h @@ -65,7 +65,7 @@ COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, /** * Warns the user when their corpus is too small. */ -void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers); +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); /** * Checks total compressed size of a dictionary diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index 8cb89c938..6cf37026f 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ -570,7 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, DISPLAYLEVEL(1, "Failed to initialize context\n"); return ERROR(GENERIC); } - COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers); + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); /* Build the dictionary */ DISPLAYLEVEL(2, "Building dictionary\n"); { @@ -673,7 +673,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( return ERROR(GENERIC); } if (!warned) { - COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers); + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); warned = 1; } /* Loop through k reusing the same context */ From f871b5144e1c3ac7b628fdece1032c22cb0dabd8 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 8 Apr 2019 20:01:38 -0700 Subject: [PATCH 03/10] [fuzz] Use the new advanced API --- tests/fuzz/simple_round_trip.c | 10 +--------- tests/fuzz/stream_decompress.c | 4 +--- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 83608b6e7..21ab32c7f 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -36,16 +36,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity, { size_t cSize; if (FUZZ_rand(&seed) & 1) { - ZSTD_inBuffer in = {src, srcSize, 0}; - ZSTD_outBuffer out = {compressed, compressedCapacity, 0}; - size_t err; - - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); FUZZ_setRandomParameters(cctx, srcSize, &seed); - err = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); - FUZZ_ZASSERT(err); - FUZZ_ASSERT(err == 0); - cSize = out.pos; + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); } else { int const cLevel = FUZZ_rand(&seed) % kMaxClevel; cSize = ZSTD_compressCCtx( diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index 7ad571221..68e120d7e 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -62,9 +62,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) if (!dstream) { dstream = ZSTD_createDStream(); FUZZ_ASSERT(dstream); - FUZZ_ASSERT(!ZSTD_isError(ZSTD_initDStream(dstream))); } else { - FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream))); + FUZZ_ZASSERT(ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only)); } while (size > 0) { @@ -73,7 +72,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) ZSTD_outBuffer out = makeOutBuffer(); size_t const rc = ZSTD_decompressStream(dstream, &out, &in); if (ZSTD_isError(rc)) goto error; - if (rc == 0) FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream))); } } From 462918560c0234ce69637f73ddc5193bad9515f1 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 8 Apr 2019 21:06:19 -0700 Subject: [PATCH 04/10] [fuzzer] Fix stream_round_trip for the new options --- tests/fuzz/stream_round_trip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index d903bcb29..d13c2dbe7 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -63,7 +63,7 @@ static size_t compress(uint8_t *dst, size_t capacity, ZSTD_inBuffer in = makeInBuffer(&src, &srcSize); /* Mode controls the action. If mode == -1 we pick a new mode */ int mode = -1; - while (in.pos < in.size) { + while (in.pos < in.size || mode != -1) { ZSTD_outBuffer out = makeOutBuffer(dst, capacity); /* Previous action finished, pick a new mode. */ if (mode == -1) mode = FUZZ_rand(&seed) % 10; From 7a1fde2957788c07b69772f48d95389118206806 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 8 Apr 2019 21:07:28 -0700 Subject: [PATCH 05/10] [fuzzer] Add dictionary fuzzers --- tests/fuzz/Makefile | 18 ++++-- tests/fuzz/dictionary_decompress.c | 57 +++++++++++++++++ tests/fuzz/dictionary_round_trip.c | 99 ++++++++++++++++++++++++++++++ tests/fuzz/fuzz.py | 2 + tests/fuzz/zstd_helpers.c | 55 ++++++++++++++++- tests/fuzz/zstd_helpers.h | 13 ++++ 6 files changed, 239 insertions(+), 5 deletions(-) create mode 100644 tests/fuzz/dictionary_decompress.c create mode 100644 tests/fuzz/dictionary_round_trip.c diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 8a22ad1c5..12ec9524b 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -27,7 +27,7 @@ PRGDIR = ../../programs FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \ - $(CPPFLAGS) + -DZSTD_MULTITHREAD $(CPPFLAGS) FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-prototypes -Wundef \ @@ -36,7 +36,7 @@ FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -g -fno-omit-frame-pointer FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS) FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS) -FUZZ_LDFLAGS := $(LDFLAGS) +FUZZ_LDFLAGS := -pthread $(LDFLAGS) FUZZ_ARFLAGS := $(ARFLAGS) FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) @@ -46,11 +46,13 @@ FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c +ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c FUZZ_SRC := \ $(FUZZ_SRC) \ $(ZSTDDECOMP_SRC) \ $(ZSTDCOMMON_SRC) \ - $(ZSTDCOMP_SRC) + $(ZSTDCOMP_SRC) \ + $(ZSTDDICT_SRC) FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC))) @@ -65,7 +67,9 @@ FUZZ_TARGETS := \ block_round_trip \ simple_decompress \ stream_decompress \ - block_decompress + block_decompress \ + dictionary_round_trip \ + dictionary_decompress all: $(FUZZ_TARGETS) @@ -90,6 +94,12 @@ stream_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_decompress.o block_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_decompress.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@ +dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c new file mode 100644 index 000000000..bdecdef7a --- /dev/null +++ b/tests/fuzz/dictionary_decompress.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the dictionary + * decompression function to ensure the decompressor never crashes. It does not + * fuzz the dictionary. + */ + +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + uint32_t seed = FUZZ_seed(&src, &size); + neededBufSize = MAX(20 * size, (size_t)256 << 10); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(rBuf); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + { + FUZZ_dict_t dict = FUZZ_train(src, size, &seed); + ZSTD_decompress_usingDict(dctx, + rBuf, neededBufSize, + src, size, + dict.buff, dict.size); + free(dict.buff); + } + +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c new file mode 100644 index 000000000..57a2eff3b --- /dev/null +++ b/tests/fuzz/dictionary_round_trip.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress) with + * a dictionary, compares the result with the original, and calls abort() on + * corruption. + */ + +#include +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +static const int kMaxClevel = 19; + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static void* cBuf = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; +static uint32_t seed; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize) +{ + FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed); + size_t cSize; + if ((FUZZ_rand(&seed) & 15) == 0) { + int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + + cSize = ZSTD_compress_usingDict(cctx, + compressed, compressedCapacity, + src, srcSize, + dict.buff, dict.size, + cLevel); + } else { + FUZZ_setRandomParameters(cctx, srcSize, &seed); + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict.buff, dict.size)); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + } + FUZZ_ZASSERT(cSize); + { + size_t const ret = ZSTD_decompress_usingDict(dctx, + result, resultCapacity, + compressed, cSize, + dict.buff, dict.size); + free(dict.buff); + return ret; + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t neededBufSize; + + seed = FUZZ_seed(&src, &size); + neededBufSize = ZSTD_compressBound(size); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(cBuf); + free(rBuf); + cBuf = malloc(neededBufSize); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(cBuf && rBuf); + } + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + } +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 693762985..ee27015a5 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -34,6 +34,8 @@ TARGETS = [ 'simple_decompress', 'stream_decompress', 'block_decompress', + 'dictionary_round_trip', + 'dictionary_decompress', ] ALL_TARGETS = TARGETS + ['all'] FUZZ_RNG_SEED_SIZE = 4 diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 10163e151..0e64400e6 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -8,10 +8,14 @@ */ #define ZSTD_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY + +#include #include "zstd_helpers.h" #include "fuzz_helpers.h" #include "zstd.h" +#include "zdict.h" static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) { @@ -71,7 +75,6 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state); setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state); setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state); - setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state); /* Select long distance matchig parameters */ setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state); setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state); @@ -81,4 +84,54 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) state); setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX, state); + /* Set misc parameters */ + setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state); + setRand(cctx, ZSTD_c_rsyncable, 0, 1, state); + setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state); + setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state); + setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state); +} + +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state) +{ + size_t const dictSize = MAX(srcSize / 8, 1024); + size_t const totalSampleSize = dictSize * 11; + FUZZ_dict_t dict = { malloc(dictSize), dictSize }; + char* const samples = (char*)malloc(totalSampleSize); + unsigned nbSamples = 100; + size_t* const samplesSizes = (size_t*)malloc(sizeof(size_t) * nbSamples); + size_t pos = 0; + size_t sample = 0; + ZDICT_fastCover_params_t params; + FUZZ_ASSERT(dict.buff && samples && samplesSizes); + + for (sample = 0; sample < nbSamples; ++sample) { + size_t const remaining = totalSampleSize - pos; + size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1); + size_t const limit = MIN(srcSize - offset, remaining); + size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); + memcpy(samples + pos, src + offset, toCopy); + pos += toCopy; + samplesSizes[sample] = toCopy; + + } + memset(samples + pos, 0, totalSampleSize - pos); + + memset(¶ms, 0, sizeof(params)); + params.accel = 5; + params.k = 40; + params.d = 8; + params.f = 14; + params.zParams.compressionLevel = 1; + dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, + samples, samplesSizes, nbSamples, params); + if (ZSTD_isError(dict.size)) { + free(dict.buff); + memset(&dict, 0, sizeof(dict)); + } + + free(samplesSizes); + free(samples); + + return dict; } diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h index 3856bebec..457e6e995 100644 --- a/tests/fuzz/zstd_helpers.h +++ b/tests/fuzz/zstd_helpers.h @@ -14,6 +14,8 @@ #ifndef ZSTD_HELPERS_H #define ZSTD_HELPERS_H +#define ZSTD_STATIC_LINKING_ONLY + #include "zstd.h" #include @@ -27,6 +29,17 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state); ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state); ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state); +typedef struct { + void* buff; + size_t size; +} FUZZ_dict_t; + +/* Quickly train a dictionary from a source for fuzzing. + * NOTE: Don't use this to train production dictionaries, it is only optimized + * for speed, and doesn't care about dictionary quality. + */ +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state); + #ifdef __cplusplus } From 48a6427d22f290157b8acc3f7c03c0f762a768be Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 9 Apr 2019 16:24:17 -0700 Subject: [PATCH 06/10] [libzstd] Fix ZSTD_compress2() for multithreaded compression `ZSTD_compress2()` wouldn't wait for multithreaded compression to finish. We didn't find this because ZSTDMT will block when it can compress all in one go, but it can't do that if it doesn't have enough output space, or if `ZSTD_c_rsyncable` is enabled. Since we will already sometimes block when using `ZSTD_e_end`, I've changed `ZSTD_e_end` and `ZSTD_e_flush` to guarantee maximum forward progress. This simplifies the API, and helps users avoid the easy bug that was made in `ZSTD_compress2()` * Found by the libfuzzer fuzzers. * Added a test case that catches the problem. * I will make the fuzzers sometimes allocate less than `ZSTD_compressBound()` output space. --- lib/compress/zstd_compress.c | 18 ++++++++++++++---- lib/zstd.h | 15 +++++++++++++-- tests/fuzzer.c | 13 +++++++++++++ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f2b9e03ed..4a9f6b7c8 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4179,18 +4179,28 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, /* compression stage */ #ifdef ZSTD_MULTITHREAD if (cctx->appliedParams.nbWorkers > 0) { + int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + size_t flushMin; + assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); if (cctx->cParamsChanged) { ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); cctx->cParamsChanged = 0; } - { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + do { + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } - DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); - return flushMin; - } } + FORWARD_IF_ERROR(flushMin); + } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. + */ + assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); + return flushMin; + } #endif FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); DEBUGLOG(5, "completed ZSTD_compressStream2"); diff --git a/lib/zstd.h b/lib/zstd.h index dc6348659..0c9ebe5b6 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -577,6 +577,11 @@ typedef struct ZSTD_outBuffer_s { * The caller must check if input has been entirely consumed. * If not, the caller must make some room to receive more compressed data, * and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. * @return : provides a minimum amount of data remaining to be flushed from internal buffers * or an error code, which can be tested using ZSTD_isError(). * @@ -586,6 +591,8 @@ typedef struct ZSTD_outBuffer_s { * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the * operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. * @return : 0 if internal buffers are entirely flushed, * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), * or an error code, which can be tested using ZSTD_isError(). @@ -596,6 +603,8 @@ typedef struct ZSTD_outBuffer_s { * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to * start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. * @return : 0 if frame fully completed and fully flushed, * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), * or an error code, which can be tested using ZSTD_isError(). @@ -613,11 +622,13 @@ typedef enum { ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ ZSTD_e_flush=1, /* flush any data provided so far, * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. */ + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. * note that frame is only closed after compressed data is fully flushed (return value == 0). * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). */ + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ } ZSTD_EndDirective; /*! ZSTD_compressStream2() : diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 7bc2f10cb..cfb07eb10 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -880,6 +880,19 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : Multithreaded ZSTD_compress2() with rsyncable : ", testNb++) + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + /* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so + * ZSTDMT is forced to not take the shortcut. + */ + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) ); + CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) ); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++) { ZSTD_CCtx_params* params = ZSTD_createCCtxParams(); int value; From c5d70b7dbb0a74272a5541d9e6ab523b3df0b31b Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 9 Apr 2019 16:47:59 -0700 Subject: [PATCH 07/10] [fuzzer] Sometimes fuzz with one less output byte Zstd compression sometimes does different stuff when it has at least `ZSTD_compressBound()` output bytes, or not. Half of the time fuzz with `ZSTD_compressBound() - 1` output bytes. Ensure that we have at least one byte of overhead by disabling either the dictionary ID or checksum. --- tests/fuzz/dictionary_round_trip.c | 30 +++++++++++++++--------------- tests/fuzz/simple_round_trip.c | 30 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c index 57a2eff3b..ceadbbc9d 100644 --- a/tests/fuzz/dictionary_round_trip.c +++ b/tests/fuzz/dictionary_round_trip.c @@ -24,9 +24,6 @@ static const int kMaxClevel = 19; static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; -static void* cBuf = NULL; -static void* rBuf = NULL; -static size_t bufSize = 0; static uint32_t seed; static size_t roundTripTest(void *result, size_t resultCapacity, @@ -45,6 +42,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity, cLevel); } else { FUZZ_setRandomParameters(cctx, srcSize, &seed); + /* Disable checksum so we can use sizes smaller than compress bound. */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict.buff, dict.size)); cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); } @@ -61,20 +60,19 @@ static size_t roundTripTest(void *result, size_t resultCapacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t neededBufSize; + size_t const rBufSize = size; + void* rBuf = malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void* cBuf; seed = FUZZ_seed(&src, &size); - neededBufSize = ZSTD_compressBound(size); + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we force the checksum to be disabled, + * giving us 4 bytes of overhead. + */ + cBufSize -= FUZZ_rand32(&seed, 0, 1); + cBuf = malloc(cBufSize); - /* Allocate all buffers and contexts if not already allocated */ - if (neededBufSize > bufSize) { - free(cBuf); - free(rBuf); - cBuf = malloc(neededBufSize); - rBuf = malloc(neededBufSize); - bufSize = neededBufSize; - FUZZ_ASSERT(cBuf && rBuf); - } if (!cctx) { cctx = ZSTD_createCCtx(); FUZZ_ASSERT(cctx); @@ -86,11 +84,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const result = - roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } + free(rBuf); + free(cBuf); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 21ab32c7f..7e3b66098 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -25,9 +25,6 @@ static const int kMaxClevel = 19; static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; -static void* cBuf = NULL; -static void* rBuf = NULL; -static size_t bufSize = 0; static uint32_t seed; static size_t roundTripTest(void *result, size_t resultCapacity, @@ -49,20 +46,21 @@ static size_t roundTripTest(void *result, size_t resultCapacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t neededBufSize; + size_t const rBufSize = size; + void* rBuf = malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void* cBuf; seed = FUZZ_seed(&src, &size); - neededBufSize = ZSTD_compressBound(size); + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we don't use a dictionary, so the dictID + * field is empty, giving us 4 bytes of overhead. + */ + cBufSize -= FUZZ_rand32(&seed, 0, 1); + cBuf = malloc(cBufSize); + + FUZZ_ASSERT(cBuf && rBuf); - /* Allocate all buffers and contexts if not already allocated */ - if (neededBufSize > bufSize) { - free(cBuf); - free(rBuf); - cBuf = malloc(neededBufSize); - rBuf = malloc(neededBufSize); - bufSize = neededBufSize; - FUZZ_ASSERT(cBuf && rBuf); - } if (!cctx) { cctx = ZSTD_createCCtx(); FUZZ_ASSERT(cctx); @@ -74,11 +72,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const result = - roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } + free(rBuf); + free(cBuf); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; From 824aaa695f811d081fb29ee0fb198e6cb0efdcfd Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 9 Apr 2019 17:59:27 -0700 Subject: [PATCH 08/10] [libzstd] Fix ZSTD_decompressDCtx() with a dictionary * `ZSTD_decompressDCtx()` did not use the dictionary loaded by `ZSTD_DCtx_loadDictionary()`. * Add a unit test. * A stacked diff uses `ZSTD_decompressDCtx()` in the `dictionary_round_trip` and `dictionary_decompress` fuzzers. --- lib/decompress/zstd_decompress.c | 2 +- tests/fuzzer.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 14cc12a41..d8f14882d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -788,7 +788,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, dctx->ddict); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index cfb07eb10..c38aef610 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1438,6 +1438,32 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize) ); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + CHECK_Z(cSize); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with dictionary : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t ret; + /* We should fail to decompress without a dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + /* We should succeed to decompress with the dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictSize) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* The dictionary should presist across calls. */ + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* When we reset the context the dictionary is cleared. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++); { U32 u; for (u=0; u Date: Tue, 9 Apr 2019 18:01:49 -0700 Subject: [PATCH 09/10] [fuzzer] Make the regression_driver work while fuzzers are active --- tests/fuzz/regression_driver.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index 1553d436c..658c685f4 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -40,8 +40,13 @@ int main(int argc, char const **argv) { size_t readSize; FILE *file; - /* Check that it is a regular file, and that the fileSize is valid */ - FUZZ_ASSERT_MSG(UTIL_isRegularFile(fileName), fileName); + /* Check that it is a regular file, and that the fileSize is valid. + * If it is not a regular file, then it may have been deleted since we + * constructed the list, so just skip it. + */ + if (!UTIL_isRegularFile(fileName)) { + continue; + } FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName); /* Ensure we have a large enough buffer allocated */ if (fileSize > bufferSize) { From c45dec12c59beafacd14507bee0150a69cac7726 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 9 Apr 2019 18:02:22 -0700 Subject: [PATCH 10/10] [fuzzer] Use ZSTD_DCtx_loadDictionary_advanced() half the time --- tests/fuzz/dictionary_decompress.c | 13 ++++++++++--- tests/fuzz/dictionary_round_trip.c | 17 ++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c index bdecdef7a..7d3a7678a 100644 --- a/tests/fuzz/dictionary_decompress.c +++ b/tests/fuzz/dictionary_decompress.c @@ -25,6 +25,7 @@ static size_t bufSize = 0; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { + FUZZ_dict_t dict; size_t neededBufSize; uint32_t seed = FUZZ_seed(&src, &size); @@ -41,15 +42,21 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) dctx = ZSTD_createDCtx(); FUZZ_ASSERT(dctx); } - { - FUZZ_dict_t dict = FUZZ_train(src, size, &seed); + dict = FUZZ_train(src, size, &seed); + if (FUZZ_rand32(&seed, 0, 1) == 0) { ZSTD_decompress_usingDict(dctx, rBuf, neededBufSize, src, size, dict.buff, dict.size); - free(dict.buff); + } else { + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2))); + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size); } + free(dict.buff); #ifndef STATEFUL_FUZZING ZSTD_freeDCtx(dctx); dctx = NULL; #endif diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c index ceadbbc9d..e28c65c98 100644 --- a/tests/fuzz/dictionary_round_trip.c +++ b/tests/fuzz/dictionary_round_trip.c @@ -30,6 +30,7 @@ static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, const void *src, size_t srcSize) { + ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed); size_t cSize; if ((FUZZ_rand(&seed) & 15) == 0) { @@ -41,18 +42,24 @@ static size_t roundTripTest(void *result, size_t resultCapacity, dict.buff, dict.size, cLevel); } else { + dictContentType = FUZZ_rand32(&seed, 0, 2); FUZZ_setRandomParameters(cctx, srcSize, &seed); /* Disable checksum so we can use sizes smaller than compress bound. */ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); - FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict.buff, dict.size)); + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + dictContentType)); cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); } FUZZ_ZASSERT(cSize); + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + dictContentType)); { - size_t const ret = ZSTD_decompress_usingDict(dctx, - result, resultCapacity, - compressed, cSize, - dict.buff, dict.size); + size_t const ret = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, cSize); free(dict.buff); return ret; }