diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f2b9e03ed..4a9f6b7c8 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4179,18 +4179,28 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
     /* compression stage */
 #ifdef ZSTD_MULTITHREAD
     if (cctx->appliedParams.nbWorkers > 0) {
+        int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
+        size_t flushMin;
+        assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
         if (cctx->cParamsChanged) {
             ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
             cctx->cParamsChanged = 0;
         }
-        {   size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+        do {
+            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
             if ( ZSTD_isError(flushMin)
               || (endOp == ZSTD_e_end && flushMin == 0) ) {   /* compression completed */
                 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
             }
-            DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
-            return flushMin;
-    }   }
+            FORWARD_IF_ERROR(flushMin);
+        } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
+        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
+        /* Either we don't require maximum forward progress, we've finished the
+         * flush, or we are out of output space.
+         */
+        assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
+        return flushMin;
+    }
 #endif
     FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) );
     DEBUGLOG(5, "completed ZSTD_compressStream2");
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index aa7f6f580..d8f14882d 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -427,8 +427,13 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
     size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
     if (ZSTD_isError(result)) return result;    /* invalid header */
     RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
     RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), dictionary_wrong);
+#endif
     if (dctx->fParams.checksumFlag)
         XXH64_reset(&dctx->xxhState, 0);
     return 0;
 }
@@ -783,7 +788,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 
 size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, dctx->ddict);
 }
 
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 180a68ae8..ed5a02f98 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -627,19 +627,20 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   return 1;
 }
 
-void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers)
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
 {
   const double ratio = (double)nbDmers / maxDictSize;
   if (ratio >= 10) {
       return;
   }
-  DISPLAYLEVEL(1, "WARNING: The maximum dictionary size %u is too large "
-                  "compared to the source size %u! "
-                  "size(source)/size(dictionary) = %f, but it should be >= "
-                  "10! This may lead to a subpar dictionary! We recommend "
-                  "training on sources at least 10x, and up to 100x the "
-                  "size of the dictionary!\n", (U32)maxDictSize,
-                  (U32)nbDmers, ratio);
+  LOCALDISPLAYLEVEL(displayLevel, 1,
+                    "WARNING: The maximum dictionary size %u is too large "
+                    "compared to the source size %u! "
+                    "size(source)/size(dictionary) = %f, but it should be >= "
+                    "10! This may lead to a subpar dictionary! We recommend "
+                    "training on sources at least 10x, and up to 100x the "
+                    "size of the dictionary!\n", (U32)maxDictSize,
+                    (U32)nbDmers, ratio);
 }
 
 COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
@@ -744,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
       parameters.d, parameters.splitPoint)) {
     return ERROR(GENERIC);
   }
-  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize);
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     COVER_ctx_destroy(&ctx);
@@ -1060,7 +1061,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
         return ERROR(GENERIC);
       }
       if (!warned) {
-        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize);
+        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
         warned = 1;
       }
       /* Loop through k reusing the same context */
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index 71c520e9e..27e6fb7a3 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -65,7 +65,7 @@ COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
 /**
  * Warns the user when their corpus is too small.
  */
-void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers);
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
 
 /**
  * Checks total compressed size of a dictionary
diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c
index 8cb89c938..6cf37026f 100644
--- a/lib/dictBuilder/fastcover.c
+++ b/lib/dictBuilder/fastcover.c
@@ -570,7 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
     DISPLAYLEVEL(1, "Failed to initialize context\n");
     return ERROR(GENERIC);
   }
-  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers);
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
   /* Build the dictionary */
   DISPLAYLEVEL(2, "Building dictionary\n");
   {
@@ -673,7 +673,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
         return ERROR(GENERIC);
       }
       if (!warned) {
-        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers);
+        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
         warned = 1;
       }
       /* Loop through k reusing the same context */
diff --git a/lib/zstd.h b/lib/zstd.h
index dc6348659..0c9ebe5b6 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -577,6 +577,11 @@ typedef struct ZSTD_outBuffer_s {
 * The caller must check if input has been entirely consumed.
 * If not, the caller must make some room to receive more compressed data,
 * and then present again remaining input data.
+* note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+* but doesn't guarantee maximal forward progress. This is especially relevant
+* when compressing with multiple threads. The call won't block if it can
+* consume some input, but if it can't it will wait for some, but not all,
+* output to be flushed.
 * @return : provides a minimum amount of data remaining to be flushed from internal buffers
 *           or an error code, which can be tested using ZSTD_isError().
 *
@@ -586,6 +591,8 @@ typedef struct ZSTD_outBuffer_s {
 * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
 * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
 * operation.
+* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+* block until the flush is complete or the output buffer is full.
 * @return : 0 if internal buffers are entirely flushed,
 *           >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
 *           or an error code, which can be tested using ZSTD_isError().
@@ -596,6 +603,8 @@ typedef struct ZSTD_outBuffer_s {
 * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
 * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
 * start a new frame.
+* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+* block until the flush is complete or the output buffer is full.
 * @return : 0 if frame fully completed and fully flushed,
 *           >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
 *           or an error code, which can be tested using ZSTD_isError().
@@ -613,11 +622,13 @@ typedef enum {
     ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
     ZSTD_e_flush=1,    /* flush any data provided so far,
                         * it creates (at least) one new block, that can be decoded immediately on reception;
-                        * frame will continue: any future data can still reference previously compressed data, improving compression. */
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
     ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
                         * note that frame is only closed after compressed data is fully flushed (return value == 0).
                         * After that point, any additional data starts a new frame.
-                        * note : each frame is independent (does not reference any content from previous frame). */
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        : note : multithreaded compression will block to flush as much output as possible. */
 } ZSTD_EndDirective;
 
 /*! ZSTD_compressStream2() :
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 8a22ad1c5..12ec9524b 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -27,7 +27,7 @@ PRGDIR = ../../programs
 
 FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
 	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
-	$(CPPFLAGS)
+	-DZSTD_MULTITHREAD $(CPPFLAGS)
 FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
 	-Wstrict-prototypes -Wundef \
@@ -36,7 +36,7 @@ FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-g -fno-omit-frame-pointer
 FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS)
 FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS)
-FUZZ_LDFLAGS := $(LDFLAGS)
+FUZZ_LDFLAGS := -pthread $(LDFLAGS)
 FUZZ_ARFLAGS := $(ARFLAGS)
 
 FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS)
@@ -46,11 +46,13 @@ FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c
 ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
+ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c
 FUZZ_SRC := \
 	$(FUZZ_SRC) \
 	$(ZSTDDECOMP_SRC) \
 	$(ZSTDCOMMON_SRC) \
-	$(ZSTDCOMP_SRC)
+	$(ZSTDCOMP_SRC) \
+	$(ZSTDDICT_SRC)
 
 FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
 
@@ -65,7 +67,9 @@ FUZZ_TARGETS := \
 	block_round_trip \
 	simple_decompress \
 	stream_decompress \
-	block_decompress
+	block_decompress \
+	dictionary_round_trip \
+	dictionary_decompress
 
 all: $(FUZZ_TARGETS)
 
@@ -90,6 +94,12 @@ stream_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_decompress.o
 block_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_decompress.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@
 
+dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
+
+dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
 
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c
new file mode 100644
index 000000000..7d3a7678a
--- /dev/null
+++ b/tests/fuzz/dictionary_decompress.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target attempts to decompress the fuzzed data with the dictionary
+ * decompression function to ensure the decompressor never crashes. It does not
+ * fuzz the dictionary.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+static ZSTD_DCtx *dctx = NULL;
+static void* rBuf = NULL;
+static size_t bufSize = 0;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    FUZZ_dict_t dict;
+    size_t neededBufSize;
+
+    uint32_t seed = FUZZ_seed(&src, &size);
+    neededBufSize = MAX(20 * size, (size_t)256 << 10);
+
+    /* Allocate all buffers and contexts if not already allocated */
+    if (neededBufSize > bufSize) {
+        free(rBuf);
+        rBuf = malloc(neededBufSize);
+        bufSize = neededBufSize;
+        FUZZ_ASSERT(rBuf);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+    dict = FUZZ_train(src, size, &seed);
+    if (FUZZ_rand32(&seed, 0, 1) == 0) {
+        ZSTD_decompress_usingDict(dctx,
+                rBuf, neededBufSize,
+                src, size,
+                dict.buff, dict.size);
+    } else {
+        FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+                dctx, dict.buff, dict.size,
+                (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+                (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
+        ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
+    }
+
+    free(dict.buff);
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c
new file mode 100644
index 000000000..e28c65c98
--- /dev/null
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress) with
+ * a dictionary, compares the result with the original, and calls abort() on
+ * corruption.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+static const int kMaxClevel = 19;
+
+static ZSTD_CCtx *cctx = NULL;
+static ZSTD_DCtx *dctx = NULL;
+static uint32_t seed;
+
+static size_t roundTripTest(void *result, size_t resultCapacity,
+                            void *compressed, size_t compressedCapacity,
+                            const void *src, size_t srcSize)
+{
+    ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
+    FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed);
+    size_t cSize;
+    if ((FUZZ_rand(&seed) & 15) == 0) {
+        int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+
+        cSize = ZSTD_compress_usingDict(cctx,
+                compressed, compressedCapacity,
+                src, srcSize,
+                dict.buff, dict.size,
+                cLevel);
+    } else {
+        dictContentType = FUZZ_rand32(&seed, 0, 2);
+        FUZZ_setRandomParameters(cctx, srcSize, &seed);
+        /* Disable checksum so we can use sizes smaller than compress bound. */
+        FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
+        FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
+                cctx, dict.buff, dict.size,
+                (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+                dictContentType));
+        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+    }
+    FUZZ_ZASSERT(cSize);
+    FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+            dctx, dict.buff, dict.size,
+            (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+            dictContentType));
+    {
+        size_t const ret = ZSTD_decompressDCtx(
+                dctx, result, resultCapacity, compressed, cSize);
+        free(dict.buff);
+        return ret;
+    }
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    size_t const rBufSize = size;
+    void* rBuf = malloc(rBufSize);
+    size_t cBufSize = ZSTD_compressBound(size);
+    void* cBuf;
+
+    seed = FUZZ_seed(&src, &size);
+    /* Half of the time fuzz with a 1 byte smaller output size.
+     * This will still succeed because we force the checksum to be disabled,
+     * giving us 4 bytes of overhead.
+     */
+    cBufSize -= FUZZ_rand32(&seed, 0, 1);
+    cBuf = malloc(cBufSize);
+
+    if (!cctx) {
+        cctx = ZSTD_createCCtx();
+        FUZZ_ASSERT(cctx);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    {
+        size_t const result =
+            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
+        FUZZ_ZASSERT(result);
+        FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
+        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
+    }
+    free(rBuf);
+    free(cBuf);
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeCCtx(cctx); cctx = NULL;
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 693762985..ee27015a5 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -34,6 +34,8 @@ TARGETS = [
     'simple_decompress',
     'stream_decompress',
     'block_decompress',
+    'dictionary_round_trip',
+    'dictionary_decompress',
 ]
 ALL_TARGETS = TARGETS + ['all']
 FUZZ_RNG_SEED_SIZE = 4
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
index 1553d436c..658c685f4 100644
--- a/tests/fuzz/regression_driver.c
+++ b/tests/fuzz/regression_driver.c
@@ -40,8 +40,13 @@ int main(int argc, char const **argv) {
     size_t readSize;
     FILE *file;
 
-    /* Check that it is a regular file, and that the fileSize is valid */
-    FUZZ_ASSERT_MSG(UTIL_isRegularFile(fileName), fileName);
+    /* Check that it is a regular file, and that the fileSize is valid.
+     * If it is not a regular file, then it may have been deleted since we
+     * constructed the list, so just skip it.
+     */
+    if (!UTIL_isRegularFile(fileName)) {
+      continue;
+    }
     FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName);
     /* Ensure we have a large enough buffer allocated */
     if (fileSize > bufferSize) {
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index 83608b6e7..7e3b66098 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -25,9 +25,6 @@ static const int kMaxClevel = 19;
 
 static ZSTD_CCtx *cctx = NULL;
 static ZSTD_DCtx *dctx = NULL;
-static void* cBuf = NULL;
-static void* rBuf = NULL;
-static size_t bufSize = 0;
 static uint32_t seed;
 
 static size_t roundTripTest(void *result, size_t resultCapacity,
@@ -36,16 +33,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
 {
     size_t cSize;
     if (FUZZ_rand(&seed) & 1) {
-        ZSTD_inBuffer in = {src, srcSize, 0};
-        ZSTD_outBuffer out = {compressed, compressedCapacity, 0};
-        size_t err;
-
-        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
         FUZZ_setRandomParameters(cctx, srcSize, &seed);
-        err = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
-        FUZZ_ZASSERT(err);
-        FUZZ_ASSERT(err == 0);
-        cSize = out.pos;
+        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
     } else {
         int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
         cSize = ZSTD_compressCCtx(
@@ -57,20 +46,21 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
 
 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    size_t neededBufSize;
+    size_t const rBufSize = size;
+    void* rBuf = malloc(rBufSize);
+    size_t cBufSize = ZSTD_compressBound(size);
+    void* cBuf;
 
     seed = FUZZ_seed(&src, &size);
-    neededBufSize = ZSTD_compressBound(size);
+    /* Half of the time fuzz with a 1 byte smaller output size.
+     * This will still succeed because we don't use a dictionary, so the dictID
+     * field is empty, giving us 4 bytes of overhead.
+     */
+    cBufSize -= FUZZ_rand32(&seed, 0, 1);
+    cBuf = malloc(cBufSize);
+
+    FUZZ_ASSERT(cBuf && rBuf);
 
-    /* Allocate all buffers and contexts if not already allocated */
-    if (neededBufSize > bufSize) {
-        free(cBuf);
-        free(rBuf);
-        cBuf = malloc(neededBufSize);
-        rBuf = malloc(neededBufSize);
-        bufSize = neededBufSize;
-        FUZZ_ASSERT(cBuf && rBuf);
-    }
     if (!cctx) {
         cctx = ZSTD_createCCtx();
         FUZZ_ASSERT(cctx);
@@ -82,11 +72,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 
     {
         size_t const result =
-            roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size);
+            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
         FUZZ_ZASSERT(result);
         FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
         FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
     }
+    free(rBuf);
+    free(cBuf);
 #ifndef STATEFUL_FUZZING
     ZSTD_freeCCtx(cctx); cctx = NULL;
     ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
index 7ad571221..68e120d7e 100644
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@@ -62,9 +62,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
     if (!dstream) {
         dstream = ZSTD_createDStream();
         FUZZ_ASSERT(dstream);
-        FUZZ_ASSERT(!ZSTD_isError(ZSTD_initDStream(dstream)));
     } else {
-        FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream)));
+        FUZZ_ZASSERT(ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only));
     }
 
     while (size > 0) {
@@ -73,7 +72,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
             ZSTD_outBuffer out = makeOutBuffer();
             size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
             if (ZSTD_isError(rc)) goto error;
-            if (rc == 0) FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream)));
         }
     }
 
diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
index d903bcb29..d13c2dbe7 100644
--- a/tests/fuzz/stream_round_trip.c
+++ b/tests/fuzz/stream_round_trip.c
@@ -63,7 +63,7 @@ static size_t compress(uint8_t *dst, size_t capacity,
         ZSTD_inBuffer in = makeInBuffer(&src, &srcSize);
         /* Mode controls the action. If mode == -1 we pick a new mode */
         int mode = -1;
-        while (in.pos < in.size) {
+        while (in.pos < in.size || mode != -1) {
             ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
             /* Previous action finished, pick a new mode. */
             if (mode == -1) mode = FUZZ_rand(&seed) % 10;
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 10163e151..0e64400e6 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -8,10 +8,14 @@
  */
 
 #define ZSTD_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+
+#include <string.h>
 
 #include "zstd_helpers.h"
 #include "fuzz_helpers.h"
 #include "zstd.h"
+#include "zdict.h"
 
 static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
 {
@@ -71,7 +75,6 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
     setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state);
     setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state);
     setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state);
-    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
     /* Select long distance matchig parameters */
     setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state);
     setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);
@@ -81,4 +84,54 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
             state);
     setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
             ZSTD_LDM_HASHRATELOG_MAX, state);
+    /* Set misc parameters */
+    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state);
+    setRand(cctx, ZSTD_c_rsyncable, 0, 1, state);
+    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
+    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
+    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
+}
+
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
+{
+    size_t const dictSize = MAX(srcSize / 8, 1024);
+    size_t const totalSampleSize = dictSize * 11;
+    FUZZ_dict_t dict = { malloc(dictSize), dictSize };
+    char* const samples = (char*)malloc(totalSampleSize);
+    unsigned nbSamples = 100;
+    size_t* const samplesSizes = (size_t*)malloc(sizeof(size_t) * nbSamples);
+    size_t pos = 0;
+    size_t sample = 0;
+    ZDICT_fastCover_params_t params;
+    FUZZ_ASSERT(dict.buff && samples && samplesSizes);
+
+    for (sample = 0; sample < nbSamples; ++sample) {
+        size_t const remaining = totalSampleSize - pos;
+        size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1);
+        size_t const limit = MIN(srcSize - offset, remaining);
+        size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
+        memcpy(samples + pos, src + offset, toCopy);
+        pos += toCopy;
+        samplesSizes[sample] = toCopy;
+
+    }
+    memset(samples + pos, 0, totalSampleSize - pos);
+
+    memset(&params, 0, sizeof(params));
+    params.accel = 5;
+    params.k = 40;
+    params.d = 8;
+    params.f = 14;
+    params.zParams.compressionLevel = 1;
+    dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
+            samples, samplesSizes, nbSamples, params);
+    if (ZSTD_isError(dict.size)) {
+        free(dict.buff);
+        memset(&dict, 0, sizeof(dict));
+    }
+
+    free(samplesSizes);
+    free(samples);
+
+    return dict;
 }
diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h
index 3856bebec..457e6e995 100644
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@@ -14,6 +14,8 @@
 #ifndef ZSTD_HELPERS_H
 #define ZSTD_HELPERS_H
 
+#define ZSTD_STATIC_LINKING_ONLY
+
 #include "zstd.h"
 
 #include <stdint.h>
@@ -27,6 +29,17 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state);
 ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state);
 ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state);
 
+typedef struct {
+    void* buff;
+    size_t size;
+} FUZZ_dict_t;
+
+/* Quickly train a dictionary from a source for fuzzing.
+ * NOTE: Don't use this to train production dictionaries, it is only optimized
+ * for speed, and doesn't care about dictionary quality.
+ */
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state);
+
 
 #ifdef __cplusplus
 }
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 7bc2f10cb..c38aef610 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -880,6 +880,19 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : Multithreaded ZSTD_compress2() with rsyncable : ", testNb++)
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        /* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so
+         * ZSTDMT is forced to not take the shortcut.
+         */
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) );
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) );
+        CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) );
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++)
     {   ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
         int value;
@@ -1425,6 +1438,32 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+    CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize) );
+    cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
+    CHECK_Z(cSize);
+    DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with dictionary : ", testNb++);
+    {
+        ZSTD_DCtx* dctx = ZSTD_createDCtx();
+        size_t ret;
+        /* We should fail to decompress without a dictionary. */
+        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+        ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+        if (!ZSTD_isError(ret)) goto _output_error;
+        /* We should succeed to decompress with the dictionary. */
+        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+        CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictSize) );
+        CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        /* The dictionary should persist across calls. */
+        CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        /* When we reset the context the dictionary is cleared. */
+        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+        ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+        if (!ZSTD_isError(ret)) goto _output_error;
+        ZSTD_freeDCtx(dctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
     {   U32 u; for (u=0; u