diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index f2b9e03ed..4a9f6b7c8 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4179,18 +4179,28 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
     /* compression stage */
 #ifdef ZSTD_MULTITHREAD
     if (cctx->appliedParams.nbWorkers > 0) {
+        int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
+        size_t flushMin;
+        assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
         if (cctx->cParamsChanged) {
             ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
             cctx->cParamsChanged = 0;
         }
-        {   size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+        do {
+            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
             if ( ZSTD_isError(flushMin)
               || (endOp == ZSTD_e_end && flushMin == 0) ) {   /* compression completed */
                 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
             }
-            DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
-            return flushMin;
-    }   }
+            FORWARD_IF_ERROR(flushMin);
+        } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
+        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
+        /* Either we don't require maximum forward progress, we've finished the
+         * flush, or we are out of output space.
+         */
+        assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
+        return flushMin;
+    }
 #endif
     FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) );
     DEBUGLOG(5, "completed ZSTD_compressStream2");
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index aa7f6f580..d8f14882d 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -427,8 +427,13 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
     size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
     if (ZSTD_isError(result)) return result;    /* invalid header */
     RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
     RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), dictionary_wrong);
+#endif
     if (dctx->fParams.checksumFlag)
         XXH64_reset(&dctx->xxhState, 0);
     return 0;
 }
@@ -783,7 +788,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 
 size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, dctx->ddict);
 }
 
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 180a68ae8..ed5a02f98 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -627,19 +627,20 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   return 1;
 }
 
-void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers)
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
 {
   const double ratio = (double)nbDmers / maxDictSize;
   if (ratio >= 10) {
       return;
   }
-  DISPLAYLEVEL(1, "WARNING: The maximum dictionary size %u is too large "
-                  "compared to the source size %u! "
-                  "size(source)/size(dictionary) = %f, but it should be >= "
-                  "10! This may lead to a subpar dictionary! We recommend "
-                  "training on sources at least 10x, and up to 100x the "
-                  "size of the dictionary!\n", (U32)maxDictSize,
-                  (U32)nbDmers, ratio);
+  LOCALDISPLAYLEVEL(displayLevel, 1,
+                    "WARNING: The maximum dictionary size %u is too large "
+                    "compared to the source size %u! "
+                    "size(source)/size(dictionary) = %f, but it should be >= "
+                    "10! This may lead to a subpar dictionary! We recommend "
+                    "training on sources at least 10x, and up to 100x the "
+                    "size of the dictionary!\n", (U32)maxDictSize,
+                    (U32)nbDmers, ratio);
 }
 
 COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
@@ -744,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
       parameters.d, parameters.splitPoint)) {
     return ERROR(GENERIC);
   }
-  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize);
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     COVER_ctx_destroy(&ctx);
@@ -1060,7 +1061,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
         return ERROR(GENERIC);
       }
       if (!warned) {
-        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize);
+        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
         warned = 1;
       }
       /* Loop through k reusing the same context */
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index 71c520e9e..27e6fb7a3 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -65,7 +65,7 @@ COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
 /**
  * Warns the user when their corpus is too small.
  */
-void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers);
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
 
 /**
  * Checks total compressed size of a dictionary
diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c
index 8cb89c938..6cf37026f 100644
--- a/lib/dictBuilder/fastcover.c
+++ b/lib/dictBuilder/fastcover.c
@@ -570,7 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
     DISPLAYLEVEL(1, "Failed to initialize context\n");
     return ERROR(GENERIC);
   }
-  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers);
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
   /* Build the dictionary */
   DISPLAYLEVEL(2, "Building dictionary\n");
   {
@@ -673,7 +673,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
         return ERROR(GENERIC);
       }
       if (!warned) {
-        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers);
+        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
         warned = 1;
       }
       /* Loop through k reusing the same context */
diff --git a/lib/zstd.h b/lib/zstd.h
index dc6348659..0c9ebe5b6 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -577,6 +577,11 @@ typedef struct ZSTD_outBuffer_s {
 * The caller must check if input has been entirely consumed.
 * If not, the caller must make some room to receive more compressed data,
 * and then present again remaining input data.
+* note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+* but doesn't guarantee maximal forward progress. This is especially relevant
+* when compressing with multiple threads. The call won't block if it can
+* consume some input, but if it can't it will wait for some, but not all,
+* output to be flushed.
 * @return : provides a minimum amount of data remaining to be flushed from internal buffers
 *           or an error code, which can be tested using ZSTD_isError().
 *
@@ -586,6 +591,8 @@ typedef struct ZSTD_outBuffer_s {
 * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
 * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
 * operation.
+* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+* block until the flush is complete or the output buffer is full.
 * @return : 0 if internal buffers are entirely flushed,
 *           >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
 *           or an error code, which can be tested using ZSTD_isError().
@@ -596,6 +603,8 @@ typedef struct ZSTD_outBuffer_s {
 * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
 * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
 * start a new frame.
+* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+* block until the flush is complete or the output buffer is full.
 * @return : 0 if frame fully completed and fully flushed,
 *           >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
 *           or an error code, which can be tested using ZSTD_isError().
@@ -613,11 +622,13 @@ typedef enum {
     ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
     ZSTD_e_flush=1,    /* flush any data provided so far,
                         * it creates (at least) one new block, that can be decoded immediately on reception;
-                        * frame will continue: any future data can still reference previously compressed data, improving compression. */
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
     ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
                         * note that frame is only closed after compressed data is fully flushed (return value == 0).
                         * After that point, any additional data starts a new frame.
-                        * note : each frame is independent (does not reference any content from previous frame). */
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        : note : multithreaded compression will block to flush as much output as possible. */
 } ZSTD_EndDirective;
 
 /*! ZSTD_compressStream2() :
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 8a22ad1c5..12ec9524b 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -27,7 +27,7 @@ PRGDIR = ../../programs
 
 FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
 	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
-	$(CPPFLAGS)
+	-DZSTD_MULTITHREAD $(CPPFLAGS)
 FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
 	-Wstrict-prototypes -Wundef \
@@ -36,7 +36,7 @@ FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-g -fno-omit-frame-pointer
 FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS)
 FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS)
-FUZZ_LDFLAGS := $(LDFLAGS)
+FUZZ_LDFLAGS := -pthread $(LDFLAGS)
 FUZZ_ARFLAGS := $(ARFLAGS)
 
 FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS)
@@ -46,11 +46,13 @@ FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c
 ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
+ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c
 FUZZ_SRC := \
 	$(FUZZ_SRC) \
 	$(ZSTDDECOMP_SRC) \
 	$(ZSTDCOMMON_SRC) \
-	$(ZSTDCOMP_SRC)
+	$(ZSTDCOMP_SRC) \
+	$(ZSTDDICT_SRC)
 
 FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
 
@@ -65,7 +67,9 @@ FUZZ_TARGETS := \
 	block_round_trip \
 	simple_decompress \
 	stream_decompress \
-	block_decompress
+	block_decompress \
+	dictionary_round_trip \
+	dictionary_decompress
 
 all: $(FUZZ_TARGETS)
 
@@ -90,6 +94,12 @@ stream_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_decompress.o
 block_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_decompress.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@
 
+dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
+
+dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
 
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c
new file mode 100644
index 000000000..7d3a7678a
--- /dev/null
+++ b/tests/fuzz/dictionary_decompress.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target attempts to decompress the fuzzed data with the dictionary
+ * decompression function to ensure the decompressor never crashes. It does not
+ * fuzz the dictionary.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+static ZSTD_DCtx *dctx = NULL;
+static void* rBuf = NULL;
+static size_t bufSize = 0;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    FUZZ_dict_t dict;
+    size_t neededBufSize;
+
+    uint32_t seed = FUZZ_seed(&src, &size);
+    neededBufSize = MAX(20 * size, (size_t)256 << 10);
+
+    /* Allocate all buffers and contexts if not already allocated */
+    if (neededBufSize > bufSize) {
+        free(rBuf);
+        rBuf = malloc(neededBufSize);
+        bufSize = neededBufSize;
+        FUZZ_ASSERT(rBuf);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+    dict = FUZZ_train(src, size, &seed);
+    if (FUZZ_rand32(&seed, 0, 1) == 0) {
+        ZSTD_decompress_usingDict(dctx,
+                rBuf, neededBufSize,
+                src, size,
+                dict.buff, dict.size);
+    } else {
+        FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+                dctx, dict.buff, dict.size,
+                (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+                (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
+        ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
+    }
+
+    free(dict.buff);
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c
new file mode 100644
index 000000000..e28c65c98
--- /dev/null
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress) with
+ * a dictionary, compares the result with the original, and calls abort() on
+ * corruption.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+static const int kMaxClevel = 19;
+
+static ZSTD_CCtx *cctx = NULL;
+static ZSTD_DCtx *dctx = NULL;
+static uint32_t seed;
+
+static size_t roundTripTest(void *result, size_t resultCapacity,
+                            void *compressed, size_t compressedCapacity,
+                            const void *src, size_t srcSize)
+{
+    ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
+    FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed);
+    size_t cSize;
+    if ((FUZZ_rand(&seed) & 15) == 0) {
+        int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
+
+        cSize = ZSTD_compress_usingDict(cctx,
+                compressed, compressedCapacity,
+                src, srcSize,
+                dict.buff, dict.size,
+                cLevel);
+    } else {
+        dictContentType = FUZZ_rand32(&seed, 0, 2);
+        FUZZ_setRandomParameters(cctx, srcSize, &seed);
+        /* Disable checksum so we can use sizes smaller than compress bound. */
+        FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
+        FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
+                cctx, dict.buff, dict.size,
+                (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+                dictContentType));
+        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+    }
+    FUZZ_ZASSERT(cSize);
+    FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
+            dctx, dict.buff, dict.size,
+            (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
+            dictContentType));
+    {
+        size_t const ret = ZSTD_decompressDCtx(
+                dctx, result, resultCapacity, compressed, cSize);
+        free(dict.buff);
+        return ret;
+    }
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    size_t const rBufSize = size;
+    void* rBuf = malloc(rBufSize);
+    size_t cBufSize = ZSTD_compressBound(size);
+    void* cBuf;
+
+    seed = FUZZ_seed(&src, &size);
+    /* Half of the time fuzz with a 1 byte smaller output size.
+     * This will still succeed because we force the checksum to be disabled,
+     * giving us 4 bytes of overhead.
+     */
+    cBufSize -= FUZZ_rand32(&seed, 0, 1);
+    cBuf = malloc(cBufSize);
+
+    if (!cctx) {
+        cctx = ZSTD_createCCtx();
+        FUZZ_ASSERT(cctx);
+    }
+    if (!dctx) {
+        dctx = ZSTD_createDCtx();
+        FUZZ_ASSERT(dctx);
+    }
+
+    {
+        size_t const result =
+            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
+        FUZZ_ZASSERT(result);
+        FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
+        FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
+    }
+    free(rBuf);
+    free(cBuf);
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeCCtx(cctx); cctx = NULL;
+    ZSTD_freeDCtx(dctx); dctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 693762985..ee27015a5 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -34,6 +34,8 @@ TARGETS = [
     'simple_decompress',
     'stream_decompress',
     'block_decompress',
+    'dictionary_round_trip',
+    'dictionary_decompress',
 ]
 ALL_TARGETS = TARGETS + ['all']
 FUZZ_RNG_SEED_SIZE = 4
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
index 1553d436c..658c685f4 100644
--- a/tests/fuzz/regression_driver.c
+++ b/tests/fuzz/regression_driver.c
@@ -40,8 +40,13 @@ int main(int argc, char const **argv) {
     size_t readSize;
     FILE *file;
 
-    /* Check that it is a regular file, and that the fileSize is valid */
-    FUZZ_ASSERT_MSG(UTIL_isRegularFile(fileName), fileName);
+    /* Check that it is a regular file, and that the fileSize is valid.
+     * If it is not a regular file, then it may have been deleted since we
+     * constructed the list, so just skip it.
+     */
+    if (!UTIL_isRegularFile(fileName)) {
+      continue;
+    }
     FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName);
     /* Ensure we have a large enough buffer allocated */
     if (fileSize > bufferSize) {
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index 83608b6e7..7e3b66098 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -25,9 +25,6 @@ static const int kMaxClevel = 19;
 
 static ZSTD_CCtx *cctx = NULL;
 static ZSTD_DCtx *dctx = NULL;
-static void* cBuf = NULL;
-static void* rBuf = NULL;
-static size_t bufSize = 0;
 static uint32_t seed;
 
 static size_t roundTripTest(void *result, size_t resultCapacity,
@@ -36,16 +33,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
 {
     size_t cSize;
     if (FUZZ_rand(&seed) & 1) {
-        ZSTD_inBuffer in = {src, srcSize, 0};
-        ZSTD_outBuffer out = {compressed, compressedCapacity, 0};
-        size_t err;
-
-        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
         FUZZ_setRandomParameters(cctx, srcSize, &seed);
-        err = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
-        FUZZ_ZASSERT(err);
-        FUZZ_ASSERT(err == 0);
-        cSize = out.pos;
+        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
     } else {
         int const cLevel = FUZZ_rand(&seed) % kMaxClevel;
         cSize = ZSTD_compressCCtx(
@@ -57,20 +46,21 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
 
 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    size_t neededBufSize;
+    size_t const rBufSize = size;
+    void* rBuf = malloc(rBufSize);
+    size_t cBufSize = ZSTD_compressBound(size);
+    void* cBuf;
 
     seed = FUZZ_seed(&src, &size);
-    neededBufSize = ZSTD_compressBound(size);
+    /* Half of the time fuzz with a 1 byte smaller output size.
+     * This will still succeed because we don't use a dictionary, so the dictID
+     * field is empty, giving us 4 bytes of overhead.
+     */
+    cBufSize -= FUZZ_rand32(&seed, 0, 1);
+    cBuf = malloc(cBufSize);
+
+    FUZZ_ASSERT(cBuf && rBuf);
 
-    /* Allocate all buffers and contexts if not already allocated */
-    if (neededBufSize > bufSize) {
-        free(cBuf);
-        free(rBuf);
-        cBuf = malloc(neededBufSize);
-        rBuf = malloc(neededBufSize);
-        bufSize = neededBufSize;
-        FUZZ_ASSERT(cBuf && rBuf);
-    }
     if (!cctx) {
         cctx = ZSTD_createCCtx();
         FUZZ_ASSERT(cctx);
@@ -82,11 +72,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 
     {
         size_t const result =
-            roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size);
+            roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size);
         FUZZ_ZASSERT(result);
         FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size");
         FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!");
     }
+    free(rBuf);
+    free(cBuf);
 #ifndef STATEFUL_FUZZING
     ZSTD_freeCCtx(cctx); cctx = NULL;
     ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
index 7ad571221..68e120d7e 100644
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@@ -62,9 +62,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
     if (!dstream) {
         dstream = ZSTD_createDStream();
         FUZZ_ASSERT(dstream);
-        FUZZ_ASSERT(!ZSTD_isError(ZSTD_initDStream(dstream)));
     } else {
-        FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream)));
+        FUZZ_ZASSERT(ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only));
     }
 
     while (size > 0) {
@@ -73,7 +72,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
             ZSTD_outBuffer out = makeOutBuffer();
             size_t const rc = ZSTD_decompressStream(dstream, &out, &in);
             if (ZSTD_isError(rc)) goto error;
-            if (rc == 0) FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream)));
         }
     }
 
diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
index d903bcb29..d13c2dbe7 100644
--- a/tests/fuzz/stream_round_trip.c
+++ b/tests/fuzz/stream_round_trip.c
@@ -63,7 +63,7 @@ static size_t compress(uint8_t *dst, size_t capacity,
         ZSTD_inBuffer in = makeInBuffer(&src, &srcSize);
         /* Mode controls the action. If mode == -1 we pick a new mode */
         int mode = -1;
-        while (in.pos < in.size) {
+        while (in.pos < in.size || mode != -1) {
             ZSTD_outBuffer out = makeOutBuffer(dst, capacity);
             /* Previous action finished, pick a new mode. */
             if (mode == -1) mode = FUZZ_rand(&seed) % 10;
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 10163e151..0e64400e6 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -8,10 +8,14 @@
  */
 
 #define ZSTD_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+
+#include <string.h>
 
 #include "zstd_helpers.h"
 #include "fuzz_helpers.h"
 #include "zstd.h"
+#include "zdict.h"
 
 static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
 {
@@ -71,7 +75,6 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
     setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state);
     setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state);
     setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state);
-    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
     /* Select long distance matchig parameters */
     setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state);
     setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);
@@ -81,4 +84,54 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
             state);
     setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
             ZSTD_LDM_HASHRATELOG_MAX, state);
+    /* Set misc parameters */
+    setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state);
+    setRand(cctx, ZSTD_c_rsyncable, 0, 1, state);
+    setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
+    setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
+    setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
+}
+
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
+{
+    size_t const dictSize = MAX(srcSize / 8, 1024);
+    size_t const totalSampleSize = dictSize * 11;
+    FUZZ_dict_t dict = { malloc(dictSize), dictSize };
+    char* const samples = (char*)malloc(totalSampleSize);
+    unsigned nbSamples = 100;
+    size_t* const samplesSizes = (size_t*)malloc(sizeof(size_t) * nbSamples);
+    size_t pos = 0;
+    size_t sample = 0;
+    ZDICT_fastCover_params_t params;
+    FUZZ_ASSERT(dict.buff && samples && samplesSizes);
+
+    for (sample = 0; sample < nbSamples; ++sample) {
+        size_t const remaining = totalSampleSize - pos;
+        size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1);
+        size_t const limit = MIN(srcSize - offset, remaining);
+        size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
+        memcpy(samples + pos, src + offset, toCopy);
+        pos += toCopy;
+        samplesSizes[sample] = toCopy;
+
+    }
+    memset(samples + pos, 0, totalSampleSize - pos);
+
+    memset(&params, 0, sizeof(params));
+    params.accel = 5;
+    params.k = 40;
+    params.d = 8;
+    params.f = 14;
+    params.zParams.compressionLevel = 1;
+    dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
+            samples, samplesSizes, nbSamples, params);
+    if (ZSTD_isError(dict.size)) {
+        free(dict.buff);
+        memset(&dict, 0, sizeof(dict));
+    }
+
+    free(samplesSizes);
+    free(samples);
+
+    return dict;
 }
diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h
index 3856bebec..457e6e995 100644
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@@ -14,6 +14,8 @@
 #ifndef ZSTD_HELPERS_H
 #define ZSTD_HELPERS_H
 
+#define ZSTD_STATIC_LINKING_ONLY
+
 #include "zstd.h"
 
 #include <stdint.h>
@@ -27,6 +29,17 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state);
 ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state);
 ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state);
 
+typedef struct {
+    void* buff;
+    size_t size;
+} FUZZ_dict_t;
+
+/* Quickly train a dictionary from a source for fuzzing.
+ * NOTE: Don't use this to train production dictionaries, it is only optimized
+ * for speed, and doesn't care about dictionary quality.
+ */
+FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state);
+
 
 #ifdef __cplusplus
 }
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 7bc2f10cb..c38aef610 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -880,6 +880,19 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : Multithreaded ZSTD_compress2() with rsyncable : ", testNb++)
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        /* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so
+         * ZSTDMT is forced to not take the shortcut.
+         */
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) );
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) );
+        CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) );
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++)
     {   ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
         int value;
@@ -1425,6 +1438,32 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+    CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize) );
+    cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB));
+    CHECK_Z(cSize);
+    DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with dictionary : ", testNb++);
+    {
+        ZSTD_DCtx* dctx = ZSTD_createDCtx();
+        size_t ret;
+        /* We should fail to decompress without a dictionary. */
+        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+        ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+        if (!ZSTD_isError(ret)) goto _output_error;
+        /* We should succeed to decompress with the dictionary. */
+        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+        CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictSize) );
+        CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        /* The dictionary should persist across calls. */
+        CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        /* When we reset the context the dictionary is cleared. */
+        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+        ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+        if (!ZSTD_isError(ret)) goto _output_error;
+        ZSTD_freeDCtx(dctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
     {   U32 u; for (u=0; u