From 782bfb858afb88f014271f8efbb7bef909cee6db Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 15 Aug 2019 16:41:34 +0200
Subject: [PATCH 01/16] fixed very minor inefficiency (nbSeq==127)

The nbSeq "short" format (1-byte)
is compatible with any value < 128.

However, the code would cautiously only accept values < 127.
This is not an error, because the general 2-bytes format
is compatible with small values < 128.
Hence the inefficiency never triggered any warning.

Spotted by Intel's Smita Kumar.
---
 lib/compress/zstd_compress.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index cd73db13b..b4ae4e877 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1981,12 +1981,17 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
     /* Sequences Header */
     RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                     dstSize_tooSmall);
-    if (nbSeq < 0x7F)
+    if (nbSeq < 128) {
         *op++ = (BYTE)nbSeq;
-    else if (nbSeq < LONGNBSEQ)
-        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
-    else
-        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    } else if (nbSeq < LONGNBSEQ) {
+        op[0] = (BYTE)((nbSeq>>8) + 0x80);
+        op[1] = (BYTE)nbSeq;
+        op+=2;
+    } else {
+        op[0]=0xFF;
+        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+        op+=3;
+    }
     assert(op <= oend);
     if (nbSeq==0) {
         /* Copy the old tables over as if we repeated them */

From b81d7cc6a070ec67d9d511ad96c0fe0ab98f81e9 Mon Sep 17 00:00:00 2001
From: Ed Maste <emaste@freebsd.org>
Date: Thu, 15 Aug 2019 21:17:06 -0400
Subject: [PATCH 02/16] remove extraneous doubled ;s

---
 lib/dictBuilder/zdict.c | 2 +-
 programs/dibio.c        | 2 +-
 tests/fuzzer.c          | 6 +++---
 tests/zbufftest.c       | 4 ++--
 tests/zstreamtest.c     | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index ee21ee1a9..4a263d822 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -571,7 +571,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
     unsigned const prime1 = 2654435761U;
     unsigned const prime2 = 2246822519U;
     unsigned acc = prime1;
-    size_t p=0;;
+    size_t p=0;
     for (p=0; p<length; p++) {
         acc *= prime2;
         ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
diff --git a/programs/dibio.c b/programs/dibio.c
index 12eb32680..ea4bb4bf1 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -201,7 +201,7 @@ static void DiB_fillNoise(void* buffer, size_t length)
     unsigned const prime1 = 2654435761U;
     unsigned const prime2 = 2246822519U;
     unsigned acc = prime1;
-    size_t p=0;;
+    size_t p=0;
 
     for (p=0; p<length; p++) {
         acc *= prime2;
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 2de7c0096..4788c356a 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -401,7 +401,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
     DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
     {   size_t u;
         for (u=0; u<CNBuffSize; u++) {
-            if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;;
+            if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
     }   }
     DISPLAYLEVEL(3, "OK \n");
 
@@ -758,7 +758,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
             {   size_t u;
                 for (u=0; u<CNBuffSize; u++) {
                     if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u])
-                        goto _output_error;;
+                        goto _output_error;
             }   }
             DISPLAYLEVEL(3, "OK \n");
 
@@ -839,7 +839,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
         {   size_t u;
             for (u=0; u<CNBuffSize; u++) {
-                if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;;
+                if (((BYTE*)decodedBuffer)[u] != ((BYTE*)CNBuffer)[u]) goto _output_error;
         }   }
         DISPLAYLEVEL(3, "OK \n");
 
diff --git a/tests/zbufftest.c b/tests/zbufftest.c
index 8cbde3f4f..944148262 100644
--- a/tests/zbufftest.c
+++ b/tests/zbufftest.c
@@ -184,7 +184,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
     DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
     {   size_t i;
         for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
     }   }
     DISPLAYLEVEL(4, "OK \n");
 
@@ -213,7 +213,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
     DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
     {   size_t i;
         for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
     }   }
     DISPLAYLEVEL(4, "OK \n");
 
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 97d4e33e1..56f16766e 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -481,7 +481,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
     {   size_t i;
         for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;;
+            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
     }   }
     DISPLAYLEVEL(3, "OK \n");
 

From 1e07eb4d5ce620a6dddc284d1f20072de52c1531 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Fri, 16 Aug 2019 15:13:42 +0200
Subject: [PATCH 03/16] clarifications on the meaning of field `Block_Size`

following comments from Intel's Smita Kumar.
---
 doc/zstd_compression_format.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 7d02426a5..39a8f1776 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
 
 ### Version
 
-0.3.2 (17/07/19)
+0.3.3 (16/08/19)
 
 
 Introduction
@@ -358,6 +358,7 @@ It may be followed by an optional `Content_Checksum`
 __`Block_Type`__
 
 The next 2 bits represent the `Block_Type`.
+`Block_Type` influences the meaning of `Block_Size`.
 There are 4 block types :
 
 |    Value     |      0      |      1      |         2          |     3     |
@@ -384,9 +385,12 @@ There are 4 block types :
 __`Block_Size`__
 
 The upper 21 bits of `Block_Header` represent the `Block_Size`.
-`Block_Size` is the size of the block excluding the header.
-A block can contain any number of bytes (even zero), up to
-`Block_Maximum_Decompressed_Size`, which is the smallest of:
+When `Block_Type` is `Compressed_Block` or `Raw_Block`,
+`Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.  
+When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1,
+and `Block_Size` represents the nb of times this byte must be repeated.
+A block can contain and decompress into any number of bytes (even zero),
+up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
 -  Window_Size
 -  128 KB
 
@@ -1653,6 +1657,7 @@ or at least provide a meaningful error code explaining for which reason it canno
 
 Version changes
 ---------------
+- 0.3.3 : clarifications for field Block_Size
 - 0.3.2 : remove additional block size restriction on compressed blocks
 - 0.3.1 : minor clarification regarding offset history update rules
 - 0.3.0 : minor edits to match RFC8478

From af0c9501d120ff83a7e0871a0b18ad7237813a8c Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Thu, 15 Aug 2019 23:57:55 -0700
Subject: [PATCH 04/16] Add --stream-size=# command

---
 programs/fileio.c  | 21 +++++++++++++++++++--
 programs/fileio.h  |  1 +
 programs/zstdcli.c |  6 +++++-
 tests/playTests.sh | 22 +++++++++++++++++++++-
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 569a410c1..82d70075b 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -304,6 +304,7 @@ struct FIO_prefs_s {
     int ldmMinMatch;
     int ldmBucketSizeLog;
     int ldmHashRateLog;
+    size_t streamSrcSize;
     size_t targetCBlockSize;
     ZSTD_literalCompressionMode_e literalCompressionMode;
 
@@ -349,6 +350,7 @@ FIO_prefs_t* FIO_createPreferences(void)
     ret->ldmMinMatch = 0;
     ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
     ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
+    ret->streamSrcSize = 0;
     ret->targetCBlockSize = 0;
     ret->literalCompressionMode = ZSTD_lcm_auto;
     return ret;
@@ -418,6 +420,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
     prefs->rsyncable = rsyncable;
 }
 
+void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
+    prefs->streamSrcSize = streamSrcSize;
+}
+
 void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
     prefs->targetCBlockSize = targetCBlockSize;
 }
@@ -698,9 +704,20 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
 #endif
         /* dictionary */
-        CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );  /* set the value temporarily for dictionary loading, to adapt compression parameters */
+        /* set the pledged size for dictionary loading, to adapt compression parameters */
+        if (srcSize == ZSTD_CONTENTSIZE_UNKNOWN && prefs->streamSrcSize > 0) {
+          /* unknown source size; use the declared stream size and disable writing this size to frame during compression */
+          CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
+          CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) );
+        } else {
+          /* use the known source size for adaption */
+          CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
+        }
         CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
-        CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) );  /* reset */
+        if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN || prefs->streamSrcSize == 0) {
+            /* reset pledge when src size is known or stream size is declared */
+            CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) );
+        }
 
         free(dictBuffer);
     }
diff --git a/programs/fileio.h b/programs/fileio.h
index 311f8c0e1..13f6f1d05 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
 void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
 void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
+void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
 void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
 void FIO_setLiteralCompressionMode(
         FIO_prefs_t* const prefs,
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index de286cdf2..401e1ee2c 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
     DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
     DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
+    DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
     DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
 #ifdef ZSTD_MULTITHREAD
     DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
@@ -588,6 +589,7 @@ int main(int argCount, const char* argv[])
     const char* suffix = ZSTD_EXTENSION;
     unsigned maxDictSize = g_defaultMaxDictSize;
     unsigned dictID = 0;
+    size_t streamSrcSize = 0;
     size_t targetCBlockSize = 0;
     int dictCLevel = g_defaultDictCLevel;
     unsigned dictSelect = g_defaultSelectivityLevel;
@@ -745,6 +747,7 @@ int main(int argCount, const char* argv[])
                     if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
+                    if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--long")) {
                         unsigned ldmWindowLog = 0;
@@ -1150,6 +1153,7 @@ int main(int argCount, const char* argv[])
         FIO_setAdaptMin(prefs, adaptMin);
         FIO_setAdaptMax(prefs, adaptMax);
         FIO_setRsyncable(prefs, rsyncable);
+        FIO_setStreamSrcSize(prefs, streamSrcSize);
         FIO_setTargetCBlockSize(prefs, targetCBlockSize);
         FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
         if (adaptMin > cLevel) cLevel = adaptMin;
@@ -1160,7 +1164,7 @@ int main(int argCount, const char* argv[])
         else
           operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
-        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
+        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
         DISPLAY("Compression not supported \n");
 #endif
     } else {  /* decompression or test */
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 69387321f..431a53a18 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -108,7 +108,6 @@ else
 fi
 
 
-
 println "\n===>  simple tests "
 
 ./datagen > tmp
@@ -1020,4 +1019,25 @@ test -f dictionary
 rm -f tmp* dictionary
 
 
+println "\n===>  stream-size mode"
+
+./datagen -g11000 > tmp
+println "test : basic file compression vs sized streaming compression"
+$ZSTD -14 -f tmp -o tmp.zst |& tee file.out
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 |& tee stream_sized.out
+
+file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
+stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
+rm file.out stream_sized.out
+
+ratio_diff=$(echo $file_ratio - $stream_sized_ratio | bc)
+if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ]
+then
+  die "greater than 0.05% difference between file and sized-streaming compression"
+fi
+
+println "test : incorrect stream size"
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
+
+
 rm -f tmp*

From 85d07c6c474abc8f16b069332fbaba3054819d14 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Fri, 16 Aug 2019 12:49:21 -0700
Subject: [PATCH 05/16] Tweak stdout, stderr redirection in new  playTests

---
 tests/playTests.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index 431a53a18..c516aa712 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1023,8 +1023,8 @@ println "\n===>  stream-size mode"
 
 ./datagen -g11000 > tmp
 println "test : basic file compression vs sized streaming compression"
-$ZSTD -14 -f tmp -o tmp.zst |& tee file.out
-cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 |& tee stream_sized.out
+$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 2>&1 | tee stream_sized.out
 
 file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
 stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
@@ -1035,7 +1035,10 @@ if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ]
 then
   die "greater than 0.05% difference between file and sized-streaming compression"
 fi
-
+println "test : sized streaming compression and decompression"
+cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
 println "test : incorrect stream size"
 cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
 

From 97bb38635c0616ab024af0a71b3d115fcbc4382a Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sat, 17 Aug 2019 08:04:42 +0200
Subject: [PATCH 06/16] `number` instead of `nb`

suggested by @terrelln
---
 doc/zstd_compression_format.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 39a8f1776..111dd98ae 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -388,7 +388,7 @@ The upper 21 bits of `Block_Header` represent the `Block_Size`.
 When `Block_Type` is `Compressed_Block` or `Raw_Block`,
 `Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.  
 When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1,
-and `Block_Size` represents the nb of times this byte must be repeated.
+and `Block_Size` represents the number of times this byte must be repeated.
 A block can contain and decompress into any number of bytes (even zero),
 up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
 -  Window_Size

From c403b12f9dbab9ba3ab30fca6e1ce3c31d2f8923 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Mon, 19 Aug 2019 09:01:31 -0700
Subject: [PATCH 07/16] Set pledged size just before compression

---
 programs/fileio.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 82d70075b..75b271a8f 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -704,21 +704,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
 #endif
         /* dictionary */
-        /* set the pledged size for dictionary loading, to adapt compression parameters */
-        if (srcSize == ZSTD_CONTENTSIZE_UNKNOWN && prefs->streamSrcSize > 0) {
-          /* unknown source size; use the declared stream size and disable writing this size to frame during compression */
-          CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
-          CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) );
-        } else {
-          /* use the known source size for adaption */
-          CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
-        }
         CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
-        if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN || prefs->streamSrcSize == 0) {
-            /* reset pledge when src size is known or stream size is declared */
-            CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) );
-        }
-
         free(dictBuffer);
     }
 
@@ -1020,6 +1006,10 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs,
     /* init */
     if (fileSize != UTIL_FILESIZE_UNKNOWN) {
         CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
+    } else if (prefs->streamSrcSize > 0) {
+      /* unknown source size; use the declared stream size and disable writing this size to frame during compression */
+      CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
+      CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) );
     }
     (void)srcFileName;
 

From bbd83c2ab34dc498a63ff10e869e26fbe1e221a4 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Mon, 19 Aug 2019 09:11:22 -0700
Subject: [PATCH 08/16] Update man page

---
 programs/zstd.1.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 3ab2667a0..e3f729282 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -152,6 +152,12 @@ the last one takes effect.
     This feature does not work with `--single-thread`. You probably don't want
     to use it with long range mode, since it will decrease the effectiveness of
     the synchronization points, but your milage may vary.
+* `--stream-size` :
+    When handling input from a stream, `zstd` must guess how large the source size
+    will be when optimizing compression parameters. If the stream size is relatively
+    small, this guess may be a poor one, resulting in a higher compression ratio than
+    expected. This feature will set the source size of a stream. Note that it must
+    be exact; incorrect stream sizes will cause an error.
 * `-D file`:
     use `file` as Dictionary to compress or decompress FILE(s)
 * `--no-dictID`:

From f781cf672bfd23dd467a6dcf08ba094180032c76 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Mon, 19 Aug 2019 11:07:43 -0700
Subject: [PATCH 09/16] Remove extraneous parameter

---
 programs/fileio.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 75b271a8f..f5ecf7296 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -639,7 +639,6 @@ typedef struct {
 
 static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
                                     const char* dictFileName, int cLevel,
-                                    U64 srcSize,
                                     ZSTD_compressionParameters comprParams) {
     cRess_t ress;
     memset(&ress, 0, sizeof(ress));
@@ -1371,7 +1370,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
     U64 const fileSize = UTIL_getFileSize(srcFileName);
     U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
 
-    cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
+    cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
     int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
 
 
@@ -1424,8 +1423,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
     int error = 0;
     U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
     U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
-    U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
-    cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
+    cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
 
     /* init */
     assert(outFileName != NULL || suffix != NULL);

From a24dc3a935a43ef8088711afadece9b94ae59a27 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Mon, 19 Aug 2019 11:14:56 -0700
Subject: [PATCH 10/16] Remove extraneous variables

---
 programs/fileio.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index f5ecf7296..5492f9448 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1367,9 +1367,6 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
                          const char* dictFileName, int compressionLevel,
                          ZSTD_compressionParameters comprParams)
 {
-    U64 const fileSize = UTIL_getFileSize(srcFileName);
-    U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
-
     cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
     int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
 
@@ -1421,8 +1418,6 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
                                   ZSTD_compressionParameters comprParams)
 {
     int error = 0;
-    U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
-    U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
     cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
 
     /* init */

From 30bfa228e84500855e80e792f7de796257d99ab3 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Mon, 19 Aug 2019 11:20:28 -0700
Subject: [PATCH 11/16] Keep content size flag set in stream size mode

---
 programs/fileio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 5492f9448..873013a51 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1006,9 +1006,8 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs,
     if (fileSize != UTIL_FILESIZE_UNKNOWN) {
         CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
     } else if (prefs->streamSrcSize > 0) {
-      /* unknown source size; use the declared stream size and disable writing this size to frame during compression */
+      /* unknown source size; use the declared stream size */
       CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
-      CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) );
     }
     (void)srcFileName;
 

From 3982935aefbd6e8c0a8ceb7d54afc5b97f188fc7 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Tue, 20 Aug 2019 11:33:33 -0700
Subject: [PATCH 12/16] [fuzz] Improve fuzzer build script and docs

* Remove the `make libFuzzer` target since it is broken and obsoleted
  by `CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer`. The
  new `-fsanitize=fuzzer` is much better because it works with MSAN
  by default.
* Improve the `./fuzz.py gen` command by making the input type explicit
  when creating a new target.
* Update the `README` for `--enable-fuzzer`.

Fixes #1727.
---
 tests/fuzz/Makefile  |  9 --------
 tests/fuzz/README.md | 19 ++++++++++------
 tests/fuzz/fuzz.py   | 53 ++++++++++++++++++++++++++++++--------------
 3 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 8bf16b1fb..08dedd66f 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -113,15 +113,6 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
 
-# Install libfuzzer (not usable for MSAN testing)
-# Provided for convenience. To use this library run make libFuzzer and
-# set LDFLAGS=-L.
-.PHONY: libFuzzer
-libFuzzer:
-	@$(RM) -rf Fuzzer
-	@git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer
-	@cd Fuzzer && ./build.sh
-
 corpora/%_seed_corpus.zip:
 	@mkdir -p corpora
 	$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
index 9e0bb259a..856a57f82 100644
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags
 `--cc`, `--cflags`, etc.
 The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or
 `--lib-fuzzing-engine`, the default is `libregression.a`.
+Alternatively, you can use Clang's built in fuzzing engine with
+`--enable-fuzzer`.
 It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and
 coverage instrumentation `--enable-coverage`.
 It sets sane defaults which can be overridden with flags `--debug`,
@@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging.
 ## LibFuzzer
 
 ```
-# Build libfuzzer if necessary
-make libFuzzer
 # Build the fuzz targets
-./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++
+./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
 # OR equivalently
-CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan
+CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan
 # Run the fuzzer
-./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4
+./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4>
 ```
 
 where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc.
 
 ### MSAN
 
-Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library
-and libFuzzer with MSAN.
+Fuzzing with `libFuzzer` and `MSAN` is as easy as:
+
+```
+CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan
+./fuzz.py libfuzzer TARGET <libfuzzer args>
+```
+
 `fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`,
 `MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass
 the extra parameters only for MSAN.
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index d993209a0..faf8ce8ae 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -24,21 +24,38 @@ def abs_join(a, *p):
     return os.path.abspath(os.path.join(a, *p))
 
 
+class InputType(object):
+    RAW_DATA = 1
+    COMPRESSED_DATA = 2
+
+
+class FrameType(object):
+    ZSTD = 1
+    BLOCK = 2
+
+
+class TargetInfo(object):
+    def __init__(self, input_type, frame_type=FrameType.ZSTD):
+        self.input_type = input_type
+        self.frame_type = frame_type
+
+
 # Constants
 FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
 CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
-TARGETS = [
-    'simple_round_trip',
-    'stream_round_trip',
-    'block_round_trip',
-    'simple_decompress',
-    'stream_decompress',
-    'block_decompress',
-    'dictionary_round_trip',
-    'dictionary_decompress',
-    'zstd_frame_info',
-    'simple_compress',
-]
+TARGET_INFO = {
+    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
+    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
+    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
+    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
+    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
+    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
+    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
+    'simple_compress': TargetInfo(InputType.RAW_DATA),
+}
+TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
 FUZZ_RNG_SEED_SIZE = 4
 
@@ -67,7 +84,7 @@ MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
 def create(r):
     d = os.path.abspath(r)
     if not os.path.isdir(d):
-        os.mkdir(d)
+        os.makedirs(d)
     return d
 
 
@@ -158,7 +175,7 @@ def compiler_version(cc, cxx):
         assert(b'clang' in cxx_version_bytes)
         compiler = 'clang'
     elif b'gcc' in cc_version_bytes:
-        assert(b'gcc' in cxx_version_bytes)
+        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
         compiler = 'gcc'
     if compiler is not None:
         version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
@@ -699,7 +716,8 @@ def gen(args):
                 '-o{}'.format(decompressed),
             ]
 
-            if 'block_' in args.TARGET:
+            info = TARGET_INFO[args.TARGET]
+            if info.frame_type == FrameType.BLOCK:
                 cmd += [
                     '--gen-blocks',
                     '--max-block-size-log={}'.format(args.max_size_log)
@@ -710,10 +728,11 @@ def gen(args):
             print(' '.join(cmd))
             subprocess.check_call(cmd)
 
-            if '_round_trip' in args.TARGET:
+            if info.input_type == InputType.RAW_DATA:
                 print('using decompressed data in {}'.format(decompressed))
                 samples = decompressed
-            elif '_decompress' in args.TARGET:
+            else:
+                assert info.input_type == InputType.COMPRESSED_DATA
                 print('using compressed data in {}'.format(compressed))
                 samples = compressed
 

From 07f22d465d0f85aa00f20fc2f0b59a50ddfe494f Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Tue, 20 Aug 2019 17:13:04 -0700
Subject: [PATCH 13/16] [legacy] Fix buffer overflow in v0.2 and v0.4 raw
 literals decompression

Extends the fix in PR#1722 to v0.2 and v0.4. These aren't built into
zstd by default, and v0.5 onward are not affected.

I only add the `srcSize > BLOCKSIZE` check to v0.4 because the comments
say that it must hold, but the equivalent comment isn't present in v0.2.

Credit to OSS-Fuzz.
---
 lib/legacy/zstd_v02.c | 1 +
 lib/legacy/zstd_v04.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 793df6024..de0a4bd6b 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
             const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
             if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
             {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                 if (litSize > srcSize-3) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart, litSize);
                 dctx->litPtr = dctx->litBuffer;
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 645a6e313..201ce2b69 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
             if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
             {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                 if (litSize > srcSize-3) return ERROR(corruption_detected);
                 memcpy(dctx->litBuffer, istart, litSize);
                 dctx->litPtr = dctx->litBuffer;
@@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 {
     /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
+    size_t litCSize;
+
+    if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
 
     /* Decode literals sub-block */
-    size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
     if (ZSTD_isError(litCSize)) return litCSize;
     ip += litCSize;
     srcSize -= litCSize;

From b3540507f5239f9d0810c3b9e2b920601687738d Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Wed, 21 Aug 2019 10:27:54 -0700
Subject: [PATCH 14/16] Remove bc from play tests

---
 tests/playTests.sh | 41 +++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index c516aa712..b74076763 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -408,6 +408,23 @@ println "compress multiple files including a missing one (notHere) : "
 $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 
 
+println "\n===>  stream-size mode"
+
+./datagen -g11000 > tmp
+println "test : basic file compression vs sized streaming compression"
+file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
+stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
+if [ "$stream_size" -gt "$file_size" ]; then
+  die "hinted compression larger than expected"
+fi
+println "test : sized streaming compression and decompression"
+cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
+$ZSTD -df tmp.zst -o tmp_decompress
+cmp tmp tmp_decompress || die "difference between original and decompressed file"
+println "test : incorrect stream size"
+cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
+
+
 println "\n===>  dictionary tests "
 
 println "- test with raw dict (content only) "
@@ -1019,28 +1036,4 @@ test -f dictionary
 rm -f tmp* dictionary
 
 
-println "\n===>  stream-size mode"
-
-./datagen -g11000 > tmp
-println "test : basic file compression vs sized streaming compression"
-$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
-cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 2>&1 | tee stream_sized.out
-
-file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
-stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
-rm file.out stream_sized.out
-
-ratio_diff=$(echo $file_ratio - $stream_sized_ratio | bc)
-if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ]
-then
-  die "greater than 0.05% difference between file and sized-streaming compression"
-fi
-println "test : sized streaming compression and decompression"
-cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst
-$ZSTD -df tmp.zst -o tmp_decompress
-cmp tmp tmp_decompress || die "difference between original and decompressed file"
-println "test : incorrect stream size"
-cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
-
-
 rm -f tmp*

From 2cdda8b3c43c23981941cdd97c1a517981d2339e Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Thu, 22 Aug 2019 09:13:28 -0700
Subject: [PATCH 15/16] Minor documentation update

---
 programs/zstd.1.md | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index e3f729282..ee5db9d63 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -144,6 +144,11 @@ the last one takes effect.
     Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
     _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
     when combined with multiple worker threads (>=2).
+* `--stream-size=#` :
+    When handling input from a stream, `zstd` must guess how large the source size
+    will be when optimizing compression parameters. This option sets the pledged source
+    size of a stream to eliminate that guesswork. Note that the pledged size must be exact;
+    incorrect stream sizes will cause an error.
 * `--rsyncable` :
     `zstd` will periodically synchronize the compression state to make the
     compressed file more rsync-friendly. There is a negligible impact to
@@ -152,12 +157,6 @@ the last one takes effect.
     This feature does not work with `--single-thread`. You probably don't want
     to use it with long range mode, since it will decrease the effectiveness of
     the synchronization points, but your milage may vary.
-* `--stream-size` :
-    When handling input from a stream, `zstd` must guess how large the source size
-    will be when optimizing compression parameters. If the stream size is relatively
-    small, this guess may be a poor one, resulting in a higher compression ratio than
-    expected. This feature will set the source size of a stream. Note that it must
-    be exact; incorrect stream sizes will cause an error.
 * `-D file`:
     use `file` as Dictionary to compress or decompress FILE(s)
 * `--no-dictID`:

From fd486a846abcbf6c92496dd4915d6a46bd4790c2 Mon Sep 17 00:00:00 2001
From: Nick Magerko <nmagerko@fb.com>
Date: Thu, 22 Aug 2019 09:37:47 -0700
Subject: [PATCH 16/16] Differentiate --stream-size from --size-hint

---
 programs/zstd.1.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index ee5db9d63..1bdc42654 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -145,10 +145,10 @@ the last one takes effect.
     _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
     when combined with multiple worker threads (>=2).
 * `--stream-size=#` :
-    When handling input from a stream, `zstd` must guess how large the source size
-    will be when optimizing compression parameters. This option sets the pledged source
-    size of a stream to eliminate that guesswork. Note that the pledged size must be exact;
-    incorrect stream sizes will cause an error.
+    Sets the pledged source size of input coming from a stream. This value must be exact, as it
+    will be included in the produced frame header. Incorrect stream sizes will cause an error.
+    This information will be used to better optimize compression parameters, resulting in
+    better and potentially faster compression, especially for smaller source sizes.
 * `--rsyncable` :
     `zstd` will periodically synchronize the compression state to make the
     compressed file more rsync-friendly. There is a negligible impact to