From 637b2d7a24faf32b3cd465b6d46d890ed1e8ff6d Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sat, 29 Jan 2022 16:23:21 -0800
Subject: [PATCH] fixed bug 44168

discovered by oss-fuzz

It's a bug in the test itself:
ZSTD_compressBound() is a valid upper bound of the compressed size
only for data compressed "normally".
In situations where many flushes are forcefully introduced,
many more blocks are created,
each of which can increase the size by 3 bytes.
In extreme cases (lots of small incompressible blocks), the expansion can go beyond ZSTD_compressBound().

The situation is similar when using the CompressSequences() API
with Explicit Block Delimiters:
each explicit block acts like a deliberate flush.
When driven by a fuzzer, it's possible to generate scenarios like the one described above,
with tons of small incompressible blocks,
thus going beyond ZSTD_compressBound().

fix: when using Explicit Block Delimiters, use a larger bound to account for this scenario.
---
 lib/common/zstd_internal.h            | 4 ++--
 lib/compress/zstd_compress.c          | 2 +-
 tests/fuzz/sequence_compression_api.c | 6 +++++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 4e19cb3c2..485b23a69 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -311,8 +311,8 @@ typedef struct {
      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
      * the existing value of the litLength or matchLength by 0x10000.
      */
-    ZSTD_longLengthType_e   longLengthType;
-    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
+    ZSTD_longLengthType_e longLengthType;
+    U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index c82901570..b9358fa0c 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -6140,7 +6140,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
         FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
         assert(blockSize <= remaining);
         ZSTD_resetSeqStore(&cctx->seqStore);
-        DEBUGLOG(5, "Working on new block. Blocksize: %zu", blockSize);
+        DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
 
         additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
         FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
index 443e0a181..68923e11e 100644
--- a/tests/fuzz/sequence_compression_api.c
+++ b/tests/fuzz/sequence_compression_api.c
@@ -297,7 +297,11 @@ int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
     }
     nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
     generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
-    cBufSize = ZSTD_compressBound(generatedSrcSize);
+    /* Note : in explicit block delimiters mode,
+     * the fuzzer might generate a lot of small incompressible blocks.
+     * In which case, the final compressed size might be > ZSTD_compressBound().
+     * Solution : provide a much more generous cBufSize to cover these scenarios */
+    cBufSize = (mode == ZSTD_sf_noBlockDelimiters) ? ZSTD_compressBound(generatedSrcSize) : 256 + (generatedSrcSize * 2);
     cBuf = FUZZ_malloc(cBufSize);
 
     rBufSize = generatedSrcSize;