From 4c90d862e35f4195dca913b111ef14adb64dfe8c Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Thu, 22 Aug 2019 11:27:20 -0700 Subject: [PATCH 01/14] Generate RLE blocks in the encoder --- lib/compress/zstd_compress.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index cd73db13b..dd8c6db10 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2323,6 +2323,15 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params } } +/* Returns true if the given block is a RLE block */ +static int ZSTD_isRLE(const BYTE *ip, size_t length) { + if (length < 2) return 1; + size_t i; + for (i = 1; i < length; ++i) { + if (ip[0] != ip[i]) return 0; + } + return 1; +} /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. @@ -2372,9 +2381,21 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); } else { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader24); - cSize += ZSTD_blockHeaderSize; + U32 cBlockHeader; + /* + We check to see if the regularly compressed block size is + less than 10 (the upper bound for RLE blocks) and we check + to see if the block is an RLE + */ + if (cSize < 10 && ZSTD_isRLE(ip, blockSize)) { + op[ZSTD_blockHeaderSize] = ip[0]; + cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3); + cSize = ZSTD_blockHeaderSize + 1; + } else { + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + cSize += ZSTD_blockHeaderSize; + } + MEM_writeLE24(op, (const U32) cBlockHeader); } ip += blockSize; From cba5350f88680f2b7f115b23cfde457c330202a5 Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Thu, 22 Aug 2019 12:12:44 -0700 Subject: [PATCH 02/14] Moving RLE logic to 
inside ZSTD_compressBlock_internal and adding assert --- lib/compress/zstd_compress.c | 47 +++++++++++++++++------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index dd8c6db10..e56557125 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2264,6 +2264,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) return ZSTDbss_compress; } +/* Returns true if the given block is a RLE block */ +static int ZSTD_isRLE(const BYTE *ip, size_t length) { + size_t i; + if (length < 2) return 1; + for (i = 1; i < length; ++i) { + if (ip[0] != ip[i]) return 0; + } + return 1; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) @@ -2287,8 +2297,15 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); + if (cSize < 10 && ZSTD_isRLE(src, srcSize)) { + cSize = 1; + ((BYTE*)dst)[0] = ((const BYTE*)src)[0]; + } + out: - if (!ZSTD_isError(cSize) && cSize != 0) { + if (!ZSTD_isError(cSize) && cSize > 1) { + assert(!ZSTD_isRLE(src, srcSize)); + /* confirm repcodes and entropy tables when emitting a compressed block */ ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; zc->blockState.prevCBlock = zc->blockState.nextCBlock; @@ -2323,16 +2340,6 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params } } -/* Returns true if the given block is a RLE block */ -static int ZSTD_isRLE(const BYTE *ip, size_t length) { - if (length < 2) return 1; - size_t i; - for (i = 1; i < length; ++i) { - if (ip[0] != ip[i]) return 0; - } - return 1; -} - /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. 
@@ -2381,21 +2388,11 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); } else { - U32 cBlockHeader; - /* - We check to see if the regularly compressed block size is - less than 10 (the upper bound for RLE blocks) and we check - to see if the block is an RLE - */ - if (cSize < 10 && ZSTD_isRLE(ip, blockSize)) { - op[ZSTD_blockHeaderSize] = ip[0]; - cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3); - cSize = ZSTD_blockHeaderSize + 1; - } else { - cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - cSize += ZSTD_blockHeaderSize; - } + const U32 cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); MEM_writeLE24(op, (const U32) cBlockHeader); + cSize += ZSTD_blockHeaderSize; } ip += blockSize; From 4faf3a59117e3dac999f80bc6416955dbb1af55f Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Thu, 22 Aug 2019 13:46:15 -0700 Subject: [PATCH 03/14] Fixing ci-circle test failure issues --- lib/compress/zstd_compress.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e56557125..560548a97 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2279,6 +2279,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, const void* src, size_t srcSize) { size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); @@ -2297,9 +2299,9 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - if (cSize 
< 10 && ZSTD_isRLE(src, srcSize)) { + if (cSize < 10 && ZSTD_isRLE(ip, srcSize)) { cSize = 1; - ((BYTE*)dst)[0] = ((const BYTE*)src)[0]; + op[0] = ip[0]; } out: @@ -2391,7 +2393,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, const U32 cBlockHeader = cSize == 1 ? lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, (const U32) cBlockHeader); + MEM_writeLE24(op, cBlockHeader); cSize += ZSTD_blockHeaderSize; } From 0e3ba02cf167cb6660eee70fc5874cddb31defae Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Thu, 22 Aug 2019 13:54:41 -0700 Subject: [PATCH 04/14] Fixing more test failure errors --- lib/compress/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 560548a97..6e02da4f0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2306,12 +2306,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, out: if (!ZSTD_isError(cSize) && cSize > 1) { - assert(!ZSTD_isRLE(src, srcSize)); - /* confirm repcodes and entropy tables when emitting a compressed block */ ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; zc->blockState.prevCBlock = zc->blockState.nextCBlock; zc->blockState.nextCBlock = tmp; + + assert(!ZSTD_isRLE(src, srcSize)); } /* We check that dictionaries have offset codes available for the first * block. 
After the first block, the offcode table might not have large From ba469324927a0c205a6756d307b0436fc101112c Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 08:51:34 -0700 Subject: [PATCH 05/14] Removing implicit conversion from const void* to const BYTE* and added constant for threshold --- lib/compress/zstd_compress.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6e02da4f0..7ea929aff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2278,6 +2278,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + /* + This the upper bound for the length of an rle block. + This isn't the actual upper bound. Finding the real threshold + needs further investigation. + */ + const int rleMaxLength = 25; size_t cSize; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; @@ -2299,7 +2305,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - if (cSize < 10 && ZSTD_isRLE(ip, srcSize)) { + if (cSize < rleMaxLength && ZSTD_isRLE(ip, srcSize)) { cSize = 1; op[0] = ip[0]; } @@ -2311,7 +2317,7 @@ out: zc->blockState.prevCBlock = zc->blockState.nextCBlock; zc->blockState.nextCBlock = tmp; - assert(!ZSTD_isRLE(src, srcSize)); + assert(!ZSTD_isRLE(ip, srcSize)); } /* We check that dictionaries have offset codes available for the first * block. 
After the first block, the offcode table might not have large From 1f2bf77f2afebd266c1222e3018e4412b12dd2ec Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 09:00:22 -0700 Subject: [PATCH 06/14] Using typedef U32 instead of int --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7ea929aff..62261f6f3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2283,7 +2283,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, This isn't the actual upper bound. Finding the real threshold needs further investigation. */ - const int rleMaxLength = 25; + const U32 rleMaxLength = 25; size_t cSize; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; From 7b041b552eff2315babc9784a8284d64050d19e2 Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 12:26:53 -0700 Subject: [PATCH 07/14] Removing assert for rle that doesn't always hold --- lib/compress/zstd_compress.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 62261f6f3..71ede67ce 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2316,8 +2316,6 @@ out: ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; zc->blockState.prevCBlock = zc->blockState.nextCBlock; zc->blockState.nextCBlock = tmp; - - assert(!ZSTD_isRLE(ip, srcSize)); } /* We check that dictionaries have offset codes available for the first * block. After the first block, the offcode table might not have large @@ -2399,6 +2397,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, const U32 cBlockHeader = cSize == 1 ? 
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + ZSTD_printBlockStructure(cBlockHeader); MEM_writeLE24(op, cBlockHeader); cSize += ZSTD_blockHeaderSize; } From 33b6446ca77f95ebb07bcd03e28aaf6e6c37bc4e Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 14:34:43 -0700 Subject: [PATCH 08/14] Removing accidental method call --- lib/compress/zstd_compress.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 71ede67ce..ffa53cd5b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2397,7 +2397,6 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, const U32 cBlockHeader = cSize == 1 ? lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - ZSTD_printBlockStructure(cBlockHeader); MEM_writeLE24(op, cBlockHeader); cSize += ZSTD_blockHeaderSize; } From ce264ce53bd1f8984522ad38596cf7a09a447c4d Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 14:54:29 -0700 Subject: [PATCH 09/14] Forbidding emission of RLE when it's the first block --- lib/compress/zstd_compress.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ffa53cd5b..1bf3657d7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2276,7 +2276,7 @@ static int ZSTD_isRLE(const BYTE *ip, size_t length) { static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t srcSize) + const void* src, size_t srcSize, U32 frame) { /* This the upper bound for the length of an rle block. 
@@ -2305,7 +2305,18 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - if (cSize < rleMaxLength && ZSTD_isRLE(ip, srcSize)) { + /* + We don't want to emit our first block as a RLE even if it qualifies because + doing so will cause the decoder to throw a "should consume all input error." + https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 + */ + U32 isFirstBlock = zc->inBuffPos == srcSize; + + if (frame && + !isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { cSize = 1; op[0] = ip[0]; } @@ -2387,7 +2398,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, { size_t cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize); + ip, blockSize, 1 /* frame */); FORWARD_IF_ERROR(cSize); if (cSize == 0) { /* block is not compressible */ @@ -2527,7 +2538,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); { size_t const cSize = frame ? 
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); FORWARD_IF_ERROR(cSize); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); From 991cbc9024b11068b0f9340b1f4a85cbfa3c57e2 Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 15:00:50 -0700 Subject: [PATCH 10/14] Fixing mixed declaration compiler complaint --- lib/compress/zstd_compress.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1bf3657d7..9589b5737 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2284,6 +2284,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, needs further investigation. */ const U32 rleMaxLength = 25; + /* + We don't want to emit our first block as a RLE even if it qualifies because + doing so will cause the decoder to throw a "should consume all input error." + https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 + */ + U32 isFirstBlock = zc->inBuffPos == srcSize; size_t cSize; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; @@ -2305,13 +2311,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - /* - We don't want to emit our first block as a RLE even if it qualifies because - doing so will cause the decoder to throw a "should consume all input error." 
- https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 - */ - U32 isFirstBlock = zc->inBuffPos == srcSize; - if (frame && !isFirstBlock && cSize < rleMaxLength && From 96201d97745e551c3f12255ce84a8d7492099edf Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Mon, 26 Aug 2019 15:30:41 -0700 Subject: [PATCH 11/14] Added bool to cctx and fixed some comment nits --- lib/compress/zstd_compress.c | 22 +++++++++++----------- lib/compress/zstd_compress_internal.h | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9589b5737..f8a2919e2 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1314,6 +1314,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl cctx->blockState.matchState.cParams = params.cParams; cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; cctx->consumedSrcSize = 0; + cctx->isFirstBlock = 1; cctx->producedCSize = 0; if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) cctx->appliedParams.fParams.contentSizeFlag = 0; @@ -1416,6 +1417,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, (U32)pledgedSrcSize, params.cParams.windowLog); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + zc->isFirstBlock = 1; if (crp == ZSTDcrp_continue) { if (ZSTD_equivalentParams(zc->appliedParams, params, zc->inBuffSize, @@ -2278,18 +2280,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame) { - /* - This the upper bound for the length of an rle block. - This isn't the actual upper bound. Finding the real threshold - needs further investigation. + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. 
*/ const U32 rleMaxLength = 25; - /* - We don't want to emit our first block as a RLE even if it qualifies because - doing so will cause the decoder to throw a "should consume all input error." - https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 - */ - U32 isFirstBlock = zc->inBuffPos == srcSize; size_t cSize; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; @@ -2312,7 +2307,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->bmi2); if (frame && - !isFirstBlock && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder to throw a "should consume all input error." + * https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 + */ + !zc->isFirstBlock && cSize < rleMaxLength && ZSTD_isRLE(ip, srcSize)) { @@ -2417,6 +2416,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, op += cSize; assert(dstCapacity >= cSize); dstCapacity -= cSize; + cctx->isFirstBlock = 0; DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 6d623cc6b..3a5c7f2d1 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -238,6 +238,7 @@ struct ZSTD_CCtx_s { XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; + int isFirstBlock; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ From e5704bbfdf6cb00dbf17c33c72e8be9ddcae739f Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Wed, 28 Aug 2019 08:32:34 -0700 Subject: [PATCH 12/14] Added test for multiple blocks of zeros and fixed nit about comments --- lib/compress/zstd_compress.c | 4 ++-- tests/fuzzer.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f8a2919e2..62cab4ed5 100644 --- 
a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2283,7 +2283,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, /* This the upper bound for the length of an rle block. * This isn't the actual upper bound. Finding the real threshold * needs further investigation. - */ + */ const U32 rleMaxLength = 25; size_t cSize; const BYTE* ip = (const BYTE*)src; @@ -2310,7 +2310,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, /* We don't want to emit our first block as a RLE even if it qualifies because * doing so will cause the decoder to throw a "should consume all input error." * https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 - */ + */ !zc->isFirstBlock && cSize < rleMaxLength && ZSTD_isRLE(ip, srcSize)) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 2de7c0096..c256389a5 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1960,6 +1960,19 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); } + /* Multiple blocks of zeros test */ + #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */ + DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH); + memset(CNBuffer, 0, LONGZEROSLENGTH); + CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(LONGZEROSLENGTH), CNBuffer, LONGZEROSLENGTH, 1) ); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/LONGZEROSLENGTH*100); + + DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, LONGZEROSLENGTH); + { CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, LONGZEROSLENGTH, compressedBuffer, cSize) ); + if (r != LONGZEROSLENGTH) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + + /* All zeroes test (test bug #137) */ #define ZEROESLENGTH 100 DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); From 4a1ca5e0a8bbdca7a7b3c4664ec4d2ab2483580f Mon Sep 17 00:00:00 2001 From: bimbashrestha Date: Thu, 29 Aug 2019 11:55:12 -0700 Subject: [PATCH 13/14] Adding 
method for extracting sequences. --- lib/compress/zstd_compress.c | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 62cab4ed5..6ae07156e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2192,6 +2192,77 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthID = 0; } +typedef struct { + U32 matchPos; + U32 offset; + U32 litLength; + U32 matchLength; + int rep; +} Sequence; + +static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict, + void* dst, size_t dstSize, + const void* src, size_t srcSize, + Sequence* outSeqs, size_t outSeqsSize) +{ + const seqStore_t* seqStore; + const seqDef* seqs; + size_t seqsSize; + + int i; int repIdx; U32 position; + + size_t blockSize = ZSTD_getBlockSize(zc); + size_t maxOutput = ZSTD_compressBound(blockSize); + + ASSERT(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict))); + ASSERT(dstSize >= maxOutput); dstSize = maxOutput; + ASSERT(srcSize >= blockSize); srcSize = blockSize; + ASSERT(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize))); + + seqStore = ZSTD_getSeqStore(zc); + seqs = seqStore->sequencesStart; + seqsSize = seqStore->sequences - seqStore->sequencesStart; + + ASSERT(outSeqsSize >= seqsSize); outSeqsSize = seqsSize; + + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= 3 /* num reps */) { + outSeqs[i].rep = 1; + repIdx = i - outSeqs[i].offset; + + if (repIdx >= 0) { + outSeqs[i].offset = outSeqs[repIdx].offset; + } + + if (repIdx == -1) { + outSeqs[i].offset = 1; + } else if 
(repIdx == -2) { + outSeqs[i].offset = 4; + } else if (repIdx == -3) { + outSeqs[i].offset = 8; + } + } else { + outSeqs[i].offset -= 3 /* num reps */; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = position; + position += outSeqs[i].matchLength; + } +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) From c3e3c8bf325ec0aea9aadeaedca4d7b03e85de63 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 29 Aug 2019 12:05:47 -0700 Subject: [PATCH 14/14] Undoing the last commit (that was an accident) --- lib/compress/zstd_compress.c | 71 ------------------------------------ 1 file changed, 71 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6ae07156e..62cab4ed5 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2192,77 +2192,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthID = 0; } -typedef struct { - U32 matchPos; - U32 offset; - U32 litLength; - U32 matchLength; - int rep; -} Sequence; - -static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict, - void* dst, size_t dstSize, - const void* src, size_t srcSize, - Sequence* outSeqs, size_t outSeqsSize) -{ - const seqStore_t* seqStore; - const seqDef* seqs; - size_t seqsSize; - - int i; int repIdx; U32 position; - - size_t blockSize = ZSTD_getBlockSize(zc); - size_t maxOutput = ZSTD_compressBound(blockSize); - - ASSERT(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict))); - ASSERT(dstSize >= maxOutput); dstSize = maxOutput; - ASSERT(srcSize >= blockSize); srcSize = blockSize; - ASSERT(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize))); - - seqStore = ZSTD_getSeqStore(zc); - seqs = seqStore->sequencesStart; - seqsSize = seqStore->sequences - seqStore->sequencesStart; - - ASSERT(outSeqsSize >= seqsSize); outSeqsSize = seqsSize; - - for (i = 0, position = 0; i < 
seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */; - - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - - if (outSeqs[i].offset <= 3 /* num reps */) { - outSeqs[i].rep = 1; - repIdx = i - outSeqs[i].offset; - - if (repIdx >= 0) { - outSeqs[i].offset = outSeqs[repIdx].offset; - } - - if (repIdx == -1) { - outSeqs[i].offset = 1; - } else if (repIdx == -2) { - outSeqs[i].offset = 4; - } else if (repIdx == -3) { - outSeqs[i].offset = 8; - } - } else { - outSeqs[i].offset -= 3 /* num reps */; - } - - position += outSeqs[i].litLength; - outSeqs[i].matchPos = position; - position += outSeqs[i].matchLength; - } -} - typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)