mirror of
https://github.com/facebook/zstd.git
synced 2025-07-30 22:23:13 +03:00
Merge pull request #1744 from bimbashrestha/dev
Generate RLE blocks in the encoder
This commit is contained in:
@ -1334,6 +1334,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
|
|||||||
cctx->blockState.matchState.cParams = params.cParams;
|
cctx->blockState.matchState.cParams = params.cParams;
|
||||||
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
||||||
cctx->consumedSrcSize = 0;
|
cctx->consumedSrcSize = 0;
|
||||||
|
cctx->isFirstBlock = 1;
|
||||||
cctx->producedCSize = 0;
|
cctx->producedCSize = 0;
|
||||||
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
||||||
cctx->appliedParams.fParams.contentSizeFlag = 0;
|
cctx->appliedParams.fParams.contentSizeFlag = 0;
|
||||||
@ -1436,6 +1437,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||||||
(U32)pledgedSrcSize, params.cParams.windowLog);
|
(U32)pledgedSrcSize, params.cParams.windowLog);
|
||||||
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
||||||
|
|
||||||
|
zc->isFirstBlock = 1;
|
||||||
if (crp == ZSTDcrp_continue) {
|
if (crp == ZSTDcrp_continue) {
|
||||||
if (ZSTD_equivalentParams(zc->appliedParams, params,
|
if (ZSTD_equivalentParams(zc->appliedParams, params,
|
||||||
zc->inBuffSize,
|
zc->inBuffSize,
|
||||||
@ -2289,11 +2291,28 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|||||||
return ZSTDbss_compress;
|
return ZSTDbss_compress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ZSTD_isRLE() :
 * Returns 1 if every byte in ip[0..length-1] is equal to ip[0], i.e. the
 * block consists of a single repeated byte value; returns 0 otherwise.
 * Inputs shorter than 2 bytes are always reported as RLE. */
|
||||||
|
static int ZSTD_isRLE(const BYTE *ip, size_t length) {
|
||||||
|
size_t i;
|
||||||
|
if (length < 2) return 1;
|
||||||
|
for (i = 1; i < length; ++i) {
|
||||||
|
if (ip[0] != ip[i]) return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const void* src, size_t srcSize)
|
const void* src, size_t srcSize, U32 frame)
|
||||||
{
|
{
|
||||||
|
/* This is the upper bound on the compressed size (cSize) below which a block is tested for RLE.
|
||||||
|
* This isn't the actual upper bound. Finding the real threshold
|
||||||
|
* needs further investigation.
|
||||||
|
*/
|
||||||
|
const U32 rleMaxLength = 25;
|
||||||
size_t cSize;
|
size_t cSize;
|
||||||
|
const BYTE* ip = (const BYTE*)src;
|
||||||
|
BYTE* op = (BYTE*)dst;
|
||||||
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
||||||
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
||||||
(unsigned)zc->blockState.matchState.nextToUpdate);
|
(unsigned)zc->blockState.matchState.nextToUpdate);
|
||||||
@ -2312,8 +2331,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||||
zc->bmi2);
|
zc->bmi2);
|
||||||
|
|
||||||
|
if (frame &&
|
||||||
|
/* We don't want to emit our first block as an RLE block even if it qualifies because
|
||||||
|
* doing so will cause the decoder to throw a "should consume all input" error.
|
||||||
|
* https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723
|
||||||
|
*/
|
||||||
|
!zc->isFirstBlock &&
|
||||||
|
cSize < rleMaxLength &&
|
||||||
|
ZSTD_isRLE(ip, srcSize))
|
||||||
|
{
|
||||||
|
cSize = 1;
|
||||||
|
op[0] = ip[0];
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (!ZSTD_isError(cSize) && cSize != 0) {
|
if (!ZSTD_isError(cSize) && cSize > 1) {
|
||||||
/* confirm repcodes and entropy tables when emitting a compressed block */
|
/* confirm repcodes and entropy tables when emitting a compressed block */
|
||||||
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
|
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
|
||||||
zc->blockState.prevCBlock = zc->blockState.nextCBlock;
|
zc->blockState.prevCBlock = zc->blockState.nextCBlock;
|
||||||
@ -2348,7 +2380,6 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*! ZSTD_compress_frameChunk() :
|
/*! ZSTD_compress_frameChunk() :
|
||||||
* Compress a chunk of data into one or multiple blocks.
|
* Compress a chunk of data into one or multiple blocks.
|
||||||
* All blocks will be terminated, all input will be consumed.
|
* All blocks will be terminated, all input will be consumed.
|
||||||
@ -2390,15 +2421,17 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|||||||
|
|
||||||
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
|
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
|
||||||
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
||||||
ip, blockSize);
|
ip, blockSize, 1 /* frame */);
|
||||||
FORWARD_IF_ERROR(cSize);
|
FORWARD_IF_ERROR(cSize);
|
||||||
|
|
||||||
if (cSize == 0) { /* block is not compressible */
|
if (cSize == 0) { /* block is not compressible */
|
||||||
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
||||||
FORWARD_IF_ERROR(cSize);
|
FORWARD_IF_ERROR(cSize);
|
||||||
} else {
|
} else {
|
||||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
const U32 cBlockHeader = cSize == 1 ?
|
||||||
MEM_writeLE24(op, cBlockHeader24);
|
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
||||||
|
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||||
|
MEM_writeLE24(op, cBlockHeader);
|
||||||
cSize += ZSTD_blockHeaderSize;
|
cSize += ZSTD_blockHeaderSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2408,6 +2441,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|||||||
op += cSize;
|
op += cSize;
|
||||||
assert(dstCapacity >= cSize);
|
assert(dstCapacity >= cSize);
|
||||||
dstCapacity -= cSize;
|
dstCapacity -= cSize;
|
||||||
|
cctx->isFirstBlock = 0;
|
||||||
DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
|
DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
|
||||||
(unsigned)cSize);
|
(unsigned)cSize);
|
||||||
} }
|
} }
|
||||||
@ -2528,7 +2562,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
|||||||
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
|
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
|
||||||
{ size_t const cSize = frame ?
|
{ size_t const cSize = frame ?
|
||||||
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
|
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
|
||||||
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
|
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
|
||||||
FORWARD_IF_ERROR(cSize);
|
FORWARD_IF_ERROR(cSize);
|
||||||
cctx->consumedSrcSize += srcSize;
|
cctx->consumedSrcSize += srcSize;
|
||||||
cctx->producedCSize += (cSize + fhSize);
|
cctx->producedCSize += (cSize + fhSize);
|
||||||
|
@ -241,6 +241,7 @@ struct ZSTD_CCtx_s {
|
|||||||
XXH64_state_t xxhState;
|
XXH64_state_t xxhState;
|
||||||
ZSTD_customMem customMem;
|
ZSTD_customMem customMem;
|
||||||
size_t staticSize;
|
size_t staticSize;
|
||||||
|
int isFirstBlock;
|
||||||
|
|
||||||
seqStore_t seqStore; /* sequences storage ptrs */
|
seqStore_t seqStore; /* sequences storage ptrs */
|
||||||
ldmState_t ldmState; /* long distance matching state */
|
ldmState_t ldmState; /* long distance matching state */
|
||||||
|
@ -1960,6 +1960,19 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
DISPLAYLEVEL(3, "OK \n");
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Multiple blocks of zeros test */
|
||||||
|
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
|
||||||
|
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);
|
||||||
|
memset(CNBuffer, 0, LONGZEROSLENGTH);
|
||||||
|
CHECK_VAR(cSize, ZSTD_compress(compressedBuffer, ZSTD_compressBound(LONGZEROSLENGTH), CNBuffer, LONGZEROSLENGTH, 1) );
|
||||||
|
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/LONGZEROSLENGTH*100);
|
||||||
|
|
||||||
|
DISPLAYLEVEL(3, "test%3i : decompress %u zeroes : ", testNb++, LONGZEROSLENGTH);
|
||||||
|
{ CHECK_NEWV(r, ZSTD_decompress(decodedBuffer, LONGZEROSLENGTH, compressedBuffer, cSize) );
|
||||||
|
if (r != LONGZEROSLENGTH) goto _output_error; }
|
||||||
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
|
|
||||||
|
|
||||||
/* All zeroes test (test bug #137) */
|
/* All zeroes test (test bug #137) */
|
||||||
#define ZEROESLENGTH 100
|
#define ZEROESLENGTH 100
|
||||||
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
|
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
|
||||||
|
Reference in New Issue
Block a user