mirror of
https://github.com/facebook/zstd.git
synced 2025-08-05 19:15:58 +03:00
ZSTD_compressSequencesAndLiterals() now supports multi-block frames.
This commit is contained in:
@@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
|||||||
*/
|
*/
|
||||||
FORCE_INLINE_TEMPLATE size_t
|
FORCE_INLINE_TEMPLATE size_t
|
||||||
ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
|
ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
|
||||||
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
|
ZSTD_SequencePosition* seqPos,
|
||||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||||
size_t blockSize,
|
size_t blockSize,
|
||||||
int const repcodeResolution,
|
int const repcodeResolution,
|
||||||
@@ -7197,37 +7197,36 @@ ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
|
|||||||
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
|
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
|
||||||
|
|
||||||
seqPos->idx = idx+1;
|
seqPos->idx = idx+1;
|
||||||
*litConsumedPtr = litConsumed;
|
return litConsumed;
|
||||||
return blockSize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef size_t (*ZSTD_transferSequencesOnly_f) (ZSTD_CCtx* cctx,
|
typedef size_t (*ZSTD_transferSequencesOnly_f) (ZSTD_CCtx* cctx,
|
||||||
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
|
ZSTD_SequencePosition* seqPos,
|
||||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||||
size_t blockSize,
|
size_t blockSize,
|
||||||
int const repcodeResolution);
|
int const repcodeResolution);
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
||||||
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
|
ZSTD_SequencePosition* seqPos,
|
||||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||||
size_t blockSize,
|
size_t blockSize,
|
||||||
int const repcodeResolution)
|
int const repcodeResolution)
|
||||||
{
|
{
|
||||||
return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
|
return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
|
||||||
seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize,
|
seqPos, inSeqs, nbSequences, blockSize,
|
||||||
repcodeResolution, 0);
|
repcodeResolution, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
ZSTD_transferSequencesOnly_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx,
|
ZSTD_transferSequencesOnly_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx,
|
||||||
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
|
ZSTD_SequencePosition* seqPos,
|
||||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||||
size_t blockSize,
|
size_t blockSize,
|
||||||
int const repcodeResolution)
|
int const repcodeResolution)
|
||||||
{
|
{
|
||||||
return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
|
return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
|
||||||
seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize,
|
seqPos, inSeqs, nbSequences, blockSize,
|
||||||
repcodeResolution, 1);
|
repcodeResolution, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7269,28 +7268,26 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
|||||||
inSeqs, nbSequences, seqPos);
|
inSeqs, nbSequences, seqPos);
|
||||||
U32 const lastBlock = (blockSize == remaining);
|
U32 const lastBlock = (blockSize == remaining);
|
||||||
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
||||||
RETURN_ERROR_IF(!lastBlock, srcSize_wrong, "Only supports single block");
|
|
||||||
assert(blockSize <= remaining);
|
assert(blockSize <= remaining);
|
||||||
ZSTD_resetSeqStore(&cctx->seqStore);
|
ZSTD_resetSeqStore(&cctx->seqStore);
|
||||||
|
|
||||||
blockSize = transfer(cctx,
|
litConsumed = transfer(cctx,
|
||||||
&seqPos, &litConsumed,
|
&seqPos,
|
||||||
inSeqs, nbSequences,
|
inSeqs, nbSequences,
|
||||||
blockSize,
|
blockSize,
|
||||||
repcodeResolution);
|
repcodeResolution);
|
||||||
RETURN_ERROR_IF(blockSize != remaining, externalSequences_invalid, "Must consume the entire block");
|
FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion");
|
||||||
RETURN_ERROR_IF(litConsumed != litSize, externalSequences_invalid, "Must consume the exact amount of literals provided");
|
RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences");
|
||||||
FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
|
|
||||||
|
|
||||||
/* Note: when blockSize is very small, other variant send it uncompressed.
|
/* Note: when blockSize is very small, other variant send it uncompressed.
|
||||||
* Here, we still send the sequences, because we don't have the source to send it uncompressed.
|
* Here, we still send the sequences, because we don't have the original source to send it uncompressed.
|
||||||
* In theory, it would be possible to reproduce the source from the sequences,
|
* One could imagine it possible to reproduce the source from the sequences,
|
||||||
* but that's pretty complex and memory intensive, which goes against the principles of this variant. */
|
* but that's complex and memory intensive, which goes against the objectives of this variant. */
|
||||||
|
|
||||||
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
|
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
|
||||||
compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer(
|
compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer(
|
||||||
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
|
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
|
||||||
literals, litSize,
|
literals, litConsumed,
|
||||||
blockSize,
|
blockSize,
|
||||||
&cctx->seqStore,
|
&cctx->seqStore,
|
||||||
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
||||||
@@ -7299,17 +7296,19 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
|||||||
cctx->bmi2);
|
cctx->bmi2);
|
||||||
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
||||||
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
|
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
|
||||||
|
litSize -= litConsumed;
|
||||||
|
literals = (const char*)literals + litConsumed;
|
||||||
|
|
||||||
/* Note: difficult to check source for RLE block when only Literals are provided,
|
/* Note: difficult to check source for RLE block when only Literals are provided,
|
||||||
* but it could be considered from analyzing the sequence directly */
|
* but it could be considered from analyzing the sequence directly */
|
||||||
|
|
||||||
if (compressedSeqsSize == 0) {
|
if (compressedSeqsSize == 0) {
|
||||||
/* Sending uncompressed blocks is difficult, because the source is not provided.
|
/* Sending uncompressed blocks is out of reach, because the source is not provided.
|
||||||
* In theory, one could use the sequences to regenerate the source, like a decompressor,
|
* In theory, one could use the sequences to regenerate the source, like a decompressor,
|
||||||
* but it's complex, and memory hungry, killing the purpose of this variant.
|
* but it's complex, and memory hungry, killing the purpose of this variant.
|
||||||
* Current outcome: generate an error code.
|
* Current outcome: generate an error code.
|
||||||
*/
|
*/
|
||||||
RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code might be misleading */
|
RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code could be clearer */
|
||||||
} else {
|
} else {
|
||||||
U32 cBlockHeader;
|
U32 cBlockHeader;
|
||||||
assert(compressedSeqsSize > 1); /* no RLE */
|
assert(compressedSeqsSize > 1); /* no RLE */
|
||||||
@@ -7338,6 +7337,7 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
|||||||
DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
|
DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RETURN_ERROR_IF(litSize != 0, externalSequences_invalid, "literals must be entirely and exactly consumed");
|
||||||
DEBUGLOG(4, "cSize final total: %zu", cSize);
|
DEBUGLOG(4, "cSize final total: %zu", cSize);
|
||||||
return cSize;
|
return cSize;
|
||||||
}
|
}
|
||||||
|
@@ -1674,10 +1674,9 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
|||||||
* It's a speed optimization, useful when the right conditions are met,
|
* It's a speed optimization, useful when the right conditions are met,
|
||||||
* but it also features the following limitations:
|
* but it also features the following limitations:
|
||||||
* - Only supports explicit delimiter mode
|
* - Only supports explicit delimiter mode
|
||||||
* - Supports 1 block only (max input 128 KB)
|
|
||||||
* - Not compatible with frame checksum, which must be disabled
|
* - Not compatible with frame checksum, which must be disabled
|
||||||
* - Can fail (return an error) when input data cannot be compress sufficiently
|
* - If any block is incompressible, will fail and return an error
|
||||||
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Discrepancy will generate an error.
|
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
|
||||||
* - the buffer @literals must be larger than @litSize by at least 8 bytes.
|
* - the buffer @literals must be larger than @litSize by at least 8 bytes.
|
||||||
* @return : final compressed size, or a ZSTD error code.
|
* @return : final compressed size, or a ZSTD error code.
|
||||||
*/
|
*/
|
||||||
|
@@ -3880,7 +3880,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
|
|
||||||
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
|
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
|
||||||
{
|
{
|
||||||
const size_t srcSize = 100 KB;
|
const size_t srcSize = 500 KB;
|
||||||
const BYTE* const src = (BYTE*)CNBuffer;
|
const BYTE* const src = (BYTE*)CNBuffer;
|
||||||
BYTE* const dst = (BYTE*)compressedBuffer;
|
BYTE* const dst = (BYTE*)compressedBuffer;
|
||||||
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
||||||
|
Reference in New Issue
Block a user