1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-08 17:22:10 +03:00

added unit tests to ZSTD_compressSequencesAndLiterals()

Seems to work as expected:
it correctly checks that `litSize` and `srcSize` are exactly correct.
This commit is contained in:
Yann Collet
2024-12-11 16:13:22 -08:00
parent ac1e4bef10
commit e94e23c67f
4 changed files with 122 additions and 11 deletions

View File

@@ -1422,7 +1422,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
This can be useful if the process generating the sequences also happens to generate the buffer of literals, This can be useful if the process generating the sequences also happens to generate the buffer of literals,
thus skipping an extraction + caching stage. thus skipping an extraction + caching stage.
It's essentially a speed optimization when the right conditions are met, It's essentially a speed optimization when the right conditions are met,
but it also includes so following limitations: but it also is restricted by the following limitations:
- Only supports explicit delimiter mode - Only supports explicit delimiter mode
- Not compatible with frame checksum, which must be disabled - Not compatible with frame checksum, which must be disabled
- Can fail when unable to compress sufficiently - Can fail when unable to compress sufficiently

View File

@@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
*/ */
static size_t static size_t
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx, ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos, ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
const ZSTD_Sequence* const inSeqs, size_t nbSequences, const ZSTD_Sequence* const inSeqs, size_t nbSequences,
size_t blockSize, size_t blockSize,
ZSTD_ParamSwitch_e externalRepSearch) ZSTD_ParamSwitch_e externalRepSearch)
@@ -7114,6 +7114,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
Repcodes_t updatedRepcodes; Repcodes_t updatedRepcodes;
U32 dictSize; U32 dictSize;
size_t startPosInSrc = seqPos->posInSrc; size_t startPosInSrc = seqPos->posInSrc;
size_t litConsumed = 0;
DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize); DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
@@ -7150,10 +7151,15 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength); ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
litConsumed += litLength;
} }
/* last sequence (only literals) */ /* last sequence (only literals) */
seqPos->posInSrc += inSeqs[idx].litLength; { size_t const lastLitLength = inSeqs[idx].litLength;
seqPos->posInSrc += lastLitLength;
cctx->seqStore.lit += lastLitLength; /* register proper length */
litConsumed += lastLitLength;
}
/* blockSize must be exactly correct (checked before calling this function) */ /* blockSize must be exactly correct (checked before calling this function) */
assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc; assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc;
@@ -7184,6 +7190,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
seqPos->idx = idx+1; seqPos->idx = idx+1;
*litConsumedPtr = litConsumed;
return blockSize; return blockSize;
} }
@@ -7214,21 +7221,23 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
} }
while (remaining) { while (remaining) {
size_t compressedSeqsSize; size_t compressedSeqsSize, cBlockSize, litConsumed;
size_t cBlockSize;
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
cctx->blockSize, remaining, cctx->blockSize, remaining,
inSeqs, nbSequences, seqPos); inSeqs, nbSequences, seqPos);
U32 const lastBlock = (blockSize == remaining); U32 const lastBlock = (blockSize == remaining);
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
RETURN_ERROR_IF(!lastBlock, GENERIC, "Only supports single block");
assert(blockSize <= remaining); assert(blockSize <= remaining);
ZSTD_resetSeqStore(&cctx->seqStore); ZSTD_resetSeqStore(&cctx->seqStore);
blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx, blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx,
&seqPos, &seqPos, &litConsumed,
inSeqs, nbSequences, inSeqs, nbSequences,
blockSize, blockSize,
cctx->appliedParams.searchForExternalRepcodes); cctx->appliedParams.searchForExternalRepcodes);
RETURN_ERROR_IF(blockSize != remaining, GENERIC, "Must consume the entire block");
RETURN_ERROR_IF(litConsumed != litSize, GENERIC, "Must consume the exact amount of literals provided");
FORWARD_IF_ERROR(blockSize, "Bad sequence copy"); FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
/* Note: when blockSize is very small, other variant send it uncompressed. /* Note: when blockSize is very small, other variant send it uncompressed.

View File

@@ -1666,17 +1666,18 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
* This can be useful if the process generating the sequences also happens to generate the buffer of literals, * This can be useful if the process generating the sequences also happens to generate the buffer of literals,
* thus skipping an extraction + caching stage. * thus skipping an extraction + caching stage.
* It's essentially a speed optimization when the right conditions are met, * It's essentially a speed optimization when the right conditions are met,
* but it also is restricted by the following limitations: * but it also features the following limitations:
* - Only supports explicit delimiter mode * - Only supports explicit delimiter mode
* - Supports 1 block only (max input 128 KB)
* - Not compatible with frame checksum, which must be disabled * - Not compatible with frame checksum, which must be disabled
* - Can fail when unable to compress sufficiently * - Can fail (return an error) when input data cannot be compressed sufficiently
* Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs. * Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
* @return : final compressed size, or a ZSTD error code. * @return : final compressed size, or a ZSTD error code.
*/ */
ZSTDLIB_STATIC_API size_t ZSTDLIB_STATIC_API size_t
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize, const ZSTD_Sequence* inSeqs, size_t nbSequences,
const void* literals, size_t litSize, size_t srcSize); const void* literals, size_t litSize, size_t srcSize);

View File

@@ -40,7 +40,6 @@
#include "datagen.h" /* RDG_genBuffer */ #include "datagen.h" /* RDG_genBuffer */
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64 */ #include "xxhash.h" /* XXH64 */
#include "util.h"
#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */ #include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */ /* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */ #include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
@@ -339,6 +338,35 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
} }
} }
/* FUZ_getLitSize() :
 * Adds up the `litLength` field of the first @nbSeqs entries of @seqs.
 * @seqs must not be NULL (enforced by assert()).
 * @return : total amount of literals described by the sequences. */
static size_t FUZ_getLitSize(const ZSTD_Sequence* seqs, size_t nbSeqs)
{
    size_t total = 0;
    size_t pos = 0;
    assert(seqs != NULL);
    while (pos < nbSeqs) {
        total += seqs[pos].litLength;
        pos++;
    }
    return total;
}
/* FUZ_transferLiterals() :
 * Walks the @nbSeqs sequences of @seqs over @src and copies only the literal
 * runs, back to back, into @dst. For each sequence, `litLength` bytes are
 * copied from the current position in @src, then the read position skips the
 * following `matchLength` bytes.
 * @dstCapacity : size of @dst; must be >= the sum of all `litLength` fields
 *                (only checked by assert()).
 * @srcSize : size of @src; the sequences must cover it exactly,
 *            i.e. sum of (litLength + matchLength) == srcSize
 *            (only checked by assert()).
 */
static void
FUZ_transferLiterals(void* dst, size_t dstCapacity,
                const void* src, size_t srcSize,
                const ZSTD_Sequence* seqs, size_t nbSeqs)
{
    size_t n;
    const char* ip = (const char*)src;
    char* op = (char*)dst;
    size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
    assert(litSize <= dstCapacity);
    /* these values are only read by assert() :
     * mark them as used so that builds with NDEBUG do not emit
     * unused variable / unused parameter warnings */
    (void)litSize; (void)dstCapacity;
    for (n=0; n<nbSeqs; n++) {
        size_t const ll = seqs[n].litLength;
        memcpy(op, ip, ll);
        op += ll;
        /* skip the literals just copied, and the match that follows them */
        ip += ll + seqs[n].matchLength;
    }
    /* the sequences must describe the source exactly */
    assert((size_t)(ip - (const char*)src) == srcSize);
    (void)srcSize;
}
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
typedef struct { typedef struct {
@@ -3808,7 +3836,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
if (seqs == NULL) goto _output_error; if (seqs == NULL) goto _output_error;
assert(cctx != NULL); assert(cctx != NULL);
/* Populate src with random data */ /* Populate src with compressible random data */
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed); RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
/* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */ /* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */
@@ -3850,6 +3878,79 @@ static int basicUnitTests(U32 const seed, double compressibility)
} }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
{
const size_t srcSize = 100 KB;
const BYTE* const src = (BYTE*)CNBuffer;
BYTE* const dst = (BYTE*)compressedBuffer;
const size_t dstCapacity = ZSTD_compressBound(srcSize);
const size_t decompressSize = srcSize;
char* const decompressBuffer = (char*)malloc(decompressSize);
char* const litBuffer = (char*)malloc(decompressSize);
size_t compressedSize;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
size_t nbSeqs;
if (litBuffer == NULL) goto _output_error;
if (decompressBuffer == NULL) goto _output_error;
if (seqs == NULL) goto _output_error;
assert(cctx != NULL);
/* Populate src with compressible random data */
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
/* Roundtrip Test using the AndLiterals() variant */
nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
{ size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs);
/* not enough literals: must fail */
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize);
if (!ZSTD_isError(compressedSize)) {
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n");
goto _output_error;
}
/* too many literals: must fail */
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize);
if (!ZSTD_isError(compressedSize)) {
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n");
goto _output_error;
}
/* correct amount of literals: should compress successfully */
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize);
if (ZSTD_isError(compressedSize)) {
DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n");
goto _output_error;
}
}
{ size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
if (ZSTD_isError(dSize)) {
DISPLAY("Error during decompression of frame produced by ZSTD_compressSequencesAndLiterals()\n");
goto _output_error;
}
if (dSize != srcSize) {
DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() has different size\n");
goto _output_error;
}
if (memcmp(decompressBuffer, src, srcSize)) {
DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() produces a different content (of same size)\n");
goto _output_error;
}
}
ZSTD_freeCCtx(cctx);
free(litBuffer);
free(decompressBuffer);
free(seqs);
}
DISPLAYLEVEL(3, "OK \n");
/* Multiple blocks of zeros test */ /* Multiple blocks of zeros test */
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */ #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH); DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);