1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-08 17:22:10 +03:00

added unit tests to ZSTD_compressSequencesAndLiterals()

seems to work as expected,
correctly checks that `litSize` and `srcSize` are exactly correct.
This commit is contained in:
Yann Collet
2024-12-11 16:13:22 -08:00
parent ac1e4bef10
commit e94e23c67f
4 changed files with 122 additions and 11 deletions

View File

@@ -1422,7 +1422,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
This can be useful if the process generating the sequences also happens to generate the buffer of literals,
thus skipping an extraction + caching stage.
It's essentially a speed optimization when the right conditions are met,
but it also includes so following limitations:
but it also is restricted by the following limitations:
- Only supports explicit delimiter mode
- Not compatible with frame checksum, which must be disabled
- Can fail when unable to compress sufficiently

View File

@@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
*/
static size_t
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos,
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
size_t blockSize,
ZSTD_ParamSwitch_e externalRepSearch)
@@ -7114,6 +7114,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
Repcodes_t updatedRepcodes;
U32 dictSize;
size_t startPosInSrc = seqPos->posInSrc;
size_t litConsumed = 0;
DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
@@ -7150,10 +7151,15 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
litConsumed += litLength;
}
/* last sequence (only literals) */
seqPos->posInSrc += inSeqs[idx].litLength;
{ size_t const lastLitLength = inSeqs[idx].litLength;
seqPos->posInSrc += lastLitLength;
cctx->seqStore.lit += lastLitLength; /* register proper length */
litConsumed += lastLitLength;
}
/* blockSize must be exactly correct (checked before calling this function) */
assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc;
@@ -7184,6 +7190,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
seqPos->idx = idx+1;
*litConsumedPtr = litConsumed;
return blockSize;
}
@@ -7214,21 +7221,23 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
}
while (remaining) {
size_t compressedSeqsSize;
size_t cBlockSize;
size_t compressedSeqsSize, cBlockSize, litConsumed;
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
cctx->blockSize, remaining,
inSeqs, nbSequences, seqPos);
U32 const lastBlock = (blockSize == remaining);
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
RETURN_ERROR_IF(!lastBlock, GENERIC, "Only supports single block");
assert(blockSize <= remaining);
ZSTD_resetSeqStore(&cctx->seqStore);
blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx,
&seqPos,
&seqPos, &litConsumed,
inSeqs, nbSequences,
blockSize,
cctx->appliedParams.searchForExternalRepcodes);
RETURN_ERROR_IF(blockSize != remaining, GENERIC, "Must consume the entire block");
RETURN_ERROR_IF(litConsumed != litSize, GENERIC, "Must consume the exact amount of literals provided");
FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
/* Note: when blockSize is very small, other variants send it uncompressed.

View File

@@ -1666,17 +1666,18 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
* This can be useful if the process generating the sequences also happens to generate the buffer of literals,
* thus skipping an extraction + caching stage.
* It's essentially a speed optimization when the right conditions are met,
* but it also is restricted by the following limitations:
* but it also features the following limitations:
* - Only supports explicit delimiter mode
* - Supports 1 block only (max input 128 KB)
* - Not compatible with frame checksum, which must be disabled
* - Can fail when unable to compress sufficiently
* - Can fail (return an error) when input data cannot be compressed sufficiently
* Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
* @return : final compressed size, or a ZSTD error code.
*/
ZSTDLIB_STATIC_API size_t
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const ZSTD_Sequence* inSeqs, size_t nbSequences,
const void* literals, size_t litSize, size_t srcSize);

View File

@@ -40,7 +40,6 @@
#include "datagen.h" /* RDG_genBuffer */
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64 */
#include "util.h"
#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
@@ -339,6 +338,35 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
}
}
/* FUZ_getLitSize() :
 * Adds up the `litLength` field of each of the @nbSeqs entries of @seqs.
 * @seqs must not be NULL (enforced by assert only).
 * @return : the accumulated literal length; 0 when @nbSeqs is 0.
 */
static size_t FUZ_getLitSize(const ZSTD_Sequence* seqs, size_t nbSeqs)
{
    size_t total = 0;
    size_t remaining = nbSeqs;
    const ZSTD_Sequence* seq = seqs;
    assert(seqs != NULL);
    /* walk the array with a cursor rather than an index */
    for ( ; remaining > 0; remaining--, seq++) {
        total += seq->litLength;
    }
    return total;
}
/* FUZ_transferLiterals() :
 * Gathers the literals of every sequence, back to back, from @src into @dst.
 * For each sequence, `litLength` bytes are copied from the current read position,
 * then the read position advances by `litLength + matchLength`
 * (the matched bytes are skipped, not copied).
 * Preconditions, verified by assert() only :
 * - @dstCapacity is at least the sum of all `litLength` fields of @seqs
 * - the sequences cover exactly @srcSize bytes of @src
 */
static void
FUZ_transferLiterals(void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize,
                     const ZSTD_Sequence* seqs, size_t nbSeqs)
{
    size_t n;
    const char* ip = (const char*)src;
    char* op = (char*)dst;
    size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
    assert(litSize <= dstCapacity);
    /* only consumed by assert() : avoid unused warnings when asserts are compiled out */
    (void)litSize; (void)dstCapacity;
    for (n=0; n<nbSeqs; n++) {
        size_t const ll = seqs[n].litLength;
        memcpy(op, ip, ll);
        op += ll;
        ip += ll + seqs[n].matchLength;   /* skip the match section */
    }
    assert((size_t)(ip - (const char*)src) == srcSize);
    (void)srcSize;   /* only consumed by assert() */
}
#ifdef ZSTD_MULTITHREAD
typedef struct {
@@ -3808,7 +3836,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
if (seqs == NULL) goto _output_error;
assert(cctx != NULL);
/* Populate src with random data */
/* Populate src with compressible random data */
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
/* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */
@@ -3850,6 +3878,79 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
{
    /* Round-trip test of ZSTD_compressSequencesAndLiterals() :
     * sequences are produced by ZSTD_generateSequences(), literals are gathered
     * into a separate buffer, then the function is invoked with
     * a too small, a too large, and finally the exact @litSize.
     * Only the exact amount is expected to succeed, and its output must decompress
     * back into the original source.
     * NOTE(review): buffers are not released on the _output_error paths;
     * presumably acceptable since the test run is failing at that point - confirm. */
    const size_t srcSize = 100 KB;
    const BYTE* const src = (BYTE*)CNBuffer;
    BYTE* const dst = (BYTE*)compressedBuffer;
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    const size_t decompressSize = srcSize;
    char* const decompressBuffer = (char*)malloc(decompressSize);
    char* const litBuffer = (char*)malloc(decompressSize);
    size_t compressedSize;
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
    size_t nbSeqs;

    if (litBuffer == NULL) goto _output_error;
    if (decompressBuffer == NULL) goto _output_error;
    if (seqs == NULL) goto _output_error;
    /* runtime check, like the other allocations : assert() may be compiled out */
    if (cctx == NULL) goto _output_error;

    /* Populate src with compressible random data */
    RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);

    /* Roundtrip Test using the AndLiterals() variant */
    nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
    /* an error code must not be used as a sequence count */
    if (ZSTD_isError(nbSeqs)) {
        DISPLAY("Error in ZSTD_generateSequences()\n");
        goto _output_error;
    }
    if (ZSTD_isError(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters))) goto _output_error;
    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters))) goto _output_error;

    {   size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
        /* litBuffer holds srcSize bytes; FUZ_transferLiterals() asserts that the literals fit */
        FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs);

        /* not enough literals: must fail */
        compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize);
        if (!ZSTD_isError(compressedSize)) {
            DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n");
            goto _output_error;
        }

        /* too many literals: must fail */
        compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize);
        if (!ZSTD_isError(compressedSize)) {
            DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n");
            goto _output_error;
        }

        /* correct amount of literals: should compress successfully */
        compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize);
        if (ZSTD_isError(compressedSize)) {
            DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n");
            goto _output_error;
        }
    }

    /* the frame produced must regenerate the original source exactly */
    {   size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
        if (ZSTD_isError(dSize)) {
            DISPLAY("Error during decompression of frame produced by ZSTD_compressSequencesAndLiterals()\n");
            goto _output_error;
        }
        if (dSize != srcSize) {
            DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() has different size\n");
            goto _output_error;
        }
        if (memcmp(decompressBuffer, src, srcSize)) {
            DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() produces a different content (of same size)\n");
            goto _output_error;
        }
    }

    ZSTD_freeCCtx(cctx);
    free(litBuffer);
    free(decompressBuffer);
    free(seqs);
}
DISPLAYLEVEL(3, "OK \n");
/* Multiple blocks of zeros test */
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);