mirror of
https://github.com/facebook/zstd.git
synced 2025-08-08 17:22:10 +03:00
added unit tests to ZSTD_compressSequencesAndLiterals()
seems to work as expected, and correctly checks that `litSize` and `srcSize` are exactly correct.
This commit is contained in:
@@ -1422,7 +1422,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
|||||||
This can be useful if the process generating the sequences also happens to generate the buffer of literals,
|
This can be useful if the process generating the sequences also happens to generate the buffer of literals,
|
||||||
thus skipping an extraction + caching stage.
|
thus skipping an extraction + caching stage.
|
||||||
It's essentially a speed optimization when the right conditions are met,
|
It's essentially a speed optimization when the right conditions are met,
|
||||||
but it also includes so following limitations:
|
but it also is restricted by the following limitations:
|
||||||
- Only supports explicit delimiter mode
|
- Only supports explicit delimiter mode
|
||||||
- Not compatible with frame checksum, which must be disabled
|
- Not compatible with frame checksum, which must be disabled
|
||||||
- Can fail when unable to compress sufficiently
|
- Can fail when unable to compress sufficiently
|
||||||
|
@@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
|||||||
*/
|
*/
|
||||||
static size_t
|
static size_t
|
||||||
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
||||||
ZSTD_SequencePosition* seqPos,
|
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
|
||||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||||
size_t blockSize,
|
size_t blockSize,
|
||||||
ZSTD_ParamSwitch_e externalRepSearch)
|
ZSTD_ParamSwitch_e externalRepSearch)
|
||||||
@@ -7114,6 +7114,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
|||||||
Repcodes_t updatedRepcodes;
|
Repcodes_t updatedRepcodes;
|
||||||
U32 dictSize;
|
U32 dictSize;
|
||||||
size_t startPosInSrc = seqPos->posInSrc;
|
size_t startPosInSrc = seqPos->posInSrc;
|
||||||
|
size_t litConsumed = 0;
|
||||||
|
|
||||||
DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
|
DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
|
||||||
|
|
||||||
@@ -7150,10 +7151,15 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
|||||||
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
||||||
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
||||||
ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
|
ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
|
||||||
|
litConsumed += litLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* last sequence (only literals) */
|
/* last sequence (only literals) */
|
||||||
seqPos->posInSrc += inSeqs[idx].litLength;
|
{ size_t const lastLitLength = inSeqs[idx].litLength;
|
||||||
|
seqPos->posInSrc += lastLitLength;
|
||||||
|
cctx->seqStore.lit += lastLitLength; /* register proper length */
|
||||||
|
litConsumed += lastLitLength;
|
||||||
|
}
|
||||||
|
|
||||||
/* blockSize must be exactly correct (checked before calling this function) */
|
/* blockSize must be exactly correct (checked before calling this function) */
|
||||||
assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc;
|
assert((seqPos->posInSrc - startPosInSrc) == blockSize); (void)startPosInSrc;
|
||||||
@@ -7184,6 +7190,7 @@ ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
|
|||||||
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
|
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
|
||||||
|
|
||||||
seqPos->idx = idx+1;
|
seqPos->idx = idx+1;
|
||||||
|
*litConsumedPtr = litConsumed;
|
||||||
return blockSize;
|
return blockSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7214,21 +7221,23 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (remaining) {
|
while (remaining) {
|
||||||
size_t compressedSeqsSize;
|
size_t compressedSeqsSize, cBlockSize, litConsumed;
|
||||||
size_t cBlockSize;
|
|
||||||
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
||||||
cctx->blockSize, remaining,
|
cctx->blockSize, remaining,
|
||||||
inSeqs, nbSequences, seqPos);
|
inSeqs, nbSequences, seqPos);
|
||||||
U32 const lastBlock = (blockSize == remaining);
|
U32 const lastBlock = (blockSize == remaining);
|
||||||
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
||||||
|
RETURN_ERROR_IF(!lastBlock, GENERIC, "Only supports single block");
|
||||||
assert(blockSize <= remaining);
|
assert(blockSize <= remaining);
|
||||||
ZSTD_resetSeqStore(&cctx->seqStore);
|
ZSTD_resetSeqStore(&cctx->seqStore);
|
||||||
|
|
||||||
blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx,
|
blockSize = ZSTD_transferSequencesOnly_wBlockDelim(cctx,
|
||||||
&seqPos,
|
&seqPos, &litConsumed,
|
||||||
inSeqs, nbSequences,
|
inSeqs, nbSequences,
|
||||||
blockSize,
|
blockSize,
|
||||||
cctx->appliedParams.searchForExternalRepcodes);
|
cctx->appliedParams.searchForExternalRepcodes);
|
||||||
|
RETURN_ERROR_IF(blockSize != remaining, GENERIC, "Must consume the entire block");
|
||||||
|
RETURN_ERROR_IF(litConsumed != litSize, GENERIC, "Must consume the exact amount of literals provided");
|
||||||
FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
|
FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
|
||||||
|
|
||||||
/* Note: when blockSize is very small, the other variant sends it uncompressed.
|
/* Note: when blockSize is very small, the other variant sends it uncompressed.
|
||||||
|
@@ -1666,17 +1666,18 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
|||||||
* This can be useful if the process generating the sequences also happens to generate the buffer of literals,
|
* This can be useful if the process generating the sequences also happens to generate the buffer of literals,
|
||||||
* thus skipping an extraction + caching stage.
|
* thus skipping an extraction + caching stage.
|
||||||
* It's essentially a speed optimization when the right conditions are met,
|
* It's essentially a speed optimization when the right conditions are met,
|
||||||
* but it also is restricted by the following limitations:
|
* but it also features the following limitations:
|
||||||
* - Only supports explicit delimiter mode
|
* - Only supports explicit delimiter mode
|
||||||
|
* - Supports 1 block only (max input 128 KB)
|
||||||
* - Not compatible with frame checksum, which must be disabled
|
* - Not compatible with frame checksum, which must be disabled
|
||||||
* - Can fail when unable to compress sufficiently
|
* - Can fail (return an error) when input data cannot be compressed sufficiently
|
||||||
* Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
|
* Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
|
||||||
* @return : final compressed size, or a ZSTD error code.
|
* @return : final compressed size, or a ZSTD error code.
|
||||||
*/
|
*/
|
||||||
ZSTDLIB_STATIC_API size_t
|
ZSTDLIB_STATIC_API size_t
|
||||||
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
const ZSTD_Sequence* inSeqs, size_t nbSequences,
|
||||||
const void* literals, size_t litSize, size_t srcSize);
|
const void* literals, size_t litSize, size_t srcSize);
|
||||||
|
|
||||||
|
|
||||||
|
105
tests/fuzzer.c
105
tests/fuzzer.c
@@ -40,7 +40,6 @@
|
|||||||
#include "datagen.h" /* RDG_genBuffer */
|
#include "datagen.h" /* RDG_genBuffer */
|
||||||
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||||
#include "xxhash.h" /* XXH64 */
|
#include "xxhash.h" /* XXH64 */
|
||||||
#include "util.h"
|
|
||||||
#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
|
#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
|
||||||
/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
|
/* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
|
||||||
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
|
#include "zstd_internal.h" /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
|
||||||
@@ -339,6 +338,35 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FUZ_getLitSize() :
 * Adds up the `litLength` field of the first @nbSeqs entries of @seqs.
 * @seqs must not be NULL (checked with assert() only).
 * @return : the sum of all `litLength` values, 0 when @nbSeqs == 0.
 */
static size_t FUZ_getLitSize(const ZSTD_Sequence* seqs, size_t nbSeqs)
|
||||||
|
{
|
||||||
|
size_t n, litSize = 0;
|
||||||
|
assert(seqs != NULL);
|
||||||
|
for (n=0; n<nbSeqs; n++) {
|
||||||
|
litSize += seqs[n].litLength;
|
||||||
|
}
|
||||||
|
return litSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FUZ_transferLiterals() :
 * Gathers the literal runs described by @seqs from @src and writes them
 * back to back into @dst.
 * For each sequence, `litLength` bytes are copied from the current read position,
 * then the read position advances by `litLength + matchLength`,
 * so the bytes covered by the match are skipped, not copied.
 * Preconditions are enforced with assert() only :
 * - the total literal size (FUZ_getLitSize()) must fit within @dstCapacity
 * - after the walk, the read position must be exactly @srcSize bytes past @src
 */
static void
|
||||||
|
FUZ_transferLiterals(void* dst, size_t dstCapacity,
|
||||||
|
const void* src, size_t srcSize,
|
||||||
|
const ZSTD_Sequence* seqs, size_t nbSeqs)
|
||||||
|
{
|
||||||
|
size_t n;
|
||||||
|
const char* ip = (const char*)src;   /* read cursor within src */
|
||||||
|
char* op = (char*)dst;   /* write cursor within dst */
|
||||||
|
size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
|
||||||
|
assert(litSize <= dstCapacity);   /* checked up front, before any byte is copied */
|
||||||
|
for (n=0; n<nbSeqs; n++) {
|
||||||
|
size_t const ll = seqs[n].litLength;
|
||||||
|
memcpy(op, ip, ll);
|
||||||
|
op += ll;
|
||||||
|
ip += ll + seqs[n].matchLength;   /* skip over the match: only literals are transferred */
|
||||||
|
}
|
||||||
|
assert((size_t)(ip - (const char*)src) == srcSize);   /* sequences must account for the whole input */
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef ZSTD_MULTITHREAD
|
#ifdef ZSTD_MULTITHREAD
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -3808,7 +3836,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
if (seqs == NULL) goto _output_error;
|
if (seqs == NULL) goto _output_error;
|
||||||
assert(cctx != NULL);
|
assert(cctx != NULL);
|
||||||
|
|
||||||
/* Populate src with random data */
|
/* Populate src with compressible random data */
|
||||||
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
|
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
|
||||||
|
|
||||||
/* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */
|
/* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */
|
||||||
@@ -3850,6 +3878,79 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
}
|
}
|
||||||
DISPLAYLEVEL(3, "OK \n");
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
|
|
||||||
|
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
|
||||||
|
{
|
||||||
|
const size_t srcSize = 100 KB;
|
||||||
|
const BYTE* const src = (BYTE*)CNBuffer;
|
||||||
|
BYTE* const dst = (BYTE*)compressedBuffer;
|
||||||
|
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
||||||
|
const size_t decompressSize = srcSize;
|
||||||
|
char* const decompressBuffer = (char*)malloc(decompressSize);
|
||||||
|
char* const litBuffer = (char*)malloc(decompressSize);
|
||||||
|
size_t compressedSize;
|
||||||
|
|
||||||
|
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||||
|
ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence));
|
||||||
|
size_t nbSeqs;
|
||||||
|
|
||||||
|
if (litBuffer == NULL) goto _output_error;
|
||||||
|
if (decompressBuffer == NULL) goto _output_error;
|
||||||
|
if (seqs == NULL) goto _output_error;
|
||||||
|
assert(cctx != NULL);
|
||||||
|
|
||||||
|
/* Populate src with compressible random data */
|
||||||
|
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
|
||||||
|
|
||||||
|
/* Roundtrip Test using the AndLiterals() variant */
|
||||||
|
nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
|
||||||
|
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
|
||||||
|
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
|
||||||
|
{ size_t const litSize = FUZ_getLitSize(seqs, nbSeqs);
|
||||||
|
FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs);
|
||||||
|
|
||||||
|
/* not enough literals: must fail */
|
||||||
|
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize);
|
||||||
|
if (!ZSTD_isError(compressedSize)) {
|
||||||
|
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n");
|
||||||
|
goto _output_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* too many literals: must fail */
|
||||||
|
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize);
|
||||||
|
if (!ZSTD_isError(compressedSize)) {
|
||||||
|
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n");
|
||||||
|
goto _output_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* correct amount of literals: should compress successfully */
|
||||||
|
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize);
|
||||||
|
if (ZSTD_isError(compressedSize)) {
|
||||||
|
DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n");
|
||||||
|
goto _output_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{ size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize);
|
||||||
|
if (ZSTD_isError(dSize)) {
|
||||||
|
DISPLAY("Error during decompression of frame produced by ZSTD_compressSequencesAndLiterals()\n");
|
||||||
|
goto _output_error;
|
||||||
|
}
|
||||||
|
if (dSize != srcSize) {
|
||||||
|
DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() has different size\n");
|
||||||
|
goto _output_error;
|
||||||
|
}
|
||||||
|
if (memcmp(decompressBuffer, src, srcSize)) {
|
||||||
|
DISPLAY("Error: decompression of frame produced by ZSTD_compressSequencesAndLiterals() produces a different content (of same size)\n");
|
||||||
|
goto _output_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ZSTD_freeCCtx(cctx);
|
||||||
|
free(litBuffer);
|
||||||
|
free(decompressBuffer);
|
||||||
|
free(seqs);
|
||||||
|
}
|
||||||
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
|
|
||||||
/* Multiple blocks of zeros test */
|
/* Multiple blocks of zeros test */
|
||||||
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
|
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
|
||||||
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);
|
DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH);
|
||||||
|
Reference in New Issue
Block a user