mirror of
https://github.com/facebook/zstd.git
synced 2025-08-07 06:23:00 +03:00
added comments to better understand enforceMaxDist()
This commit is contained in:
@@ -1282,8 +1282,8 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
|
||||
}
|
||||
|
||||
/*! ZSTD_invalidateMatchState()
|
||||
* Invalidate all the matches in the match finder tables.
|
||||
* Requires nextSrc and base to be set (can be NULL).
|
||||
* Invalidate all the matches in the match finder tables.
|
||||
* Requires nextSrc and base to be set (can be NULL).
|
||||
*/
|
||||
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
|
||||
{
|
||||
@@ -1587,15 +1587,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
|
||||
* handled in _enforceMaxDist */
|
||||
}
|
||||
|
||||
static size_t ZSTD_resetCCtx_byAttachingCDict(
|
||||
ZSTD_CCtx* cctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params,
|
||||
U64 pledgedSrcSize,
|
||||
ZSTD_buffered_policy_e zbuff)
|
||||
static size_t
|
||||
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_CCtx_params params,
|
||||
U64 pledgedSrcSize,
|
||||
ZSTD_buffered_policy_e zbuff)
|
||||
{
|
||||
{
|
||||
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
|
||||
{ const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
|
||||
unsigned const windowLog = params.cParams.windowLog;
|
||||
assert(windowLog != 0);
|
||||
/* Resize working context table params for input only, since the dict
|
||||
@@ -1607,8 +1606,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
|
||||
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
|
||||
}
|
||||
|
||||
{
|
||||
const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
|
||||
{ const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
|
||||
- cdict->matchState.window.base);
|
||||
const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
|
||||
if (cdictLen == 0) {
|
||||
@@ -1625,9 +1623,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
|
||||
cctx->blockState.matchState.window.base + cdictEnd;
|
||||
ZSTD_window_clear(&cctx->blockState.matchState.window);
|
||||
}
|
||||
/* loadedDictEnd is expressed within the referential of the active context */
|
||||
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
|
||||
}
|
||||
}
|
||||
} }
|
||||
|
||||
cctx->dictID = cdict->dictID;
|
||||
|
||||
@@ -2844,7 +2842,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* op = ostart;
|
||||
U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
|
||||
assert(cctx->appliedParams.cParams.windowLog <= 31);
|
||||
assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
|
||||
if (cctx->appliedParams.fParams.checksumFlag && srcSize)
|
||||
@@ -2899,7 +2897,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
} }
|
||||
|
||||
if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
|
||||
return op-ostart;
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
|
||||
@@ -2991,6 +2989,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
||||
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
|
||||
cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
|
||||
FORWARD_IF_ERROR(fhSize);
|
||||
assert(fhSize <= dstCapacity);
|
||||
dstCapacity -= fhSize;
|
||||
dst = (char*)dst + fhSize;
|
||||
cctx->stage = ZSTDcs_ongoing;
|
||||
|
@@ -33,13 +33,13 @@ extern "C" {
|
||||
***************************************/
|
||||
#define kSearchStrength 8
|
||||
#define HASH_READ_SIZE 8
|
||||
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
|
||||
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
|
||||
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
|
||||
It's not a big deal though : candidate will just be sorted again.
|
||||
Additionally, candidate position 1 will be lost.
|
||||
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
|
||||
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
|
||||
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@@ -128,13 +128,13 @@ typedef struct {
|
||||
BYTE const* base; /* All regular indexes relative to this position */
|
||||
BYTE const* dictBase; /* extDict indexes relative to this position */
|
||||
U32 dictLimit; /* below that point, need extDict */
|
||||
U32 lowLimit; /* below that point, no more data */
|
||||
U32 lowLimit; /* below that point, no more valid data */
|
||||
} ZSTD_window_t;
|
||||
|
||||
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
||||
struct ZSTD_matchState_t {
|
||||
ZSTD_window_t window; /* State for window round buffer management */
|
||||
U32 loadedDictEnd; /* index of end of dictionary */
|
||||
U32 loadedDictEnd; /* index of end of dictionary, within dictionary's referential. Only used for attached dictionaries. Effectively same value as dictSize, since dictionary indexes start at zero */
|
||||
U32 nextToUpdate; /* index from which to continue table update */
|
||||
U32 nextToUpdate3; /* index from which to continue table update */
|
||||
U32 hashLog3; /* dispatch table : larger == faster, more memory */
|
||||
@@ -675,31 +675,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
||||
* Updates lowLimit so that:
|
||||
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
|
||||
*
|
||||
* This allows a simple check that index >= lowLimit to see if index is valid.
|
||||
* This must be called before a block compression call, with srcEnd as the block
|
||||
* source end.
|
||||
* It ensures index is valid as long as index >= lowLimit.
|
||||
* This must be called before a block compression call.
|
||||
*
|
||||
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
|
||||
* This is because dictionaries are allowed to be referenced as long as the last
|
||||
* byte of the dictionary is in the window, but once they are out of range,
|
||||
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
|
||||
* loadedDictEnd == 0.
|
||||
* loadedDictEnd is only defined if a dictionary is in use for current compression.
|
||||
* As the name implies, loadedDictEnd represents the index at end of dictionary.
|
||||
* The value lies within context's referential, it can be directly compared to blockEndIdx.
|
||||
*
|
||||
* In normal dict mode, the dict is between lowLimit and dictLimit. In
|
||||
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
|
||||
* is below them. forceWindow and dictMatchState are therefore incompatible.
|
||||
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
|
||||
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
|
||||
* This is because dictionaries are allowed to be referenced fully
|
||||
* as long as the last byte of the dictionary is in the window.
|
||||
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
|
||||
*
|
||||
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
|
||||
* In dictMatchState mode, lowLimit and dictLimit are the same,
|
||||
* and the dictionary is below them.
|
||||
* forceWindow and dictMatchState are therefore incompatible.
|
||||
*/
|
||||
MEM_STATIC void
|
||||
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||
void const* srcEnd,
|
||||
U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const void* blockEnd,
|
||||
U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||
{
|
||||
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
|
||||
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist);
|
||||
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
||||
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
||||
|
||||
/* - When there is no dictionary : loadedDictEnd == 0.
|
||||
In which case, the test (blockEndIdx > maxDist) is merely to avoid
|
||||
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
|
||||
- When there is a standard dictionary :
|
||||
Index referential is copied from the dictionary,
|
||||
which means it starts from 0.
|
||||
In which case, loadedDictEnd == dictSize,
|
||||
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
|
||||
since `blockEndIdx` also starts from zero.
|
||||
- When there is an attached dictionary :
|
||||
loadedDictEnd is expressed within the referential of the context,
|
||||
so it can be directly compared against blockEndIdx.
|
||||
*/
|
||||
if (blockEndIdx > maxDist + loadedDictEnd) {
|
||||
U32 const newLowLimit = blockEndIdx - maxDist;
|
||||
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
|
||||
@@ -708,10 +726,9 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
|
||||
window->dictLimit = window->lowLimit;
|
||||
}
|
||||
if (loadedDictEndPtr)
|
||||
*loadedDictEndPtr = 0;
|
||||
if (dictMatchStatePtr)
|
||||
*dictMatchStatePtr = NULL;
|
||||
/* On reaching window size, dictionaries are invalidated */
|
||||
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
|
||||
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -62,10 +62,12 @@ static U32 g_displayLevel = 2;
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (g_displayLevel>=4) fflush(stderr); } }
|
||||
#define DISPLAYUPDATE(l, ...) \
|
||||
if (g_displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (g_displayLevel>=4) fflush(stderr); } \
|
||||
}
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
@@ -73,7 +75,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
*********************************************************/
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
/* Declaring the function is it isn't unused */
|
||||
/* Declaring the function, to avoid -Wmissing-prototypes */
|
||||
void FUZ_bug976(void);
|
||||
void FUZ_bug976(void)
|
||||
{ /* these constants shall not depend on MIN() macro */
|
||||
@@ -247,7 +249,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
|
||||
|
||||
/* advanced MT streaming API test */
|
||||
if (part <= 4)
|
||||
{ unsigned nbThreads;
|
||||
{ int nbThreads;
|
||||
for (nbThreads=1; nbThreads<=4; nbThreads++) {
|
||||
int compressionLevel;
|
||||
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
|
||||
@@ -261,7 +263,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
|
||||
CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
|
||||
while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
|
||||
ZSTD_freeCCtx(cctx);
|
||||
DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ",
|
||||
DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
|
||||
nbThreads, compressionLevel);
|
||||
FUZ_displayMallocStats(malcount);
|
||||
} } }
|
||||
@@ -768,13 +770,11 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
|
||||
{ size_t const r = ZSTD_compressBegin(staticCCtx, 1);
|
||||
if (ZSTD_isError(r)) goto _output_error; }
|
||||
CHECK( ZSTD_compressBegin(staticCCtx, 1) );
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
|
||||
{ size_t const r = ZSTD_initCStream(staticCCtx, 1);
|
||||
if (ZSTD_isError(r)) goto _output_error; }
|
||||
CHECK( ZSTD_initCStream(staticCCtx, 1) );
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
|
||||
@@ -1059,7 +1059,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
/* Dictionary and dictBuilder tests */
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
size_t const dictBufferCapacity = 16 KB;
|
||||
void* dictBuffer = malloc(dictBufferCapacity);
|
||||
void* const dictBuffer = malloc(dictBufferCapacity);
|
||||
size_t const totalSampleSize = 1 MB;
|
||||
size_t const sampleUnitSize = 8 KB;
|
||||
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
|
||||
@@ -1164,6 +1164,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
|
||||
ZSTD_dlm_byRef, ZSTD_dct_auto,
|
||||
cParams, ZSTD_defaultCMem);
|
||||
assert(cdict != NULL);
|
||||
DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
|
||||
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
|
||||
CNBuffer, CNBuffSize, cdict);
|
||||
@@ -1221,8 +1222,11 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
|
||||
cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
|
||||
CNBuffer, CNBuffSize, cdict, fParams);
|
||||
assert(cdict != NULL);
|
||||
cSize = ZSTD_compress_usingCDict_advanced(cctx,
|
||||
compressedBuffer, compressedBufferSize,
|
||||
CNBuffer, CNBuffSize,
|
||||
cdict, fParams);
|
||||
ZSTD_freeCDict(cdict);
|
||||
if (ZSTD_isError(cSize)) goto _output_error;
|
||||
}
|
||||
@@ -1235,7 +1239,8 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
DISPLAYLEVEL(3, "OK (unknown)\n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
|
||||
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
|
||||
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
assert(dctx != NULL);
|
||||
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
|
||||
decodedBuffer, CNBuffSize,
|
||||
compressedBuffer, cSize,
|
||||
|
Reference in New Issue
Block a user