1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-07 06:23:00 +03:00

added comments to better understand enforceMaxDist()

This commit is contained in:
Yann Collet
2019-05-28 13:15:48 -07:00
parent 21bb78e908
commit 4baecdf72a
3 changed files with 79 additions and 58 deletions

View File

@@ -1282,8 +1282,8 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
}
/*! ZSTD_invalidateMatchState()
* Invalidate all the matches in the match finder tables.
* Requires nextSrc and base to be set (can be NULL).
* Invalidate all the matches in the match finder tables.
* Requires nextSrc and base to be set (can be NULL).
*/
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
@@ -1587,15 +1587,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
* handled in _enforceMaxDist */
}
static size_t ZSTD_resetCCtx_byAttachingCDict(
ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
{
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
{ const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0);
/* Resize working context table params for input only, since the dict
@@ -1607,8 +1606,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
}
{
const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
{ const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
- cdict->matchState.window.base);
const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
if (cdictLen == 0) {
@@ -1625,9 +1623,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
cctx->blockState.matchState.window.base + cdictEnd;
ZSTD_window_clear(&cctx->blockState.matchState.window);
}
/* loadedDictEnd is expressed within the referential of the active context */
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
}
}
} }
cctx->dictID = cdict->dictID;
@@ -2844,7 +2842,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
assert(cctx->appliedParams.cParams.windowLog <= 31);
assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2899,7 +2897,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
} }
if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
return op-ostart;
return (size_t)(op-ostart);
}
@@ -2991,6 +2989,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
FORWARD_IF_ERROR(fhSize);
assert(fhSize <= dstCapacity);
dstCapacity -= fhSize;
dst = (char*)dst + fhSize;
cctx->stage = ZSTDcs_ongoing;

View File

@@ -33,13 +33,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
@@ -128,13 +128,13 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */
U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 loadedDictEnd; /* index of end of dictionary, within dictionary's referential. Only used for attached dictionaries. Effectively same value as dictSize, since dictionary indexes start a zero */
U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
@@ -675,31 +675,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
* It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call.
*
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
* loadedDictEnd is only defined if a dictionary is in use for current compression.
* As the name implies, loadedDictEnd represents the index at end of dictionary.
* The value lies within context's referential, it can be directly compared to blockEndIdx.
*
* In normal dict mode, the dict is between lowLimit and dictLimit. In
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
* is below them. forceWindow and dictMatchState are therefore incompatible.
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
(unsigned)blockEndIdx, (unsigned)maxDist);
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,10 +726,9 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}

View File

@@ -62,10 +62,12 @@ static U32 g_displayLevel = 2;
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } }
#define DISPLAYUPDATE(l, ...) \
if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } \
}
/*-*******************************************************
@@ -73,7 +75,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
*********************************************************/
#undef MIN
#undef MAX
/* Declaring the function is it isn't unused */
/* Declaring the function, to avoid -Wmissing-prototype */
void FUZ_bug976(void);
void FUZ_bug976(void)
{ /* these constants shall not depend on MIN() macro */
@@ -247,7 +249,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
/* advanced MT streaming API test */
if (part <= 4)
{ unsigned nbThreads;
{ int nbThreads;
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@@ -261,7 +263,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ",
DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
@@ -768,13 +770,11 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
{ size_t const r = ZSTD_compressBegin(staticCCtx, 1);
if (ZSTD_isError(r)) goto _output_error; }
CHECK( ZSTD_compressBegin(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
{ size_t const r = ZSTD_initCStream(staticCCtx, 1);
if (ZSTD_isError(r)) goto _output_error; }
CHECK( ZSTD_initCStream(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
@@ -1059,7 +1059,7 @@ static int basicUnitTests(U32 seed, double compressibility)
/* Dictionary and dictBuilder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const dictBufferCapacity = 16 KB;
void* dictBuffer = malloc(dictBufferCapacity);
void* const dictBuffer = malloc(dictBufferCapacity);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
@@ -1164,6 +1164,7 @@ static int basicUnitTests(U32 seed, double compressibility)
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
assert(cdict != NULL);
DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict);
@@ -1221,8 +1222,11 @@ static int basicUnitTests(U32 seed, double compressibility)
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict, fParams);
assert(cdict != NULL);
cSize = ZSTD_compress_usingCDict_advanced(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
cdict, fParams);
ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) goto _output_error;
}
@@ -1235,7 +1239,8 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK (unknown)\n");
DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize,
compressedBuffer, cSize,