1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-05 19:15:58 +03:00

added comments to better understand enforceMaxDist()

This commit is contained in:
Yann Collet
2019-05-28 13:15:48 -07:00
parent 21bb78e908
commit 4baecdf72a
3 changed files with 79 additions and 58 deletions

View File

@@ -1282,8 +1282,8 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
} }
/*! ZSTD_invalidateMatchState() /*! ZSTD_invalidateMatchState()
* Invalidate all the matches in the match finder tables. * Invalidate all the matches in the match finder tables.
* Requires nextSrc and base to be set (can be NULL). * Requires nextSrc and base to be set (can be NULL).
*/ */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{ {
@@ -1587,15 +1587,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
* handled in _enforceMaxDist */ * handled in _enforceMaxDist */
} }
static size_t ZSTD_resetCCtx_byAttachingCDict( static size_t
ZSTD_CCtx* cctx, ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict, const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, ZSTD_CCtx_params params,
U64 pledgedSrcSize, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff) ZSTD_buffered_policy_e zbuff)
{ {
{ { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
unsigned const windowLog = params.cParams.windowLog; unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0); assert(windowLog != 0);
/* Resize working context table params for input only, since the dict /* Resize working context table params for input only, since the dict
@@ -1607,8 +1606,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
} }
{ { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
- cdict->matchState.window.base); - cdict->matchState.window.base);
const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
if (cdictLen == 0) { if (cdictLen == 0) {
@@ -1625,9 +1623,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
cctx->blockState.matchState.window.base + cdictEnd; cctx->blockState.matchState.window.base + cdictEnd;
ZSTD_window_clear(&cctx->blockState.matchState.window); ZSTD_window_clear(&cctx->blockState.matchState.window);
} }
/* loadedDictEnd is expressed within the referential of the active context */
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
} } }
}
cctx->dictID = cdict->dictID; cctx->dictID = cdict->dictID;
@@ -2844,7 +2842,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart; BYTE* op = ostart;
U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
assert(cctx->appliedParams.cParams.windowLog <= 31); assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
if (cctx->appliedParams.fParams.checksumFlag && srcSize) if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2899,7 +2897,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
} } } }
if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
return op-ostart; return (size_t)(op-ostart);
} }
@@ -2991,6 +2989,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
cctx->pledgedSrcSizePlusOne-1, cctx->dictID); cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
FORWARD_IF_ERROR(fhSize); FORWARD_IF_ERROR(fhSize);
assert(fhSize <= dstCapacity);
dstCapacity -= fhSize; dstCapacity -= fhSize;
dst = (char*)dst + fhSize; dst = (char*)dst + fhSize;
cctx->stage = ZSTDcs_ongoing; cctx->stage = ZSTDcs_ongoing;

View File

@@ -33,13 +33,13 @@ extern "C" {
***************************************/ ***************************************/
#define kSearchStrength 8 #define kSearchStrength 8
#define HASH_READ_SIZE 8 #define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor. It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again. It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost. Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-************************************* /*-*************************************
@@ -128,13 +128,13 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */ BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */ BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */ U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */ U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t; } ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t; typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t { struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */ ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */ U32 loadedDictEnd; /* index of end of dictionary, within dictionary's referential. Only used for attached dictionaries. Effectively same value as dictSize, since dictionary indexes start at zero */
U32 nextToUpdate; /* index from which to continue table update */ U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */ U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32 hashLog3; /* dispatch table : larger == faster, more memory */
@@ -675,31 +675,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that: * Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
* *
* This allows a simple check that index >= lowLimit to see if index is valid. * It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call, with srcEnd as the block * This must be called before a block compression call.
* source end.
* *
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. * loadedDictEnd is only defined if a dictionary is in use for current compression.
* This is because dictionaries are allowed to be referenced as long as the last * As the name implies, loadedDictEnd represents the index at end of dictionary.
* byte of the dictionary is in the window, but once they are out of range, * The value lies within context's referential, it can be directly compared to blockEndIdx.
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
* *
* In normal dict mode, the dict is between lowLimit and dictLimit. In * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* is below them. forceWindow and dictMatchState are therefore incompatible. * This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/ */
MEM_STATIC void MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window, ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd, const void* blockEnd,
U32 maxDist, U32 maxDist,
U32* loadedDictEndPtr, U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr) const ZSTD_matchState_t** dictMatchStatePtr)
{ {
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base); U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist); (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) { if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist; U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,10 +726,9 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit); (unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit; window->dictLimit = window->lowLimit;
} }
if (loadedDictEndPtr) /* On reaching window size, dictionaries are invalidated */
*loadedDictEndPtr = 0; if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
*dictMatchStatePtr = NULL;
} }
} }

View File

@@ -62,10 +62,12 @@ static U32 g_displayLevel = 2;
static const U64 g_refreshRate = SEC_TO_MICRO / 6; static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ #define DISPLAYUPDATE(l, ...) \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \ if (g_displayLevel>=l) { \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
if (g_displayLevel>=4) fflush(stderr); } } { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } \
}
/*-******************************************************* /*-*******************************************************
@@ -73,7 +75,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
*********************************************************/ *********************************************************/
#undef MIN #undef MIN
#undef MAX #undef MAX
/* Declaring the function is it isn't unused */ /* Declaring the function, to avoid -Wmissing-prototypes */
void FUZ_bug976(void); void FUZ_bug976(void);
void FUZ_bug976(void) void FUZ_bug976(void)
{ /* these constants shall not depend on MIN() macro */ { /* these constants shall not depend on MIN() macro */
@@ -247,7 +249,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
/* advanced MT streaming API test */ /* advanced MT streaming API test */
if (part <= 4) if (part <= 4)
{ unsigned nbThreads; { int nbThreads;
for (nbThreads=1; nbThreads<=4; nbThreads++) { for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel; int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) { for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@@ -261,7 +263,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) ); CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {} while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
ZSTD_freeCCtx(cctx); ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ", DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
nbThreads, compressionLevel); nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount); FUZ_displayMallocStats(malcount);
} } } } } }
@@ -768,13 +770,11 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1); DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
{ size_t const r = ZSTD_compressBegin(staticCCtx, 1); CHECK( ZSTD_compressBegin(staticCCtx, 1) );
if (ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1); DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
{ size_t const r = ZSTD_initCStream(staticCCtx, 1); CHECK( ZSTD_initCStream(staticCCtx, 1) );
if (ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++); DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
@@ -1059,7 +1059,7 @@ static int basicUnitTests(U32 seed, double compressibility)
/* Dictionary and dictBuilder tests */ /* Dictionary and dictBuilder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx(); { ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t const dictBufferCapacity = 16 KB; size_t const dictBufferCapacity = 16 KB;
void* dictBuffer = malloc(dictBufferCapacity); void* const dictBuffer = malloc(dictBufferCapacity);
size_t const totalSampleSize = 1 MB; size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB; size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
@@ -1164,6 +1164,7 @@ static int basicUnitTests(U32 seed, double compressibility)
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
ZSTD_dlm_byRef, ZSTD_dct_auto, ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem); cParams, ZSTD_defaultCMem);
assert(cdict != NULL);
DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict)); DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize, cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, cdict); CNBuffer, CNBuffSize, cdict);
@@ -1221,8 +1222,11 @@ static int basicUnitTests(U32 seed, double compressibility)
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ }; { ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize, assert(cdict != NULL);
CNBuffer, CNBuffSize, cdict, fParams); cSize = ZSTD_compress_usingCDict_advanced(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize,
cdict, fParams);
ZSTD_freeCDict(cdict); ZSTD_freeCDict(cdict);
if (ZSTD_isError(cSize)) goto _output_error; if (ZSTD_isError(cSize)) goto _output_error;
} }
@@ -1235,7 +1239,8 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "OK (unknown)\n"); DISPLAYLEVEL(3, "OK (unknown)\n");
DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++); DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
{ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL); { ZSTD_DCtx* const dctx = ZSTD_createDCtx();
assert(dctx != NULL);
CHECKPLUS(r, ZSTD_decompress_usingDict(dctx, CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
decodedBuffer, CNBuffSize, decodedBuffer, CNBuffSize,
compressedBuffer, cSize, compressedBuffer, cSize,