diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 2e163c8bf..bd13eebb0 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1282,8 +1282,8 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
 }
 
 /*! ZSTD_invalidateMatchState()
- * Invalidate all the matches in the match finder tables.
- * Requires nextSrc and base to be set (can be NULL).
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be set (can be NULL).
  */
 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
 {
@@ -1587,15 +1587,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
              * handled in _enforceMaxDist */
 }
 
-static size_t ZSTD_resetCCtx_byAttachingCDict(
-    ZSTD_CCtx* cctx,
-    const ZSTD_CDict* cdict,
-    ZSTD_CCtx_params params,
-    U64 pledgedSrcSize,
-    ZSTD_buffered_policy_e zbuff)
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                                const ZSTD_CDict* cdict,
+                                ZSTD_CCtx_params params,
+                                U64 pledgedSrcSize,
+                                ZSTD_buffered_policy_e zbuff)
 {
-    {
-        const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Resize working context table params for input only, since the dict
@@ -1607,8 +1606,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
     }
 
-    {
-        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                   - cdict->matchState.window.base);
         const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
         if (cdictLen == 0) {
@@ -1625,9 +1623,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
                     cctx->blockState.matchState.window.base + cdictEnd;
                 ZSTD_window_clear(&cctx->blockState.matchState.window);
             }
+            /* loadedDictEnd is expressed within the referential of the active context */
             cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
-        }
-    }
+    }   }
 
     cctx->dictID = cdict->dictID;
 
@@ -2844,7 +2842,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
-    assert(cctx->appliedParams.cParams.windowLog <= 31);
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
 
     DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
     if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2899,7 +2897,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
         }
     }
 
     if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 
@@ -2991,6 +2989,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
         fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                        cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
         FORWARD_IF_ERROR(fhSize);
+        assert(fhSize <= dstCapacity);
         dstCapacity -= fhSize;
         dst = (char*)dst + fhSize;
         cctx->stage = ZSTDcs_ongoing;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index cc3cbb9da..cbf15136e 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -33,13 +33,13 @@ extern "C" {
 ***************************************/
 #define kSearchStrength      8
 #define HASH_READ_SIZE       8
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                        It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                        It's not a big deal though : candidate will just be sorted again.
                                        Additionally, candidate position 1 will be lost.
                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
-                                       Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
 
 
 /*-*************************************
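
The last two comment lines explain why the marker has to survive index reduction. A minimal sketch of that interaction (simplified for illustration; the real logic lives in ZSTD_reduceTable_internal(), and this standalone helper is an assumption, not the zstd implementation):

    typedef unsigned int U32;
    #define ZSTD_DUBT_UNSORTED_MARK 1

    /* When the window slides, every table index is renormalized downward by
     * `reducerValue`. A cell tagged "unsorted" must keep its tag instead of
     * being rescaled like a real position, otherwise a later btlazy2 pass
     * would misread it as a stale match candidate. */
    static void reduceTable_sketch(U32* table, U32 size, U32 reducerValue)
    {
        U32 cellNb;
        for (cellNb = 0; cellNb < size; cellNb++) {
            if (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) continue;  /* preserve tag */
            if (table[cellNb] < reducerValue) table[cellNb] = 0;     /* now out of window */
            else table[cellNb] -= reducerValue;
        }
    }
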
@@ -128,13 +128,13 @@ typedef struct {
     BYTE const* base;               /* All regular indexes relative to this position */
     BYTE const* dictBase;           /* extDict indexes relative to this position */
     U32 dictLimit;                  /* below that point, need extDict */
-    U32 lowLimit;                   /* below that point, no more data */
+    U32 lowLimit;                   /* below that point, no more valid data */
 } ZSTD_window_t;
 
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
-    U32 loadedDictEnd;      /* index of end of dictionary */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential. Only defined while a dictionary is in use. Effectively same value as dictSize, since dictionary indexes start at zero */
     U32 nextToUpdate;       /* index from which to continue table update */
     U32 nextToUpdate3;      /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table : larger == faster, more memory */
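
The two fields documented above interact through a single invariant, formalized in the ZSTD_window_enforceMaxDist() comment below: an index is still usable while index >= lowLimit, and lowLimit trails the current block end by at most maxDist + loadedDictEnd. A self-contained toy model with invented numbers (the helper name and sizes are illustrative only, not the zstd implementation):

    #include <assert.h>
    typedef unsigned int U32;

    /* Returns the new lowLimit for a given block end; clears *loadedDictEnd
     * once the dictionary's last byte falls out of the window. */
    static U32 toyEnforceMaxDist(U32 blockEndIdx, U32 maxDist, U32* loadedDictEnd)
    {
        U32 lowLimit = 0;
        if (blockEndIdx > maxDist + *loadedDictEnd) {
            lowLimit = blockEndIdx - maxDist;
            *loadedDictEnd = 0;   /* dictionary out of range : invalidated */
        }
        return lowLimit;
    }

    int main(void)
    {
        U32 loadedDictEnd = 4096;  /* == dictSize : copied dict indexes start at zero */
        /* 8 KB window : the dict stays referencable until blockEndIdx passes 12288 */
        assert(toyEnforceMaxDist(12288, 8192, &loadedDictEnd) == 0);
        assert(loadedDictEnd == 4096);                   /* still valid */
        assert(toyEnforceMaxDist(12289, 8192, &loadedDictEnd) == 4097);
        assert(loadedDictEnd == 0);                      /* invalidated */
        return 0;
    }
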
@@ -675,31 +675,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
  * Updates lowLimit so that:
  *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
  *
- * This allows a simple check that index >= lowLimit to see if index is valid.
- * This must be called before a block compression call, with srcEnd as the block
- * source end.
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
  *
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
- * This is because dictionaries are allowed to be referenced as long as the last
- * byte of the dictionary is in the window, but once they are out of range,
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
- * loadedDictEnd == 0.
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, so it can be directly compared to blockEndIdx.
  *
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
- * is below them. forceWindow and dictMatchState are therefore incompatible.
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
  */
 MEM_STATIC void
 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
-                           void const* srcEnd,
-                           U32 maxDist,
-                           U32* loadedDictEndPtr,
+                           const void* blockEnd,
+                           U32 maxDist,
+                           U32* loadedDictEndPtr,
                            const ZSTD_matchState_t** dictMatchStatePtr)
 {
-    U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
-    U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
-    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
-                (unsigned)blockEndIdx, (unsigned)maxDist);
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing the next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
     if (blockEndIdx > maxDist + loadedDictEnd) {
         U32 const newLowLimit = blockEndIdx - maxDist;
         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,10 +726,9 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                       (unsigned)window->dictLimit, (unsigned)window->lowLimit);
             window->dictLimit = window->lowLimit;
         }
-        if (loadedDictEndPtr)
-            *loadedDictEndPtr = 0;
-        if (dictMatchStatePtr)
-            *dictMatchStatePtr = NULL;
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
     }
 }
 
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 1a31c78e2..1fe488a25 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -62,10 +62,12 @@ static U32 g_displayLevel = 2;
 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
-            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
+#define DISPLAYUPDATE(l, ...)                                                 \
+    if (g_displayLevel>=l) {                                                  \
+        if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
+        { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__);              \
+          if (g_displayLevel>=4) fflush(stderr); }                            \
+    }
 
 
 /*-*******************************************************
@@ -73,7 +75,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 *********************************************************/
 #undef MIN
 #undef MAX
-/* Declaring the function is it isn't unused */
+/* Declaring the function, to avoid -Wmissing-prototype */
 void FUZ_bug976(void);
 void FUZ_bug976(void)
 {   /* these constants shall not depend on MIN() macro */
@@ -247,7 +249,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
 
     /* advanced MT streaming API test */
     if (part <= 4)
-    {   unsigned nbThreads;
+    {   int nbThreads;
         for (nbThreads=1; nbThreads<=4; nbThreads++) {
             int compressionLevel;
             for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@@ -261,7 +263,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
                 CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
                 while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
                 ZSTD_freeCCtx(cctx);
-                DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ",
+                DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
                                 nbThreads, compressionLevel);
                 FUZ_displayMallocStats(malcount);
     }   }   }
@@ -768,13 +770,11 @@ static int basicUnitTests(U32 seed, double compressibility)
     DISPLAYLEVEL(3, "OK \n");
 
     DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
-    { size_t const r = ZSTD_compressBegin(staticCCtx, 1);
-      if (ZSTD_isError(r)) goto _output_error; }
+    CHECK( ZSTD_compressBegin(staticCCtx, 1) );
     DISPLAYLEVEL(3, "OK \n");
 
     DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
-    { size_t const r = ZSTD_initCStream(staticCCtx, 1);
-      if (ZSTD_isError(r)) goto _output_error; }
+    CHECK( ZSTD_initCStream(staticCCtx, 1) );
     DISPLAYLEVEL(3, "OK \n");
 
     DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
@@ -1059,7 +1059,7 @@ static int basicUnitTests(U32 seed, double compressibility)
     /* Dictionary and dictBuilder tests */
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         size_t const dictBufferCapacity = 16 KB;
-        void* dictBuffer = malloc(dictBufferCapacity);
+        void* const dictBuffer = malloc(dictBufferCapacity);
         size_t const totalSampleSize = 1 MB;
         size_t const sampleUnitSize = 8 KB;
         U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
@@ -1164,6 +1164,7 @@ static int basicUnitTests(U32 seed, double compressibility)
         ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
                                             ZSTD_dlm_byRef, ZSTD_dct_auto,
                                             cParams, ZSTD_defaultCMem);
+        assert(cdict != NULL);
         DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
         cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
                                          CNBuffer, CNBuffSize, cdict);
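
Stripped of the test harness, the byRef dictionary path exercised above reduces to the following sketch (hypothetical helper; requires ZSTD_STATIC_LINKING_ONLY for the _advanced APIs; error checks elided):

    /* dictBuffer is referenced, not copied (ZSTD_dlm_byRef), so it must
     * outlive the CDict. Level 3 is an arbitrary choice for the sketch. */
    static size_t compressWithByRefDict_sketch(ZSTD_CCtx* cctx,
                                               void* dst, size_t dstCapacity,
                                               const void* src, size_t srcSize,
                                               const void* dictBuffer, size_t dictSize)
    {
        ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, srcSize, dictSize);
        ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
                                                            ZSTD_dlm_byRef, ZSTD_dct_auto,
                                                            cParams, ZSTD_defaultCMem);
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                                      src, srcSize, cdict);
        ZSTD_freeCDict(cdict);
        return cSize;
    }
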
@@ -1221,8 +1222,11 @@ static int basicUnitTests(U32 seed, double compressibility)
     {   ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
         ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
         ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
-        cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
-                                                  CNBuffer, CNBuffSize, cdict, fParams);
+        assert(cdict != NULL);
+        cSize = ZSTD_compress_usingCDict_advanced(cctx,
+                                compressedBuffer, compressedBufferSize,
+                                CNBuffer, CNBuffSize,
+                                cdict, fParams);
         ZSTD_freeCDict(cdict);
         if (ZSTD_isError(cSize)) goto _output_error;
     }
@@ -1235,7 +1239,8 @@
     DISPLAYLEVEL(3, "OK (unknown)\n");
 
     DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
-    {   ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
+    {   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+        assert(dctx != NULL);
         CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
                                        decodedBuffer, CNBuffSize,
                                        compressedBuffer, cSize,