From 22b7bff2bcdfab8ece992420bd703fb14fd0f49b Mon Sep 17 00:00:00 2001 From: senhuang42 Date: Mon, 28 Dec 2020 16:43:04 -0500 Subject: [PATCH] Add unit test, improve documentation --- lib/decompress/zstd_decompress.c | 70 +++++++++++++------------ lib/zstd.h | 30 ++++++++--- tests/fuzzer.c | 88 +++++++++++++++++++++++++++----- 3 files changed, 134 insertions(+), 54 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index f4f8269df..72abba3b9 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -73,9 +73,9 @@ -/*************************** - * Multiple DDicts Hashset * - ***************************/ +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ #define DDICT_HASHSET_MAX_LOAD_FACTOR 0.75 #define DDICT_HASHSET_TABLE_BASE_SIZE 64 @@ -85,7 +85,7 @@ * Returns an index between [0, hashSet->ddictPtrTableSize] */ static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { - U32 hash = XXH32(&dictID, sizeof(U32), 0); + U32 hash = XXH64(&dictID, sizeof(U32), 0); /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ return hash & (hashSet->ddictPtrTableSize - 1); } @@ -94,8 +94,8 @@ static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 d * Returns 0 if successful (ddict may or may not have actually been added), or a zstd error code if something went wrong */ static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, U32 dictID) { - RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); while (hashSet->ddictPtrTable[idx] != NULL) { /* Replace existing ddict if inserting ddict with same dictID */ @@ -125,7 +125,7 @@ static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMe const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; size_t oldTableSize = hashSet->ddictPtrTableSize; - size_t i = 0; + size_t i; DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); @@ -152,7 +152,6 @@ static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); if (currDictID == dictID || currDictID == 0) { /* currDictID == 0 implies a NULL ddict entry */ - DEBUGLOG(4, "Dict ID not found"); break; } else { if (idx == hashSet->ddictPtrTableSize - 1) { @@ -172,6 +171,7 @@ static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, */ static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; ret->ddictPtrCount = 0; @@ -182,10 +182,16 @@ static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { } /* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. */ static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { - if (hashSet->ddictPtrTable) ZSTD_customFree(hashSet->ddictPtrTable, customMem); - ZSTD_customFree(hashSet, customMem); + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree(hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } } /* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. @@ -229,6 +235,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->outBufferMode = ZSTD_bm_buffered; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; } static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) @@ -248,10 +255,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); - ZSTD_DCtx_resetParameters(dctx); - dctx->validateChecksum = 1; - dctx->refMultipleDDicts = 0; dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dctx->dictContentEndForFuzzing = NULL; #endif @@ -324,14 +329,19 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ } -/* Public function: Given a dctx with a digested frame params, re-selects the correct DDict based on - * the requested dict ID from the frame. ZSTD_d_refMultipleDDicts must be enabled for this function - * to be called. +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. */ static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { assert(dctx->refMultipleDDicts && dctx->ddictSet); DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); - { const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); if (frameDDict) { DEBUGLOG(4, "DDict found!"); ZSTD_clearDict(dctx); @@ -602,7 +612,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); /* Reference DDict requested by frame if dctx references multiple ddicts */ - if (dctx->refMultipleDDicts && dctx->ddictSet) { + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { ZSTD_DCtx_selectFrameDDict(dctx); } @@ -727,6 +737,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) return bound; } + /*-************************************************************* * Frame decoding ***************************************************************/ @@ -1549,7 +1560,13 @@ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) if (ddict) { dctx->ddict = ddict; dctx->dictUses = ZSTD_use_indefinitely; - if (dctx->refMultipleDDicts && dctx->ddictSet) { + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, ZSTD_getDictID_fromDDict(ddict), dctx->customMem), ""); @@ -1600,8 +1617,8 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.upperBound = (int)ZSTD_d_ignoreChecksum; return bounds; case ZSTD_d_refMultipleDDicts: - bounds.lowerBound = (int)ZSTD_d_refSingleDict; - bounds.upperBound = (int)ZSTD_d_refMultipleDicts; + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; return bounds; default:; } @@ -1671,23 +1688,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value return 0; case ZSTD_d_refMultipleDDicts: CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); - DEBUGLOG(4, "Referencing multiple ddicts param enabled"); if (dctx->staticSize != 0) { RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); } dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; - if (dctx->refMultipleDDicts == ZSTD_d_refMultipleDicts && dctx->ddictSet == NULL) { - DEBUGLOG(4, "Allocating new hash set"); - dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); - if (!dctx->ddictSet) { - RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); - } - } else { - if (dctx->ddictSet) { - ZSTD_freeDDictHashSet(dctx->ddictSet, dctx->customMem); - dctx->ddictSet = NULL; - } - } return 0; default:; } diff --git a/lib/zstd.h b/lib/zstd.h index ede9c5785..884a2f54a 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1001,6 +1001,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s /*! ZSTD_DCtx_refDDict() : * Reference a prepared dictionary, to be used to decompress next frames. * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : Currently, only one dictionary can be managed. * Referencing a new dictionary effectively "discards" any previous one. @@ -1209,8 +1216,8 @@ typedef enum { typedef enum { /* Note: this enum controls ZSTD_d_refMultipleDDicts */ - ZSTD_d_refSingleDict = 0, - ZSTD_d_refMultipleDicts = 1, + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 } ZSTD_refMultipleDDicts_e; typedef enum { @@ -2014,13 +2021,20 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param * * If enabled and dctx is allocated on the heap, then additional memory will be allocated * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() - * using a given ZSTD_DCtx, rather than overwriting the previous DCtx referenced, will - * store all references, and at decompression time, the appropriate dictID is selected - * from the set of DDicts based on the dictID in the frame. + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. * - * WARNING: Enabling this parameter will trigger memory allocation for the hash table, and disabling - * this parameter will free the memory allocated for the hashtable. Memory is allocated as per - * ZSTD_DCtx::customMem. + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. */ #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 7e3b4628e..a67113edd 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1770,6 +1770,19 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t dictSize; U32 dictID; size_t dictHeaderSize; + size_t dictBufferFixedSize = 144; + unsigned char const dictBufferFixed[144] = {0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f, + 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01, + 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18, + 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c, + 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61, + 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65, + 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69}; if (dictBuffer==NULL || samplesSizes==NULL) { free(dictBuffer); @@ -1865,19 +1878,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++); - { unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f, - 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01, - 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, - 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18, - 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c, - 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04, - 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61, - 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65, - 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 }; - dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144); + { dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, dictBufferFixedSize); if (dictHeaderSize != 115) goto _output_error; } DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); @@ -2331,6 +2332,67 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with multiple ddicts : ", testNb++); + { + const size_t numDicts = 128; + const size_t numFrames = 4; + size_t i; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_DDict** ddictTable = (ZSTD_DDict**)malloc(sizeof(ZSTD_DDict*)*numDicts); + ZSTD_CDict** cdictTable = (ZSTD_CDict**)malloc(sizeof(ZSTD_CDict*)*numDicts); + U32 dictIDSeed = seed; + /* Create new compressed buffer that will hold frames with differing dictIDs */ + char* dictBufferMulti = (char*)malloc(sizeof(char) * dictBufferFixedSize); /* Modifiable copy of fixed full dict buffer */ + + ZSTD_memcpy(dictBufferMulti, dictBufferFixed, dictBufferFixedSize); + cSize = 0; + /* Create a bunch of DDicts with random dict IDs */ + for (i = 0; i < numDicts; ++i) { + U32 currDictID = FUZ_rand(&dictIDSeed); + MEM_writeLE32(dictBufferMulti+ZSTD_FRAMEIDSIZE, currDictID); + ddictTable[i] = ZSTD_createDDict(dictBufferMulti, dictBufferFixedSize); + cdictTable[i] = ZSTD_createCDict(dictBufferMulti, dictBufferFixedSize, 3); + if (!ddictTable[i] || !cdictTable[i] || ZSTD_getDictID_fromCDict(cdictTable[i]) != ZSTD_getDictID_fromDDict(ddictTable[i])) { + goto _output_error; + } + } + /* Compress a few frames using random CDicts */ + { + size_t off = 0; + /* only use the first half so we don't push against size limit of compressedBuffer */ + size_t const segSize = (CNBuffSize / 2) / numFrames; + for (i = 0; i < numFrames; i++) { + size_t dictIdx = FUZ_rand(&dictIDSeed) % numDicts; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + { CHECK_NEWV(r, ZSTD_compress_usingCDict(cctx, + (BYTE*)compressedBuffer + off, CNBuffSize - off, + (BYTE*)CNBuffer + segSize * (size_t)i, segSize, + cdictTable[dictIdx])); + off += r; + } + } + cSize = off; + } + + /* We should succeed to decompression even though different dicts were used on different frames */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts); + /* Reference every single ddict we made */ + for (i = 0; i < numDicts; ++i) { + CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddictTable[i])); + } + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + ZSTD_freeDCtx(dctx); + for (i = 0; i < numDicts; ++i) { + ZSTD_freeCDict(cdictTable[i]); + ZSTD_freeDDict(ddictTable[i]); + } + free(dictBufferMulti); + free(ddictTable); + free(cdictTable); + } + DISPLAYLEVEL(3, "OK \n"); + ZSTD_freeCCtx(cctx); free(dictBuffer); free(samplesSizes);