From 3c36a7f13aa8ec698e044b623ef8d2c3624a0030 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 29 Oct 2019 16:45:11 -0400 Subject: [PATCH 01/15] Add ZDICT_getDictHeaderSize() --- lib/dictBuilder/zdict.c | 12 ++++++++++++ lib/dictBuilder/zdict.h | 1 + 2 files changed, 13 insertions(+) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 1e7f83432..863a8edf7 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -48,6 +48,7 @@ # define ZDICT_STATIC_LINKING_ONLY #endif #include "zdict.h" +#include "decompress/zstd_decompress_internal.h" /* ZSTD_entropyDTables_t */ /*-************************************* @@ -99,6 +100,17 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) return MEM_readLE32((const char*)dictBuffer + 4); } +size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) +{ + if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; + + { ZSTD_entropyDTables_t dummyEntropyTables; + size_t headerSize; + dummyEntropyTables.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); + headerSize = ZSTD_loadDEntropy(&dummyEntropyTables, dictBuffer, dictSize); + return ZSTD_isError(headerSize) ? 
0 : headerSize; + } +} /*-******************************************************** * Dictionary training functions diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 37978ecdf..f16d573a9 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -64,6 +64,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap /*====== Helper functions ======*/ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns zero if error (not a valid dictionary) */ ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); From e1edc554a36fc4e3d006fe456a4799b8513202c0 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 3 Nov 2019 17:44:28 -0500 Subject: [PATCH 02/15] Added 2 unit tests: one for sanity, one for correctness on fixed dict --- tests/fuzzer.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 88f3b83f8..9be92471e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1137,6 +1137,7 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); size_t dictSize; U32 dictID; + U32 dictHeaderSize; if (dictBuffer==NULL || samplesSizes==NULL) { free(dictBuffer); @@ -1226,6 +1227,29 @@ static int basicUnitTests(U32 const seed, double compressibility) if (dictID==0) goto _output_error; DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID); + DISPLAYLEVEL(3, "test%3i : check dict header size no error : ", testNb++); + dictHeaderSize = ZDICT_getDictHeaderSize(dictBuffer, dictSize); + if (dictHeaderSize==0) goto _output_error; + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); + + DISPLAYLEVEL(3, "test%3i : check 
dict header size correctness : ", testNb++); + { unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f, + 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01, + 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18, + 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c, + 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61, + 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65, + 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 }; + dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144); + if (dictHeaderSize != 115) goto _output_error; + } + DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize); + DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++); cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize, From 97b7f712f3590bf6b81d2211c6a5f65f81b22ae4 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 4 Nov 2019 14:33:52 -0500 Subject: [PATCH 03/15] Change to heap allocation, remove implicit type conversion --- lib/dictBuilder/zdict.c | 9 +++++---- tests/fuzzer.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 863a8edf7..32f863660 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -104,10 +104,11 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) { if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; - { 
ZSTD_entropyDTables_t dummyEntropyTables; - size_t headerSize; - dummyEntropyTables.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); - headerSize = ZSTD_loadDEntropy(&dummyEntropyTables, dictBuffer, dictSize); + { size_t headerSize; + ZSTD_entropyDTables_t* dummyEntropyTables = (ZSTD_entropyDTables_t*)malloc(sizeof(ZSTD_entropyDTables_t)); + dummyEntropyTables->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); + headerSize = ZSTD_loadDEntropy(dummyEntropyTables, dictBuffer, dictSize); + free(dummyEntropyTables); return ZSTD_isError(headerSize) ? 0 : headerSize; } } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 9be92471e..9a05daab7 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1137,7 +1137,7 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); size_t dictSize; U32 dictID; - U32 dictHeaderSize; + size_t dictHeaderSize; if (dictBuffer==NULL || samplesSizes==NULL) { free(dictBuffer); From 341e0641ed5ebc1b3ac9695b28e2b711408fadbc Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 4 Nov 2019 16:13:52 -0500 Subject: [PATCH 04/15] Checks malloc() for failure, returns 0 if so --- lib/dictBuilder/zdict.c | 3 +++ lib/dictBuilder/zdict.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 32f863660..6db57b144 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -106,6 +106,9 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) { size_t headerSize; ZSTD_entropyDTables_t* dummyEntropyTables = (ZSTD_entropyDTables_t*)malloc(sizeof(ZSTD_entropyDTables_t)); + if (!dummyEntropyTables) { + return 0; + } dummyEntropyTables->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); headerSize = ZSTD_loadDEntropy(dummyEntropyTables, dictBuffer, dictSize); free(dummyEntropyTables); diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index f16d573a9..bb89f1f9f 100644 --- 
a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -64,7 +64,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap /*====== Helper functions ======*/ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ -ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns zero if error (not a valid dictionary or mem alloc failure) */ ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); From 84404cff6efc767dcce0833d4a9eb9c742ad2301 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 5 Nov 2019 15:07:07 -0500 Subject: [PATCH 05/15] Move decompress symbols into zstd_internal.h, remove dependency --- lib/common/zstd_internal.h | 28 +++++++++++++++++++++++ lib/decompress/zstd_decompress_internal.h | 23 ------------------- lib/dictBuilder/zdict.c | 2 +- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index dcdcbdb81..db3ce1fc1 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -251,6 +251,34 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e /*-******************************************* * Private declarations *********************************************/ +/** + * ZSTD_seqSymbol, ZSTD_entropyDTables_t, ZSTD_loadDEntropy(), and SEQSYMBOL_TABLE_SIZE() + * are used by ZDICT_getDictHeaderSize() and various functions in zstd_decompress.h + */ + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + +#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +typedef struct { + 
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; +} ZSTD_entropyDTables_t; + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read (includes 8-byte magic number and dictionary ID) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + typedef struct seqDef_s { U32 offset; U16 litLength; diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index ccbdfa090..1a71e2364 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -64,23 +64,6 @@ static const U32 ML_base[MaxML+1] = { U32 tableLog; } ZSTD_seqSymbol_header; - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; - - #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) - -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; - typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, ZSTDds_decodeBlockHeader, 
ZSTDds_decompressBlock, ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, @@ -158,12 +141,6 @@ struct ZSTD_DCtx_s * Shared internal functions *********************************************************/ -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of entropy tables read */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); - /*! ZSTD_checkContinuity() : * check if next `dst` follows previous position, where decompression ended. * If yes, do nothing (continue on current segment). diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 6db57b144..7ea0a2d48 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -48,7 +48,6 @@ # define ZDICT_STATIC_LINKING_ONLY #endif #include "zdict.h" -#include "decompress/zstd_decompress_internal.h" /* ZSTD_entropyDTables_t */ /*-************************************* @@ -109,6 +108,7 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) if (!dummyEntropyTables) { return 0; } + dummyEntropyTables->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); headerSize = ZSTD_loadDEntropy(dummyEntropyTables, dictBuffer, dictSize); free(dummyEntropyTables); From 4b141b63e009eeaf3d6d2624e3a9a81692f40ebb Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 09:29:15 -0500 Subject: [PATCH 06/15] Revert "Move decompress symbols into zstd_internal.h, remove dependency" This reverts commit a152b4c67a5266f611db4a2eac4a79003852a795. 
--- lib/common/zstd_internal.h | 28 ----------------------- lib/decompress/zstd_decompress_internal.h | 23 +++++++++++++++++++ lib/dictBuilder/zdict.c | 2 +- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index db3ce1fc1..dcdcbdb81 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -251,34 +251,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e /*-******************************************* * Private declarations *********************************************/ -/** - * ZSTD_seqSymbol, ZSTD_entropyDTables_t, ZSTD_loadDEntropy(), and SEQSYMBOL_TABLE_SIZE() - * are used by ZDICT_getDictHeaderSize() and various functions in zstd_decompress.h - */ - - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; - -#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) - -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; - -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. 
- * @return : size of entropy tables read (includes 8-byte magic number and dictionary ID) */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); - typedef struct seqDef_s { U32 offset; U16 litLength; diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 1a71e2364..ccbdfa090 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -64,6 +64,23 @@ static const U32 ML_base[MaxML+1] = { U32 tableLog; } ZSTD_seqSymbol_header; + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; +} ZSTD_entropyDTables_t; + typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, @@ -141,6 +158,12 @@ struct ZSTD_DCtx_s * Shared internal functions *********************************************************/ +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + /*! ZSTD_checkContinuity() : * check if next `dst` follows previous position, where decompression ended. * If yes, do nothing (continue on current segment). 
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 7ea0a2d48..6db57b144 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -48,6 +48,7 @@ # define ZDICT_STATIC_LINKING_ONLY #endif #include "zdict.h" +#include "decompress/zstd_decompress_internal.h" /* ZSTD_entropyDTables_t */ /*-************************************* @@ -108,7 +109,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) if (!dummyEntropyTables) { return 0; } - dummyEntropyTables->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); headerSize = ZSTD_loadDEntropy(dummyEntropyTables, dictBuffer, dictSize); free(dummyEntropyTables); From 0bcaf6db089d529ce7cb73c0511dace1e8d3be8b Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 10:43:43 -0500 Subject: [PATCH 07/15] First working pass at refactor of loadZstdDictionary() --- lib/compress/zstd_compress.c | 72 ++++++++++++++++----------- lib/compress/zstd_compress_internal.h | 10 ++++ 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9299b0754..0ebbf1926 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2768,37 +2768,12 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym return 0; } - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! 
ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize) { const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff; - size_t dictID; - - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); - - dictPtr += 4; /* skip magic number */ - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); - dictPtr += 4; + const BYTE* const dictEnd = dictPtr + dictSize - 8; { unsigned maxSymbolValue = 255; size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); @@ -2808,7 +2783,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, } { unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ @@ -2856,6 +2831,43 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, bs->rep[1] = MEM_readLE32(dictPtr+4); bs->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; + DEBUGLOG(1, "size %u)", 
(unsigned)(dictPtr - (const BYTE*)dict)); + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed > 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + size_t dictID; + size_t eSize; + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1< 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); + + dictPtr += 4; /* skip magic number */ + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); + dictPtr += 4; + + dictPtr += eSize - 8; /* size of header + magic number already accounted for */ { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); U32 offcodeMax = MaxOff; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 14036f873..62ee3f9bc 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -930,7 +930,17 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) #if defined (__cplusplus) } #endif +/* =============================================================== + * Public declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
+ * return : size of entropy tables read */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize); /* ============================================================== * Private declarations From 04fb42b4f35b256b61108dcf0167cf21d81fcfbe Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 11:32:08 -0500 Subject: [PATCH 08/15] Integrated refactor into getDictHeaderSize, now passes tests --- lib/compress/zstd_compress.c | 10 ++++++---- lib/dictBuilder/zdict.c | 19 ++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0ebbf1926..3c04718e3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2772,8 +2772,8 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, const void* const dict, size_t dictSize) { - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize - 8; + const BYTE* dictPtr = (const BYTE*)dict + 8; + const BYTE* const dictEnd = dictPtr + dictSize; { unsigned maxSymbolValue = 255; size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); @@ -2852,15 +2852,17 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_dictTableLoadMethod_e dtlm, void* workspace) { - size_t dictID; - size_t eSize; const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff; + size_t dictID; + size_t eSize; + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1< 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); dictPtr += 4; /* skip magic number */ diff --git 
a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 6db57b144..6d76fb521 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -48,7 +48,7 @@ # define ZDICT_STATIC_LINKING_ONLY #endif #include "zdict.h" -#include "decompress/zstd_decompress_internal.h" /* ZSTD_entropyDTables_t */ +#include "compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */ /*-************************************* @@ -105,14 +105,19 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; { size_t headerSize; - ZSTD_entropyDTables_t* dummyEntropyTables = (ZSTD_entropyDTables_t*)malloc(sizeof(ZSTD_entropyDTables_t)); - if (!dummyEntropyTables) { + unsigned offcodeMaxValue = MaxOff; + ZSTD_compressedBlockState_t* dummyBs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); + short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short)); + if (!dummyBs || !wksp) { return 0; } - dummyEntropyTables->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); - headerSize = ZSTD_loadDEntropy(dummyEntropyTables, dictBuffer, dictSize); - free(dummyEntropyTables); - return ZSTD_isError(headerSize) ? 
0 : headerSize; + + headerSize = ZSTD_loadCEntropy(dummyBs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); + free(dummyBs); + free(wksp); + free(offcodeNCount); + return headerSize; } } From c787b351ea37dc248a982a68ac5302b21f5cb962 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 11:46:25 -0500 Subject: [PATCH 09/15] Use ZSTD Error codes, improve explanation of ZSTD_loadCEntropy() and ZSTD_loadDEntropy() --- lib/compress/zstd_compress.c | 4 ++-- lib/compress/zstd_compress_internal.h | 4 ++-- lib/decompress/zstd_decompress_internal.h | 2 +- lib/dictBuilder/zdict.c | 8 ++++---- lib/dictBuilder/zdict.h | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3c04718e3..edc238b8b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2772,7 +2772,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, const void* const dict, size_t dictSize) { - const BYTE* dictPtr = (const BYTE*)dict + 8; + const BYTE* dictPtr = (const BYTE*)dict + 8; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; { unsigned maxSymbolValue = 255; @@ -2869,7 +2869,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr); dictPtr += 4; - dictPtr += eSize - 8; /* size of header + magic number already accounted for */ + dictPtr += eSize - 8; { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); U32 offcodeMax = MaxOff; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 62ee3f9bc..0811ccf9a 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -931,13 +931,13 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) } #endif /* =============================================================== - * Public declarations + * Shared internal declarations * These prototypes may be called from sources not in lib/compress * =============================================================== */ /* ZSTD_loadCEntropy() : * dict : must point at beginning of a valid zstd dictionary. - * return : size of entropy tables read */ + * return : size of dictionary header (size of magic number + dict ID + entropy tables) */ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, const void* const dict, size_t dictSize); diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index ccbdfa090..99eab854c 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -160,7 +160,7 @@ struct ZSTD_DCtx_s /*! ZSTD_loadDEntropy() : * dict : must point at beginning of a valid zstd dictionary. 
- * @return : size of entropy tables read */ + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 6d76fb521..1c0915fe3 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -102,22 +102,22 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) { - if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; + if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); { size_t headerSize; unsigned offcodeMaxValue = MaxOff; ZSTD_compressedBlockState_t* dummyBs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short)); - if (!dummyBs || !wksp) { - return 0; + if (!dummyBs || !wksp || !offcodeNCount) { + return ERROR(memory_allocation); } headerSize = ZSTD_loadCEntropy(dummyBs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); free(dummyBs); free(wksp); free(offcodeNCount); - return headerSize; + return headerSize; /* this may be an error value if ZSTD_loadCEntropy() encountered an error */ } } diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index bb89f1f9f..1313bd214 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -64,7 +64,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap /*====== Helper functions ======*/ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ -ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns 
zero if error (not a valid dictionary or mem alloc failure) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); From 4a61aaf368022e9fda91e9eb277dec8307b4e162 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 11:47:47 -0500 Subject: [PATCH 10/15] Remove redundant comment --- lib/dictBuilder/zdict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 1c0915fe3..6d2bfd544 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -117,7 +117,7 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) free(dummyBs); free(wksp); free(offcodeNCount); - return headerSize; /* this may be an error value if ZSTD_loadCEntropy() encountered an error */ + return headerSize; } } From 6ce335371bf4249bc827efbf070ef1f51ee78b1a Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 13:58:35 -0500 Subject: [PATCH 11/15] Add error forwarding to loadCEntropy(), make check for dictSize >= 8 from bad merge --- lib/compress/zstd_compress.c | 19 ++++++++----------- lib/compress/zstd_compress_internal.h | 5 ++++- lib/dictBuilder/zdict.c | 15 +++++++++++---- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index edc238b8b..b99f91fed 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2775,6 +2775,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, const BYTE* dictPtr = (const BYTE*)dict + 8; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + { unsigned maxSymbolValue = 255; size_t 
const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); @@ -2831,7 +2835,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, bs->rep[1] = MEM_readLE32(dictPtr+4); bs->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; - DEBUGLOG(1, "size %u)", (unsigned)(dictPtr - (const BYTE*)dict)); + return dictPtr - (const BYTE*)dict; } @@ -2859,17 +2863,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, size_t dictID; size_t eSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); - assert(dictSize > 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); - + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); - - dictPtr += 4; /* skip magic number */ - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); - dictPtr += 4; - - dictPtr += eSize - 8; + FORWARD_IF_ERROR(eSize); + dictPtr += eSize; { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); U32 offcodeMax = MaxOff; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0811ccf9a..48bfea64a 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -930,6 +930,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) #if defined (__cplusplus) } #endif + /* =============================================================== * Shared internal declarations * These prototypes may be called from sources not in lib/compress @@ -937,7 +938,9 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) /* ZSTD_loadCEntropy() : * dict : must point at beginning of a valid zstd dictionary.
- * return : size of dictionary header (size of magic number + dict ID + entropy tables) */ + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, const void* const dict, size_t dictSize); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 6d2bfd544..499a309c5 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -106,15 +106,22 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) { size_t headerSize; unsigned offcodeMaxValue = MaxOff; - ZSTD_compressedBlockState_t* dummyBs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short)); - if (!dummyBs || !wksp || !offcodeNCount) { + if (!bs || !wksp || !offcodeNCount) { return ERROR(memory_allocation); } - headerSize = ZSTD_loadCEntropy(dummyBs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); - free(dummyBs); + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; + headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); + free(bs); free(wksp); free(offcodeNCount); return headerSize; From b39149e15674ddfe0956850a6394ef48ace94af8 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 14:07:21 -0500 Subject: [PATCH 12/15] Expose ZSTD_reset_compressedBlockState() to shared API --- 
lib/compress/zstd_compress.c | 6 +++--- lib/compress/zstd_compress_internal.h | 2 ++ lib/dictBuilder/zdict.c | 10 ++-------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b99f91fed..ccd7af3df 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1248,7 +1248,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, assert(cParams1.strategy == cParams2.strategy); } -static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -2835,7 +2835,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, bs->rep[1] = MEM_readLE32(dictPtr+4); bs->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; - + return dictPtr - (const BYTE*)dict; } @@ -2846,7 +2846,7 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, /*! 
ZSTD_loadZstdDictionary() : * @return : dictID, or an error code * assumptions : magic number supposed already checked - * dictSize supposed > 8 + * dictSize supposed >= 8 */ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 48bfea64a..28c46e895 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -945,6 +945,8 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, const void* const dict, size_t dictSize); +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + /* ============================================================== * Private declarations * These prototypes shall only be called from within lib/compress diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 499a309c5..fc01c90f3 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -112,14 +112,8 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) if (!bs || !wksp || !offcodeNCount) { return ERROR(memory_allocation); } - - int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - bs->rep[i] = repStartValue[i]; - bs->entropy.huf.repeatMode = HUF_repeat_none; - bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; + + ZSTD_reset_compressedBlockState(bs); headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); free(bs); free(wksp); From d06b90692b7826db8b6f7258fb4bef77a1fbb442 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 14:08:55 -0500 Subject: [PATCH 13/15] Move asserts to loadZstdDictionary() --- lib/compress/zstd_compress.c | 8 ++++---- lib/dictBuilder/zdict.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff 
--git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ccd7af3df..247470e5d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2775,10 +2775,6 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, const BYTE* dictPtr = (const BYTE*)dict + 8; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); - assert(dictSize >= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); - { unsigned maxSymbolValue = 255; size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); @@ -2863,6 +2859,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, size_t dictID; size_t eSize; + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); + assert(dictSize >= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + dictID = params->fParams.noDictIDFlag ?
0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); FORWARD_IF_ERROR(eSize); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index fc01c90f3..de8576af3 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -112,7 +112,7 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) if (!bs || !wksp || !offcodeNCount) { return ERROR(memory_allocation); } - + ZSTD_reset_compressedBlockState(bs); headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); free(bs); From d9c475f3b3e7e22d8f78d2b9c501ccf875d8d81f Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Thu, 7 Nov 2019 16:24:55 -0500 Subject: [PATCH 14/15] Fix static analyze error, use proper bounds for dictEnd --- lib/compress/zstd_compress.c | 3 ++- lib/dictBuilder/zdict.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 247470e5d..89c34d5e1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2772,8 +2772,9 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, const void* const dict, size_t dictSize) { - const BYTE* dictPtr = (const BYTE*)dict + 8; /* skip magic num and dict ID */ + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; + dictPtr += 8; { unsigned maxSymbolValue = 255; size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index de8576af3..9cee71beb 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -107,14 +107,15 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) { size_t headerSize; unsigned 
offcodeMaxValue = MaxOff; ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); + if (!bs) return ERROR(memory_allocation); U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); + if (!wksp) return ERROR(memory_allocation); short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short)); - if (!bs || !wksp || !offcodeNCount) { - return ERROR(memory_allocation); - } + if (!offcodeNCount) return ERROR(memory_allocation); ZSTD_reset_compressedBlockState(bs); headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); + free(bs); free(wksp); free(offcodeNCount); From c85d10d0ea5b4aebcf5cb4f2abb253f6de860658 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Fri, 8 Nov 2019 11:20:57 -0500 Subject: [PATCH 15/15] Remove mixed declarations --- lib/dictBuilder/zdict.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 9cee71beb..344ab446b 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -102,25 +102,26 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) { + size_t headerSize; if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); - { size_t headerSize; - unsigned offcodeMaxValue = MaxOff; + { unsigned offcodeMaxValue = MaxOff; ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); - if (!bs) return ERROR(memory_allocation); U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); - if (!wksp) return ERROR(memory_allocation); short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short)); - if (!offcodeNCount) return ERROR(memory_allocation); + if (!bs || !wksp || !offcodeNCount) { + headerSize = ERROR(memory_allocation); + } else { + ZSTD_reset_compressedBlockState(bs); + headerSize = 
ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); + } - ZSTD_reset_compressedBlockState(bs); - headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); - free(bs); free(wksp); free(offcodeNCount); - return headerSize; } + + return headerSize; } /*-********************************************************