mirror of
https://github.com/facebook/zstd.git
synced 2025-08-05 19:15:58 +03:00
Merge pull request #1858 from senhuang42/dictionary_header_size
Method to get dictionary header size
This commit is contained in:
@@ -1251,7 +1251,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
|
|||||||
assert(cParams1.strategy == cParams2.strategy);
|
assert(cParams1.strategy == cParams2.strategy);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
|
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
for (i = 0; i < ZSTD_REP_NUM; ++i)
|
||||||
@@ -2772,37 +2772,13 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
||||||
/* Dictionary format :
|
short* offcodeNCount, unsigned* offcodeMaxValue,
|
||||||
* See :
|
const void* const dict, size_t dictSize)
|
||||||
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
|
|
||||||
*/
|
|
||||||
/*! ZSTD_loadZstdDictionary() :
|
|
||||||
* @return : dictID, or an error code
|
|
||||||
* assumptions : magic number supposed already checked
|
|
||||||
* dictSize supposed >= 8
|
|
||||||
*/
|
|
||||||
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
||||||
ZSTD_matchState_t* ms,
|
|
||||||
ZSTD_cwksp* ws,
|
|
||||||
ZSTD_CCtx_params const* params,
|
|
||||||
const void* dict, size_t dictSize,
|
|
||||||
ZSTD_dictTableLoadMethod_e dtlm,
|
|
||||||
void* workspace)
|
|
||||||
{
|
{
|
||||||
const BYTE* dictPtr = (const BYTE*)dict;
|
const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
|
||||||
const BYTE* const dictEnd = dictPtr + dictSize;
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||||
short offcodeNCount[MaxOff+1];
|
dictPtr += 8;
|
||||||
unsigned offcodeMaxValue = MaxOff;
|
|
||||||
size_t dictID;
|
|
||||||
|
|
||||||
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
|
||||||
assert(dictSize >= 8);
|
|
||||||
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
|
|
||||||
|
|
||||||
dictPtr += 4; /* skip magic number */
|
|
||||||
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
|
|
||||||
dictPtr += 4;
|
|
||||||
|
|
||||||
{ unsigned maxSymbolValue = 255;
|
{ unsigned maxSymbolValue = 255;
|
||||||
size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
|
size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
|
||||||
@@ -2812,7 +2788,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
{ unsigned offcodeLog;
|
{ unsigned offcodeLog;
|
||||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||||
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
|
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
|
||||||
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
|
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
|
||||||
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
||||||
@@ -2861,6 +2837,42 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|||||||
bs->rep[2] = MEM_readLE32(dictPtr+8);
|
bs->rep[2] = MEM_readLE32(dictPtr+8);
|
||||||
dictPtr += 12;
|
dictPtr += 12;
|
||||||
|
|
||||||
|
return dictPtr - (const BYTE*)dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dictionary format :
|
||||||
|
* See :
|
||||||
|
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
|
||||||
|
*/
|
||||||
|
/*! ZSTD_loadZstdDictionary() :
|
||||||
|
* @return : dictID, or an error code
|
||||||
|
* assumptions : magic number supposed already checked
|
||||||
|
* dictSize supposed >= 8
|
||||||
|
*/
|
||||||
|
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
||||||
|
ZSTD_matchState_t* ms,
|
||||||
|
ZSTD_cwksp* ws,
|
||||||
|
ZSTD_CCtx_params const* params,
|
||||||
|
const void* dict, size_t dictSize,
|
||||||
|
ZSTD_dictTableLoadMethod_e dtlm,
|
||||||
|
void* workspace)
|
||||||
|
{
|
||||||
|
const BYTE* dictPtr = (const BYTE*)dict;
|
||||||
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||||
|
short offcodeNCount[MaxOff+1];
|
||||||
|
unsigned offcodeMaxValue = MaxOff;
|
||||||
|
size_t dictID;
|
||||||
|
size_t eSize;
|
||||||
|
|
||||||
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
||||||
|
assert(dictSize >= 8);
|
||||||
|
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
|
||||||
|
|
||||||
|
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
|
||||||
|
eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
|
||||||
|
FORWARD_IF_ERROR(eSize);
|
||||||
|
dictPtr += eSize;
|
||||||
|
|
||||||
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
||||||
U32 offcodeMax = MaxOff;
|
U32 offcodeMax = MaxOff;
|
||||||
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
||||||
|
@@ -931,6 +931,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* ===============================================================
|
||||||
|
* Shared internal declarations
|
||||||
|
* These prototypes may be called from sources not in lib/compress
|
||||||
|
* =============================================================== */
|
||||||
|
|
||||||
|
/* ZSTD_loadCEntropy() :
|
||||||
|
* dict : must point at beginning of a valid zstd dictionary. bs : receives the parsed entropy tables and the 3 repcodes. offcodeNCount / offcodeMaxValue : filled while reading the offset-code table; validating offcodeMaxValue is left to the caller.
|
||||||
|
* return : size of dictionary header (size of magic number + dict ID + entropy tables), or an error code (dictionary_corrupted when a table cannot be parsed)
|
||||||
|
* assumptions : magic number supposed already checked
|
||||||
|
* and dictSize >= 8 (the first 8 bytes are skipped without being read). NOTE(review): required workspace size is not visible here; the zdict caller passes HUF_WORKSPACE_SIZE bytes - confirm. */
|
||||||
|
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
||||||
|
short* offcodeNCount, unsigned* offcodeMaxValue,
|
||||||
|
const void* const dict, size_t dictSize);
|
||||||
|
|
||||||
|
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
|
||||||
|
|
||||||
/* ==============================================================
|
/* ==============================================================
|
||||||
* Private declarations
|
* Private declarations
|
||||||
|
@@ -160,7 +160,7 @@ struct ZSTD_DCtx_s
|
|||||||
|
|
||||||
/*! ZSTD_loadDEntropy() :
|
/*! ZSTD_loadDEntropy() :
|
||||||
* dict : must point at beginning of a valid zstd dictionary.
|
* dict : must point at beginning of a valid zstd dictionary.
|
||||||
* @return : size of entropy tables read */
|
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
||||||
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||||
const void* const dict, size_t const dictSize);
|
const void* const dict, size_t const dictSize);
|
||||||
|
|
||||||
|
@@ -48,6 +48,7 @@
|
|||||||
# define ZDICT_STATIC_LINKING_ONLY
|
# define ZDICT_STATIC_LINKING_ONLY
|
||||||
#endif
|
#endif
|
||||||
#include "zdict.h"
|
#include "zdict.h"
|
||||||
|
#include "compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
@@ -99,6 +100,29 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|||||||
return MEM_readLE32((const char*)dictBuffer + 4);
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! ZDICT_getDictHeaderSize() :
 *  Determines the header size of a zstd dictionary by parsing it with
 *  ZSTD_loadCEntropy() into temporary, heap-allocated state that is discarded afterwards.
 *  dictBuffer : start of the dictionary; its first 4 bytes are read as the magic number.
 *  dictSize   : size of dictBuffer, in bytes.
 *  @return : the result of ZSTD_loadCEntropy() (header size, or one of its error codes),
 *            ERROR(dictionary_corrupted) if dictSize <= 8 or the magic number does not match,
 *            ERROR(memory_allocation) if one of the temporary buffers cannot be allocated. */
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
||||||
|
{
|
||||||
|
size_t headerSize;
|
||||||
|
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); /* nothing beyond magic number + dictID, or not a zstd dictionary */
|
||||||
|
|
||||||
|
{ unsigned offcodeMaxValue = MaxOff;
|
||||||
|
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); /* scratch block state : only needed as a parsing target */
|
||||||
|
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
||||||
|
short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
|
||||||
|
if (!bs || !wksp || !offcodeNCount) { /* any single failed allocation aborts the parse */
|
||||||
|
headerSize = ERROR(memory_allocation);
|
||||||
|
} else {
|
||||||
|
ZSTD_reset_compressedBlockState(bs);
|
||||||
|
headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize); /* may itself be an error code; forwarded unchanged */
|
||||||
|
}
|
||||||
|
|
||||||
|
free(bs); /* free(NULL) is a no-op, so the three frees are safe after a partial allocation failure */
|
||||||
|
free(wksp);
|
||||||
|
free(offcodeNCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
return headerSize;
|
||||||
|
}
|
||||||
|
|
||||||
/*-********************************************************
|
/*-********************************************************
|
||||||
* Dictionary training functions
|
* Dictionary training functions
|
||||||
|
@@ -64,6 +64,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|||||||
|
|
||||||
/*====== Helper functions ======*/
|
/*====== Helper functions ======*/
|
||||||
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
||||||
|
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
||||||
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
||||||
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
||||||
|
|
||||||
|
@@ -1159,6 +1159,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
||||||
size_t dictSize;
|
size_t dictSize;
|
||||||
U32 dictID;
|
U32 dictID;
|
||||||
|
size_t dictHeaderSize;
|
||||||
|
|
||||||
if (dictBuffer==NULL || samplesSizes==NULL) {
|
if (dictBuffer==NULL || samplesSizes==NULL) {
|
||||||
free(dictBuffer);
|
free(dictBuffer);
|
||||||
@@ -1248,6 +1249,29 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
if (dictID==0) goto _output_error;
|
if (dictID==0) goto _output_error;
|
||||||
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
|
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
|
||||||
|
|
||||||
|
DISPLAYLEVEL(3, "test%3i : check dict header size no error : ", testNb++);
|
||||||
|
dictHeaderSize = ZDICT_getDictHeaderSize(dictBuffer, dictSize);
|
||||||
|
if (dictHeaderSize==0) goto _output_error;
|
||||||
|
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
|
||||||
|
|
||||||
|
DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
|
||||||
|
{ unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
|
||||||
|
0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
|
||||||
|
0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
|
||||||
|
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||||
|
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||||
|
0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
|
||||||
|
0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
|
||||||
|
0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
|
||||||
|
0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
|
||||||
|
0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
|
||||||
|
0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
|
||||||
|
dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
|
||||||
|
if (dictHeaderSize != 115) goto _output_error;
|
||||||
|
}
|
||||||
|
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
|
||||||
|
|
||||||
DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++);
|
DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++);
|
||||||
cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
|
cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
|
||||||
CNBuffer, CNBuffSize,
|
CNBuffer, CNBuffSize,
|
||||||
|
Reference in New Issue
Block a user