1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-30 22:23:13 +03:00

added ZSTD_dictMode_e to control dictionary loading mode

This commit is contained in:
Yann Collet
2017-06-21 11:50:33 -07:00
parent e51d51bdf7
commit 7bd1a2900e
9 changed files with 177 additions and 111 deletions

View File

@ -341,7 +341,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
</b>/* use this constant to defer to stdlib's functions */<b>
static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL};
static const ZSTD_customMem ZSTD_defaultCMem = { NULL, NULL, NULL };
</pre></b><BR>
<a name="Chapter12"></a><h2>Frame size functions</h2><pre></pre>
@ -461,14 +461,18 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
It is important that dictBuffer outlives CDict : it must remain read-accessible throughout the lifetime of CDict
</p></pre><BR>
<pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
<pre><b>typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e;
</b></pre><BR>
<pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams, ZSTD_customMem customMem);
</b><p> Create a ZSTD_CDict using external alloc and free, and customized compression parameters
</p></pre><BR>
<pre><b>ZSTD_CDict* ZSTD_initStaticCDict(
void* workspace, size_t workspaceSize,
const void* dict, size_t dictSize, unsigned byReference,
const void* dict, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams);
</b><p> Generate a digested dictionary in provided memory area.
workspace: The memory area to emplace the dictionary into.
@ -743,7 +747,6 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
* Special: value 0 means "do not change cLevel". */
ZSTD_p_windowLog, </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* default value : set through compressionLevel.
* Special: value 0 means "do not change windowLog". */
ZSTD_p_hashLog, </b>/* Size of the probe table, as a power of 2.<b>
* Resulting table size is (1 << (hashLog+2)).
@ -791,10 +794,10 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
</b>/* dictionary parameters */<b>
ZSTD_p_refDictContent=300, </b>/* Dictionary content will be referenced, instead of copied (default:0).<b>
* This avoids duplicating dictionary content.
* But it also requires that dictionary buffer outlives its user (CDict) */
</b>/* Not ready yet ! */<b>
ZSTD_p_rawContentDict, </b>/* load dictionary in "content-only" mode (no header analysis) (default:0) */<b>
</b>/* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */<b>
* But it also requires that dictionary buffer outlives its users */
</b>/* Not ready yet ! <=================================== */<b>
ZSTD_p_dictMode, </b>/* Select how dictionary must be interpreted. Value must be from type ZSTD_dictMode_e.<b>
* default : 0==auto : dictionary will be "full" if it respects specification, otherwise it will be "rawContent" */
</b>/* multi-threading parameters */<b>
ZSTD_p_nbThreads=400, </b>/* Select how many threads a compression job can spawn (default:1)<b>
@ -810,7 +813,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
</b>/* advanced parameters - may not remain available after API update */<b>
ZSTD_p_forceMaxWindow=1100, </b>/* Force back-references to remain < windowSize,<b>
* even when referencing into Dictionary content
* even when referencing into Dictionary content.
* default : 0 when using a CDict, 1 when using a Prefix */
} ZSTD_cParameter;
</b></pre><BR>
@ -835,12 +838,12 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
</b><p> Create an internal CDict from dict buffer.
Decompression will have to use same buffer.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Special : Adding a NULL (or 0-size) dictionary invalidates any previous prefix,
Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
meaning "return to no-dictionary mode".
Note 1 : Dictionary content will be copied internally,
except if ZSTD_p_refDictContent is set.
Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
For this reason, compression parameters cannot be changed anymore after loading a prefix.
For this reason, compression parameters cannot be changed anymore after loading a dictionary.
It's also a CPU-heavy operation, with non-negligible impact on latency.
Note 3 : Dictionary will be used for all future compression jobs.
To return to "no-dictionary" situation, load a NULL dictionary
@ -859,7 +862,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); </b>/* Not ready yet ! */<b>
<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); </b>/* Not ready yet ! <===================================== */<b>
</b><p> Reference a prefix (content-only dictionary) to bootstrap next compression job.
Decompression will have to use same prefix.
Prefix is only used once. Tables are discarded at end of compression job.
@ -882,15 +885,15 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
</b><p> Behave about the same as ZSTD_compressStream. To note :
- Compression parameters are pushed into CCtx before starting compression, using ZSTD_setCCtxParameter()
- Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
- Compression parameters cannot be changed once compression is started.
- *dstPos must be <= dstCapacity, *srcPos must be <= srcSize
- *dstPos and *srcPos will be updated. They are guaranteed to remain within their respective limits.
- @return provides the minimum amount of data still to flush from internal buffers
or an error code, which can be tested using ZSTD_isError().
if @return != 0, flush is not fully completed, and must be called again to empty internal buffers.
if @return != 0, flush is not fully completed, there is some data left within internal buffers.
- after a ZSTD_e_end directive, if internal buffer is not fully flushed,
only ZSTD_e_end and ZSTD_e_flush operations are allowed.
only ZSTD_e_end or ZSTD_e_flush operations are allowed.
It is necessary to fully flush internal buffers
before starting a new compression job, or changing compression parameters.
@ -918,20 +921,6 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
</p></pre><BR>
<pre><b>ZSTD_CDict* ZSTD_CDict_createEmpty(void); </b>/* Not ready yet ! */<b>
size_t ZSTD_CDict_setParameter(ZSTD_CDict* cdict, ZSTD_cParameter param, unsigned value); </b>/* Not ready yet ! */<b>
size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dictSize); </b>/* Not ready yet ! */<b>
</b><p> Create a CDict object which is still mutable after creation.
It's the only one case allowing usage of ZSTD_CDict_setParameter().
Once all compression parameters are selected,
it's possible to load the target dictionary, using ZSTD_CDict_loadDictionary().
Dictionary content will be copied internally (except if ZSTD_p_refDictContent is set).
After loading the dictionary, no more change is possible.
The only remaining operation is to free CDict object.
Note : An unfinished CDict behaves the same as a NULL CDict if referenced into a CCtx.
</p></pre><BR>
<a name="Chapter20"></a><h2>Block functions</h2><pre>
Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).

View File

@ -305,6 +305,7 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);
typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
#if 0
/*! ZSTD_compressBegin_internal() :
* innermost initialization function. Private use only.
* expects params to be valid.
@ -315,7 +316,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
ZSTD_parameters params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff);
#endif
/*! ZSTD_initCStream_internal() :
* Private use only. Init streaming operation.

View File

@ -88,7 +88,7 @@ struct ZSTD_CCtx_s {
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32 loadedDictEnd; /* index of end of dictionary */
U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
ZSTD_dictMode_e dictMode; /* select restricting dictionary to "rawContent" or "fullDict" only */
ZSTD_compressionStage_e stage;
U32 rep[ZSTD_REP_NUM];
U32 repToConfirm[ZSTD_REP_NUM];
@ -163,13 +163,13 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
ZSTD_CCtx* cctx = (ZSTD_CCtx*) workspace;
if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
memset(workspace, 0, workspaceSize);
memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */
cctx->staticSize = workspaceSize;
cctx->workSpace = (void*)(cctx+1);
cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
/* entropy space (never moves) */
/* note : this code should be shared with resetCCtx, instead of copied */
/* note : this code should be shared with resetCCtx, rather than copy/pasted */
{ void* ptr = cctx->workSpace;
cctx->hufCTable = (HUF_CElt*)ptr;
ptr = (char*)cctx->hufCTable + hufCTable_size;
@ -203,6 +203,10 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(5, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
DEBUGLOG(5, "workSpaceSize : %u", (U32)cctx->workSpaceSize);
DEBUGLOG(5, "streaming buffers : %u", (U32)(cctx->outBuffSize + cctx->inBuffSize));
DEBUGLOG(5, "inner MTCTX : %u", (U32)ZSTDMT_sizeof_CCtx(cctx->mtctx));
return sizeof(*cctx) + cctx->workSpaceSize
+ ZSTD_sizeof_CDict(cctx->cdictLocal)
+ cctx->outBuffSize + cctx->inBuffSize
@ -225,7 +229,9 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
switch(param)
{
case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0);
ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1);
case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0;
default: return ERROR(parameter_unknown);
}
}
@ -340,6 +346,14 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
case ZSTD_p_refDictContent : /* to be done later */
return ERROR(compressionParameter_unsupported);
case ZSTD_p_dictMode :
/* restrict dictionary mode, to "rawContent" or "fullDict" only */
ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent);
if (value > (unsigned)ZSTD_dm_fullDict)
return ERROR(compressionParameter_outOfBound);
cctx->dictMode = (ZSTD_dictMode_e)value;
return 0;
case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize,
* even when referencing into Dictionary content
* default : 0 when using a CDict, 1 when using a Prefix */
@ -375,10 +389,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
assert(cctx->mtctx != NULL);
return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);
case ZSTD_p_rawContentDict : /* load dictionary in "content-only" mode (no header analysis) (default:0) */
cctx->forceRawDict = value>0;
return 0;
default: return ERROR(parameter_unknown);
}
}
@ -407,7 +417,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s
ZSTD_getCParams(cctx->compressionLevel, 0, dictSize);
cctx->cdictLocal = ZSTD_createCDict_advanced(
dict, dictSize,
0 /* byReference */,
0 /* byReference */, cctx->dictMode,
cParams, cctx->customMem);
cctx->cdict = cctx->cdictLocal;
if (cctx->cdictLocal == NULL)
@ -543,6 +553,8 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
size_t const neededSpace = entropySpace + tableSpace + tokenSpace + optSpace;
DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
return sizeof(ZSTD_CCtx) + neededSpace;
}
@ -625,8 +637,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
size_t const h3Size = ((size_t)1) << hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
size_t const buffOutSize = ZSTD_compressBound(blockSize)+1;
size_t const buffInSize = ((size_t)1 << params.cParams.windowLog) + blockSize;
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
void* ptr;
/* Check if workSpace is large enough, alloc a new one if needed */
@ -638,8 +650,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
|| (params.cParams.strategy == ZSTD_btultra)) ?
optPotentialSpace : 0;
size_t const bufferSpace = (zbuff==ZSTDb_buffered) ?
buffInSize + buffOutSize : 0;
size_t const bufferSpace = buffInSize + buffOutSize;
size_t const neededSpace = entropySpace + optSpace + tableSpace
+ tokenSpace + bufferSpace;
@ -3142,22 +3153,33 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
/** ZSTD_compress_insertDictionary() :
* @return : 0, or an error code */
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
ZSTD_dictMode_e dictMode)
{
if ((dict==NULL) || (dictSize<=8)) return 0;
/* dict as pure content */
if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
/* dict restricted modes */
if (dictMode==ZSTD_dm_rawContent)
return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
/* dict as zstd dictionary */
if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) {
if (dictMode == ZSTD_dm_auto)
return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
if (dictMode == ZSTD_dm_fullDict)
return ERROR(dictionary_wrong);
assert(0); /* impossible */
}
/* dict as full zstd dictionary */
return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
}
/*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */
size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
ZSTD_dictMode_e dictMode,
const ZSTD_CDict* cdict,
ZSTD_parameters params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
@ -3172,7 +3194,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
CHECK_F(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_continue, zbuff));
return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
}
@ -3184,7 +3206,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
{
/* compression parameters verification and optimization */
CHECK_F(ZSTD_checkCParams(params.cParams));
return ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL,
return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
params, pledgedSrcSize, ZSTDb_not_buffered);
}
@ -3192,7 +3214,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
return ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL,
return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
params, 0, ZSTDb_not_buffered);
}
@ -3273,8 +3295,8 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
const void* dict,size_t dictSize,
ZSTD_parameters params)
{
CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL,
params, srcSize, ZSTDb_not_buffered));
CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
params, srcSize, ZSTDb_not_buffered) );
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
@ -3319,6 +3341,8 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS
* Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize(ZSTD_compressionParameters cParams, size_t dictSize, unsigned byReference)
{
    /* dictSize is only counted when the dictionary is not taken by reference */
    size_t const dictCopySize = byReference ? 0 : dictSize;
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
    DEBUGLOG(5, "CCtx estimate : %u", (U32)ZSTD_estimateCCtxSize(cParams));
    /* total = CDict structure + embedded CCtx estimate + optional dictionary copy */
    return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize(cParams) + dictCopySize;
}
@ -3326,6 +3350,8 @@ size_t ZSTD_estimateCDictSize(ZSTD_compressionParameters cParams, size_t dictSiz
size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (!cdict) return 0;   /* sizeof on a NULL CDict is supported, and reports 0 */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
    DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
    {   /* dictionary content is only counted when dictBuffer is set */
        size_t const ownedDictSize = cdict->dictBuffer ? cdict->dictContentSize : 0;
        return ZSTD_sizeof_CCtx(cdict->refContext) + ownedDictSize + sizeof(*cdict);
    }
}
@ -3339,7 +3365,8 @@ static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_
static size_t ZSTD_initCDict_internal(
ZSTD_CDict* cdict,
const void* dictBuffer, size_t dictSize, unsigned byReference,
const void* dictBuffer, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams)
{
if ((byReference) || (!dictBuffer) || (!dictSize)) {
@ -3357,15 +3384,18 @@ static size_t ZSTD_initCDict_internal(
{ ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */,
0 /* checksumFlag */, 0 /* noDictIDFlag */ }; /* dummy */
ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams);
CHECK_F( ZSTD_compressBegin_advanced(cdict->refContext,
cdict->dictContent, dictSize,
params, 0 /* srcSize */) );
CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
cdict->dictContent, dictSize, dictMode,
NULL,
params, ZSTD_CONTENTSIZE_UNKNOWN,
ZSTDb_not_buffered) );
}
return 0;
}
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
{
DEBUGLOG(5, "ZSTD_createCDict_advanced");
@ -3382,7 +3412,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
cdict->refContext = cctx;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dictBuffer, dictSize, byReference,
dictBuffer, dictSize,
byReference, dictMode,
cParams) )) {
ZSTD_freeCDict(cdict);
return NULL;
@ -3394,16 +3425,18 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize, 0, cParams, allocator);
return ZSTD_createCDict_advanced(dict, dictSize,
0 /* byReference */, ZSTD_dm_auto,
cParams, ZSTD_defaultCMem);
}
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize, 1, cParams, allocator);
return ZSTD_createCDict_advanced(dict, dictSize,
1 /* byReference */, ZSTD_dm_auto,
cParams, ZSTD_defaultCMem);
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@ -3431,7 +3464,8 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
* Since workspace was allocated externally, it must be freed externally.
*/
ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
const void* dict, size_t dictSize, unsigned byReference,
const void* dict, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams)
{
size_t const cctxSize = ZSTD_estimateCCtxSize(cParams);
@ -3455,8 +3489,9 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize, 1 /* by Reference */,
cParams) ))
dict, dictSize,
1 /* byReference */, dictMode,
cParams) ))
return NULL;
return cdict;
@ -3476,8 +3511,11 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
{ ZSTD_parameters params = cdict->refContext->appliedParams;
params.fParams = fParams;
DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced");
return ZSTD_compressBegin_internal(cctx, NULL, 0, cdict,
params, pledgedSrcSize, ZSTDb_not_buffered);
return ZSTD_compressBegin_internal(cctx,
NULL, 0, ZSTD_dm_auto,
cdict,
params, pledgedSrcSize,
ZSTDb_not_buffered);
}
}
@ -3552,8 +3590,11 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
{
DEBUGLOG(5, "ZSTD_resetCStream_internal");
CHECK_F(ZSTD_compressBegin_internal(zcs, NULL, 0, zcs->cdict,
params, pledgedSrcSize, ZSTDb_buffered));
CHECK_F( ZSTD_compressBegin_internal(zcs,
NULL, 0, ZSTD_dm_auto,
zcs->cdict,
params, pledgedSrcSize,
ZSTDb_buffered) );
zcs->inToCompress = 0;
zcs->inBuffPos = 0;
@ -3588,7 +3629,9 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
return ERROR(memory_allocation);
}
ZSTD_freeCDict(zcs->cdictLocal);
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* copy */, params.cParams, zcs->customMem);
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
0 /* byReference */, ZSTD_dm_auto,
params.cParams, zcs->customMem);
zcs->cdict = zcs->cdictLocal;
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
} else {

View File

@ -584,8 +584,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
if (dict) {
ZSTD_freeCDict(zcs->cdictLocal);
zcs->cdict = NULL;
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* byRef */,
params.cParams, zcs->cMem);
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
0 /* byRef */, ZSTD_dm_auto,
params.cParams, zcs->cMem);
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
zcs->cdict = zcs->cdictLocal;
} else {

View File

@ -552,9 +552,12 @@ ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter par
* It is important that dictBuffer outlives CDict : it must remain read-accessible throughout the lifetime of CDict */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e;
/*! ZSTD_createCDict_advanced() :
* Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference,
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams, ZSTD_customMem customMem);
/*! ZSTD_initStaticCDict() :
@ -572,7 +575,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
*/
ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict(
void* workspace, size_t workspaceSize,
const void* dict, size_t dictSize, unsigned byReference,
const void* dict, size_t dictSize,
unsigned byReference, ZSTD_dictMode_e dictMode,
ZSTD_compressionParameters cParams);
/*! ZSTD_getCParams() :
@ -867,13 +871,16 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
/* note on naming convention :
* Initially, the API favored names like ZSTD_setCCtxParameter() .
* In this proposal, convention is changed towards ZSTD_CCtx_setParameter() .
* The main idea is that it identifies more clearly the target object type.
* It feels clearer when considering other potential variants :
* The main driver is that it identifies more clearly the target object type.
* It feels clearer in light of potential variants :
* ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter())
* ZSTD_DCtx_setParameter() (rather than ZSTD_setDCtxParameter() )
* The left variant feels clearer.
* Left variant feels easier to distinguish.
*/
/* note on enum design :
* All enums will be manually set to explicit values before reaching "stable API" status */
typedef enum {
/* compression parameters */
ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
@ -928,10 +935,10 @@ typedef enum {
/* dictionary parameters */
ZSTD_p_refDictContent=300, /* Dictionary content will be referenced, instead of copied (default:0).
* This avoids duplicating dictionary content.
* But it also requires that dictionary buffer outlives its user (CDict) */
/* Not ready yet ! */
ZSTD_p_rawContentDict, /* load dictionary in "content-only" mode (no header analysis) (default:0) */
/* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */
* But it also requires that dictionary buffer outlives its users */
/* Not ready yet ! <=================================== */
ZSTD_p_dictMode, /* Select how dictionary must be interpreted. Value must be from type ZSTD_dictMode_e.
* default : 0==auto : dictionary will be "full" if it respects specification, otherwise it will be "rawContent" */
/* multi-threading parameters */
ZSTD_p_nbThreads=400, /* Select how many threads a compression job can spawn (default:1)
@ -947,7 +954,7 @@ typedef enum {
/* advanced parameters - may not remain available after API update */
ZSTD_p_forceMaxWindow=1100, /* Force back-references to remain < windowSize,
* even when referencing into Dictionary content
* even when referencing into Dictionary content.
* default : 0 when using a CDict, 1 when using a Prefix */
} ZSTD_cParameter;
@ -1007,7 +1014,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Note 1 : Prefix buffer is referenced. It must outlive compression job.
* Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
* It's a CPU-heavy operation, with non-negligible impact on latency. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! <===================================== */

View File

@ -284,7 +284,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
if (comprParams->searchLength) zparams.cParams.searchLength = comprParams->searchLength;
if (comprParams->targetLength) zparams.cParams.targetLength = comprParams->targetLength;
if (comprParams->strategy) zparams.cParams.strategy = comprParams->strategy;
cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams.cParams, cmem);
cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1 /*byRef*/, ZSTD_dm_auto, zparams.cParams, cmem);
if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
#endif
do {

View File

@ -502,15 +502,16 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "test%3i : estimate CDict size : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
size_t const estimatedSize = ZSTD_estimateCDictSize(cParams, dictSize, 1);
size_t const estimatedSize = ZSTD_estimateCDictSize(cParams, dictSize, 1 /*byReference*/);
DISPLAYLEVEL(4, "OK : %u \n", (U32)estimatedSize);
}
DISPLAYLEVEL(4, "test%3i : compress with preprocessed dictionary : ", testNb++);
DISPLAYLEVEL(4, "test%3i : compress with CDict ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_customMem const customMem = { NULL, NULL, NULL };
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
1 /* by Referece */, cParams, customMem);
1 /* byReference */, ZSTD_dm_auto,
cParams, ZSTD_defaultCMem);
DISPLAYLEVEL(4, "(size : %u) : ", (U32)ZSTD_sizeof_CDict(cdict));
cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
CNBuffer, CNBuffSize, cdict);
ZSTD_freeCDict(cdict);
@ -538,7 +539,8 @@ static int basicUnitTests(U32 seed, double compressibility)
void* const cdictBuffer = malloc(cdictSize);
if (cdictBuffer==NULL) goto _output_error;
{ ZSTD_CDict* const cdict = ZSTD_initStaticCDict(cdictBuffer, cdictSize,
dictBuffer, dictSize, 0 /* by Reference */,
dictBuffer, dictSize,
0 /* by Reference */, ZSTD_dm_auto,
cParams);
if (cdict == NULL) {
DISPLAY("ZSTD_initStaticCDict failed ");
@ -558,8 +560,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "test%3i : ZSTD_compress_usingCDict_advanced, no contentSize, no dictID : ", testNb++);
{ ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_customMem const customMem = { NULL, NULL, NULL };
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1, cParams, customMem);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1 /*byRef*/, ZSTD_dm_auto, cParams, ZSTD_defaultCMem);
cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
CNBuffer, CNBuffSize, cdict, fParams);
ZSTD_freeCDict(cdict);
@ -608,6 +609,22 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a good dictionary : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1 /*byRef*/, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem);
if (cdict==NULL) goto _output_error;
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a rawContent (must fail) : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced((const char*)dictBuffer+1, dictSize-1, 1 /*byRef*/, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem);
if (cdict!=NULL) goto _output_error;
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(4, "OK \n");
ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);
@ -686,9 +703,7 @@ static int basicUnitTests(U32 seed, double compressibility)
size_t const wrongSrcSize = (srcSize + 1000);
ZSTD_parameters params = ZSTD_getParams(1, wrongSrcSize, 0);
params.fParams.contentSizeFlag = 1;
{ size_t const result = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize);
if (ZSTD_isError(result)) goto _output_error;
}
CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) );
{ size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize);
if (!ZSTD_isError(result)) goto _output_error;
if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error;
@ -870,8 +885,23 @@ static size_t FUZ_randomLength(U32* seed, U32 maxLog)
}
#undef CHECK
/* CHECK(cond, ...) : test-failure guard.
 * When `cond` is non-zero, prints "Error => ", then the caller-supplied
 * message arguments (passed straight through to DISPLAY), then the current
 * `seed` and `testNb`, and finally jumps to the `_output_error` label.
 * The expansion site must therefore have `seed`, `testNb` and an
 * `_output_error` label in scope.
 * NOTE(review): two definitions of CHECK appear back to back below - an
 * unbraced `if` form, then the same logic wrapped in an outer block.
 * Presumably these are the before/after lines of one change; confirm that
 * only one of them is meant to be live. */
#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; }
/* Same check, enclosed in an outer { } so that the whole expansion is a
 * single compound statement rather than a bare `if`. */
#define CHECK(cond, ...) { \
if (cond) { \
DISPLAY("Error => "); \
DISPLAY(__VA_ARGS__); \
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \
goto _output_error; \
} }
/* CHECK_Z(f) : evaluates `f` exactly once and stores the result in a
 * block-local `size_t const err`.
 * If ZSTD_isError(err) is true, prints the source text of `f` (via `#f`)
 * together with ZSTD_getErrorName(err), then the current `seed` and
 * `testNb`, and jumps to the `_output_error` label.
 * The expansion site must have `seed`, `testNb` and an `_output_error`
 * label in scope.
 * Caveat: `f` must not refer to an outer variable named `err`; inside the
 * expansion that name denotes the macro's own local being initialized. */
#define CHECK_Z(f) { \
size_t const err = f; \
if (ZSTD_isError(err)) { \
DISPLAY("Error => %s : %s ", \
#f, ZSTD_getErrorName(err)); \
DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \
goto _output_error; \
} }
static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxDurationS, double compressibility, int bigTests)
{
@ -984,8 +1014,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
/* frame header decompression test */
{ ZSTD_frameHeader dParams;
size_t const check = ZSTD_getFrameHeader(&dParams, cBuffer, cSize);
CHECK(ZSTD_isError(check), "Frame Parameters extraction failed");
CHECK_Z( ZSTD_getFrameHeader(&dParams, cBuffer, cSize) );
CHECK(dParams.frameContentSize != sampleSize, "Frame content size incorrect");
}
@ -1072,20 +1101,17 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
if (FUZ_rand(&lseed) & 0xF) {
size_t const errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel);
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode));
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
} else {
ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
ZSTD_frameParameters const fPar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */,
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
ZSTD_parameters const p = FUZ_makeParams(cPar, fPar);
size_t const errorCode = ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0);
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_advanced error : %s", ZSTD_getErrorName(errorCode));
CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) );
}
{ size_t const errorCode = ZSTD_copyCCtx(ctx, refCtx, 0);
CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode));
} }
CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) );
}
ZSTD_setCCtxParameter(ctx, ZSTD_p_forceWindow, FUZ_rand(&lseed) & 1);
{ U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2;
@ -1117,8 +1143,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
/* streaming decompression test */
if (dictSize<8) dictSize=0, dict=NULL; /* disable dictionary */
{ size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
CHECK (ZSTD_isError(errorCode), "ZSTD_decompressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode)); }
CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, dict, dictSize) );
totalCSize = 0;
totalGenSize = 0;
while (totalCSize < cSize) {

View File

@ -486,7 +486,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_usingCDict_advanced with masked dictID : ", testNb++);
{ ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictionary.filled);
ZSTD_frameParameters const fParams = { 1 /* contentSize */, 1 /* checksum */, 1 /* noDictID */};
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, 1 /* byReference */, cParams, customMem);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, 1 /* byReference */, ZSTD_dm_auto, cParams, customMem);
size_t const initError = ZSTD_initCStream_usingCDict_advanced(zc, cdict, fParams, CNBufferSize);
if (ZSTD_isError(initError)) goto _output_error;
cSize = 0;

View File

@ -90,7 +90,7 @@ static clock_t g_time = 0;
#ifndef DEBUG
# define DEBUG 0
#endif
/* DEBUGOUTPUT(...) : forwards its arguments to DISPLAY only when DEBUG is
 * non-zero (DEBUG is given the value 0 just above when not already defined).
 * NOTE(review): two forms appear back to back - presumably the before/after
 * lines of one change; confirm that only one is meant to be live. */
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* Braced form: the `if` is enclosed in its own block, so an `else` that
 * follows the macro at a call site cannot bind to this `if`. */
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
@ -236,7 +236,7 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize,
if (compressor == BMK_ZSTD) {
ZSTD_parameters const zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize);
ZSTD_customMem const cmem = { NULL, NULL, NULL };
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams.cParams, cmem);
ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1 /*byRef*/, ZSTD_dm_auto, zparams.cParams, cmem);
if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
do {