From 7bd1a2900e8e2713cdc9801a9ff81563f39aec9c Mon Sep 17 00:00:00 2001
From: Yann Collet
Frame size functions
@@ -461,14 +461,18 @@ size_t ZSTD_estimateDDictSize(size_t dictSize, unsigned byReference);
It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, +typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e; +
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams, ZSTD_customMem customMem);Create a ZSTD_CDict using external alloc and free, and customized compression parameters
ZSTD_CDict* ZSTD_initStaticCDict( void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, unsigned byReference, + const void* dict, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams);/* Maximum allowed back-reference distance, expressed as power of 2. * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * default value : set through compressionLevel. * Special: value 0 means "do not change windowLog". */ ZSTD_p_hashLog, /* Size of the probe table, as a power of 2. * Resulting table size is (1 << (hashLog+2)). @@ -791,10 +794,10 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); /* dictionary parameters */ ZSTD_p_refDictContent=300, /* Content of dictionary content will be referenced, instead of copied (default:0). * This avoids duplicating dictionary content. - * But it also requires that dictionary buffer outlives its user (CDict) */ - /* Not ready yet ! */ - ZSTD_p_rawContentDict, /* load dictionary in "content-only" mode (no header analysis) (default:0) */ - /* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */ + * But it also requires that dictionary buffer outlives its users */ + /* Not ready yet ! <=================================== */ + ZSTD_p_dictMode, /* Select how dictionary must be interpreted. Value must be from type ZSTD_dictMode_e. + * default : 0==auto : dictionary will be "full" if it respects specification, otherwise it will be "rawContent" */ /* multi-threading parameters */ ZSTD_p_nbThreads=400, /* Select how many threads a compression job can spawn (default:1) @@ -810,7 +813,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); /* advanced parameters - may not remain available after API update */ ZSTD_p_forceMaxWindow=1100, /* Force back-references to remain < windowSize, - * even when referencing into Dictionary content + * even when referencing into Dictionary content. * default : 0 when using a CDict, 1 when using a Prefix */ } ZSTD_cParameter;Generate a digested dictionary in provided memory area. workspace: The memory area to emplace the dictionary into. @@ -743,7 +747,6 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); * Special: value 0 means "do not change cLevel". */ ZSTD_p_windowLog,
Create an internal CDict from dict buffer. Decompression will have to use same buffer. @result : 0, or an error code (which can be tested with ZSTD_isError()). - Special : Adding a NULL (or 0-size) dictionary invalidates any previous prefix, + Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, meaning "return to no-dictionary mode". Note 1 : Dictionary content will be copied internally, except if ZSTD_p_refDictContent is set. Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters. - For this reason, compression parameters cannot be changed anymore after loading a prefix. + For this reason, compression parameters cannot be changed anymore after loading a dictionary. It's also a CPU-heavy operation, with non-negligible impact on latency. Note 3 : Dictionary will be used for all future compression jobs. To return to "no-dictionary" situation, load a NULL dictionary @@ -859,7 +862,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! */ +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! <===================================== */Reference a prefix (content-only dictionary) to bootstrap next compression job. Decompression will have to use same prefix. Prefix is only used once. Tables are discarded at end of compression job. @@ -882,15 +885,15 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); ZSTD_inBuffer* input, ZSTD_EndDirective endOp);
Behave about the same as ZSTD_compressStream. To note : - - Compression parameters are pushed into CCtx before starting compression, using ZSTD_setCCtxParameter() + - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter() - Compression parameters cannot be changed once compression is started. - *dstPos must be <= dstCapacity, *srcPos must be <= srcSize - *dspPos and *srcPos will be updated. They are guaranteed to remain below their respective limit. - @return provides the minimum amount of data still to flush from internal buffers or an error code, which can be tested using ZSTD_isError(). - if @return != 0, flush is not fully completed, and must be called again to empty internal buffers. + if @return != 0, flush is not fully completed, there is some data left within internal buffers. - after a ZSTD_e_end directive, if internal buffer is not fully flushed, - only ZSTD_e_end and ZSTD_e_flush operations are allowed. + only ZSTD_e_end or ZSTD_e_flush operations are allowed. It is necessary to fully flush internal buffers before starting a new compression job, or changing compression parameters. @@ -918,20 +921,6 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
ZSTD_CDict* ZSTD_CDict_createEmpty(void); /* Not ready yet ! */ -size_t ZSTD_CDict_setParameter(ZSTD_CDict* cdict, ZSTD_cParameter param, unsigned value); /* Not ready yet ! */ -size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dictSize); /* Not ready yet ! */ -Create a CDict object which is still mutable after creation. - It's the only one case allowing usage of ZSTD_CDict_setParameter(). - Once all compression parameters are selected, - it's possible to load the target dictionary, using ZSTD_CDict_loadDictionary(). - Dictionary content will be copied internally (except if ZSTD_p_refDictContent is set). - After loading the dictionary, no more change is possible. - The only remaining operation is to free CDict object. - Note : An unfinished CDict behaves the same as a NULL CDict if referenced into a CCtx. - -
Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 611c74f81..4e216d728 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -305,6 +305,7 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; +#if 0 /*! ZSTD_compressBegin_internal() : * innermost initialization function. Private use only. * expects params to be valid. @@ -315,7 +316,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, ZSTD_parameters params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff); - +#endif /*! ZSTD_initCStream_internal() : * Private use only. Init streaming operation. diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4f1a0bcc4..59ac3f35d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -88,7 +88,7 @@ struct ZSTD_CCtx_s { U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32 loadedDictEnd; /* index of end of dictionary */ U32 forceWindow; /* force back-references to respect limit of 1<staticSize = workspaceSize; cctx->workSpace = (void*)(cctx+1); cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); /* entropy space (never moves) */ - /* note : this code should be shared with resetCCtx, instead of copied */ + /* note : this code should be shared with resetCCtx, rather than copy/pasted */ { void* ptr = cctx->workSpace; cctx->hufCTable = (HUF_CElt*)ptr; ptr = (char*)cctx->hufCTable + hufCTable_size; @@ -203,6 +203,10 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) { if (cctx==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cctx) : %u", (U32)sizeof(*cctx)); + DEBUGLOG(5, "workSpaceSize : %u", (U32)cctx->workSpaceSize); + DEBUGLOG(5, "streaming buffers : %u", (U32)(cctx->outBuffSize + cctx->inBuffSize)); + DEBUGLOG(5, "inner MTCTX : %u", (U32)ZSTDMT_sizeof_CCtx(cctx->mtctx)); return sizeof(*cctx) + cctx->workSpaceSize + ZSTD_sizeof_CDict(cctx->cdictLocal) + cctx->outBuffSize + cctx->inBuffSize @@ -225,7 +229,9 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned switch(param) { case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0; - case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0; + ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0); + ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1); + case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0; default: return ERROR(parameter_unknown); } } @@ -340,6 +346,14 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v case ZSTD_p_refDictContent : /* to be done later */ return ERROR(compressionParameter_unsupported); + case ZSTD_p_dictMode : + /* restrict dictionary mode, to "rawContent" or "fullDict" only */ + ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent); + if (value > (unsigned)ZSTD_dm_fullDict) + return ERROR(compressionParameter_outOfBound); + cctx->dictMode = (ZSTD_dictMode_e)value; + return 0; + case ZSTD_p_forceMaxWindow : /* Force back-references to remain < windowSize, * even when referencing into Dictionary content * default : 0 when using a CDict, 1 when using a Prefix */ @@ -375,10 +389,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v assert(cctx->mtctx != NULL); return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value); - case ZSTD_p_rawContentDict : /* load dictionary in "content-only" mode (no header analysis) (default:0) */ - cctx->forceRawDict = value>0; - return 0; - default: return ERROR(parameter_unknown); } } @@ -407,7 +417,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s ZSTD_getCParams(cctx->compressionLevel, 0, dictSize); cctx->cdictLocal = ZSTD_createCDict_advanced( dict, dictSize, - 0 /* byReference */, + 0 /* byReference */, cctx->dictMode, cParams, cctx->customMem); cctx->cdict = cctx->cdictLocal; if (cctx->cdictLocal == NULL) @@ -543,6 +553,8 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams) size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0; size_t const neededSpace = entropySpace + tableSpace + tokenSpace + optSpace; + DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); + DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); return sizeof(ZSTD_CCtx) + neededSpace; } @@ -625,8 +637,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog); size_t const h3Size = ((size_t)1) << hashLog3; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - size_t const buffOutSize = ZSTD_compressBound(blockSize)+1; - size_t const buffInSize = ((size_t)1 << params.cParams.windowLog) + blockSize; + size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0; void* ptr; /* Check if workSpace is large enough, alloc a new one if needed */ @@ -638,8 +650,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) ? optPotentialSpace : 0; - size_t const bufferSpace = (zbuff==ZSTDb_buffered) ? - buffInSize + buffOutSize : 0; + size_t const bufferSpace = buffInSize + buffOutSize; size_t const neededSpace = entropySpace + optSpace + tableSpace + tokenSpace + bufferSpace; @@ -3142,22 +3153,33 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t /** ZSTD_compress_insertDictionary() : * @return : 0, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictMode_e dictMode) { if ((dict==NULL) || (dictSize<=8)) return 0; - /* dict as pure content */ - if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict)) + /* dict restricted modes */ + if (dictMode==ZSTD_dm_rawContent) return ZSTD_loadDictionaryContent(cctx, dict, dictSize); - /* dict as zstd dictionary */ + if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) { + if (dictMode == ZSTD_dm_auto) + return ZSTD_loadDictionaryContent(cctx, dict, dictSize); + if (dictMode == ZSTD_dm_fullDict) + return ERROR(dictionary_wrong); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ return ZSTD_loadZstdDictionary(cctx, dict, dictSize); } /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ -size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictMode_e dictMode, const ZSTD_CDict* cdict, ZSTD_parameters params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) @@ -3172,7 +3194,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, CHECK_F(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_continue, zbuff)); - return ZSTD_compress_insertDictionary(cctx, dict, dictSize); + return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode); } @@ -3184,7 +3206,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, { /* compression parameters verification and optimization */ CHECK_F(ZSTD_checkCParams(params.cParams)); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL, + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL, params, pledgedSrcSize, ZSTDb_not_buffered); } @@ -3192,7 +3214,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL, + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL, params, 0, ZSTDb_not_buffered); } @@ -3273,8 +3295,8 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, const void* dict,size_t dictSize, ZSTD_parameters params) { - CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, NULL, - params, srcSize, ZSTDb_not_buffered)); + CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL, + params, srcSize, ZSTDb_not_buffered) ); return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } @@ -3319,6 +3341,8 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS * Estimate amount of memory that will be needed to create a dictionary with following arguments */ size_t ZSTD_estimateCDictSize(ZSTD_compressionParameters cParams, size_t dictSize, unsigned byReference) { + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict)); + DEBUGLOG(5, "CCtx estimate : %u", (U32)ZSTD_estimateCCtxSize(cParams)); return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize(cParams) + (byReference ? 0 : dictSize); } @@ -3326,6 +3350,8 @@ size_t ZSTD_estimateCDictSize(ZSTD_compressionParameters cParams, size_t dictSiz size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict)); + DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext)); return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); } @@ -3339,7 +3365,8 @@ static ZSTD_parameters ZSTD_makeParams(ZSTD_compressionParameters cParams, ZSTD_ static size_t ZSTD_initCDict_internal( ZSTD_CDict* cdict, - const void* dictBuffer, size_t dictSize, unsigned byReference, + const void* dictBuffer, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams) { if ((byReference) || (!dictBuffer) || (!dictSize)) { @@ -3357,15 +3384,18 @@ static size_t ZSTD_initCDict_internal( { ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksumFlag */, 0 /* noDictIDFlag */ }; /* dummy */ ZSTD_parameters const params = ZSTD_makeParams(cParams, fParams); - CHECK_F( ZSTD_compressBegin_advanced(cdict->refContext, - cdict->dictContent, dictSize, - params, 0 /* srcSize */) ); + CHECK_F( ZSTD_compressBegin_internal(cdict->refContext, + cdict->dictContent, dictSize, dictMode, + NULL, + params, ZSTD_CONTENTSIZE_UNKNOWN, + ZSTDb_not_buffered) ); } return 0; } -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference, +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams, ZSTD_customMem customMem) { DEBUGLOG(5, "ZSTD_createCDict_advanced"); @@ -3382,7 +3412,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u cdict->refContext = cctx; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dictBuffer, dictSize, byReference, + dictBuffer, dictSize, + byReference, dictMode, cParams) )) { ZSTD_freeCDict(cdict); return NULL; @@ -3394,16 +3425,18 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_customMem const allocator = { NULL, NULL, NULL }; ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, 0, cParams, allocator); + return ZSTD_createCDict_advanced(dict, dictSize, + 0 /* byReference */, ZSTD_dm_auto, + cParams, ZSTD_defaultCMem); } ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_customMem const allocator = { NULL, NULL, NULL }; ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, 1, cParams, allocator); + return ZSTD_createCDict_advanced(dict, dictSize, + 1 /* byReference */, ZSTD_dm_auto, + cParams, ZSTD_defaultCMem); } size_t ZSTD_freeCDict(ZSTD_CDict* cdict) @@ -3431,7 +3464,8 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict) * Since workspace was allocated externally, it must be freed externally. */ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, unsigned byReference, + const void* dict, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams) { size_t const cctxSize = ZSTD_estimateCCtxSize(cParams); @@ -3455,8 +3489,9 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize, cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize); if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, 1 /* by Reference */, - cParams) )) + dict, dictSize, + 1 /* byReference */, dictMode, + cParams) )) return NULL; return cdict; @@ -3476,8 +3511,11 @@ size_t ZSTD_compressBegin_usingCDict_advanced( { ZSTD_parameters params = cdict->refContext->appliedParams; params.fParams = fParams; DEBUGLOG(5, "ZSTD_compressBegin_usingCDict_advanced"); - return ZSTD_compressBegin_internal(cctx, NULL, 0, cdict, - params, pledgedSrcSize, ZSTDb_not_buffered); + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dm_auto, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); } } @@ -3552,8 +3590,11 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, { DEBUGLOG(5, "ZSTD_resetCStream_internal"); - CHECK_F(ZSTD_compressBegin_internal(zcs, NULL, 0, zcs->cdict, - params, pledgedSrcSize, ZSTDb_buffered)); + CHECK_F( ZSTD_compressBegin_internal(zcs, + NULL, 0, ZSTD_dm_auto, + zcs->cdict, + params, pledgedSrcSize, + ZSTDb_buffered) ); zcs->inToCompress = 0; zcs->inBuffPos = 0; @@ -3588,7 +3629,9 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, return ERROR(memory_allocation); } ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* copy */, params.cParams, zcs->customMem); + zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + 0 /* byReference */, ZSTD_dm_auto, + params.cParams, zcs->customMem); zcs->cdict = zcs->cdictLocal; if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); } else { diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 895d362fb..499c531c7 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -584,8 +584,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, if (dict) { ZSTD_freeCDict(zcs->cdictLocal); zcs->cdict = NULL; - zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0 /* byRef */, - params.cParams, zcs->cMem); + zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, + 0 /* byRef */, ZSTD_dm_auto, + params.cParams, zcs->cMem); if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); zcs->cdict = zcs->cdictLocal; } else { diff --git a/lib/zstd.h b/lib/zstd.h index bbfb92d55..30972cd0b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -552,9 +552,12 @@ ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter par * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +typedef enum { ZSTD_dm_auto=0, ZSTD_dm_rawContent, ZSTD_dm_fullDict } ZSTD_dictMode_e; /*! ZSTD_createCDict_advanced() : * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams, ZSTD_customMem customMem); /*! ZSTD_initStaticCDict_advanced() : @@ -572,7 +575,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS */ ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict( void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, unsigned byReference, + const void* dict, size_t dictSize, + unsigned byReference, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams); /*! ZSTD_getCParams() : @@ -867,13 +871,16 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); /* note on naming convention : * Initially, the API favored names like ZSTD_setCCtxParameter() . * In this proposal, convention is changed towards ZSTD_CCtx_setParameter() . - * The main idea is that it identifies more clearly the target object type. - * It feels clearer when considering other potential variants : + * The main driver is that it identifies more clearly the target object type. + * It feels clearer in light of potential variants : * ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter()) * ZSTD_DCtx_setParameter() (rather than ZSTD_setDCtxParameter() ) - * The left variant feels clearer. + * Left variant feels easier to distinguish. */ +/* note on enum design : + * All enum will be manually set to explicit values before reaching "stable API" status */ + typedef enum { /* compression parameters */ ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table @@ -928,10 +935,10 @@ typedef enum { /* dictionary parameters */ ZSTD_p_refDictContent=300, /* Content of dictionary content will be referenced, instead of copied (default:0). * This avoids duplicating dictionary content. - * But it also requires that dictionary buffer outlives its user (CDict) */ - /* Not ready yet ! */ - ZSTD_p_rawContentDict, /* load dictionary in "content-only" mode (no header analysis) (default:0) */ - /* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */ + * But it also requires that dictionary buffer outlives its users */ + /* Not ready yet ! <=================================== */ + ZSTD_p_dictMode, /* Select how dictionary must be interpreted. Value must be from type ZSTD_dictMode_e. + * default : 0==auto : dictionary will be "full" if it respects specification, otherwise it will be "rawContent" */ /* multi-threading parameters */ ZSTD_p_nbThreads=400, /* Select how many threads a compression job can spawn (default:1) @@ -947,7 +954,7 @@ typedef enum { /* advanced parameters - may not remain available after API update */ ZSTD_p_forceMaxWindow=1100, /* Force back-references to remain < windowSize, - * even when referencing into Dictionary content + * even when referencing into Dictionary content. * default : 0 when using a CDict, 1 when using a Prefix */ } ZSTD_cParameter; @@ -1007,7 +1014,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); * Note 1 : Prefix buffer is referenced. It must outlive compression job. * Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters. * It's a CPU-heavy operation, with non-negligible impact on latency. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /* Not ready yet ! <===================================== */ diff --git a/programs/bench.c b/programs/bench.c index 6f09a56de..f9493e3b0 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -284,7 +284,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if (comprParams->searchLength) zparams.cParams.searchLength = comprParams->searchLength; if (comprParams->targetLength) zparams.cParams.targetLength = comprParams->targetLength; if (comprParams->strategy) zparams.cParams.strategy = comprParams->strategy; - cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams.cParams, cmem); + cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1 /*byRef*/, ZSTD_dm_auto, zparams.cParams, cmem); if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure"); #endif do { diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 96336423c..a3a56d9d8 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -502,15 +502,16 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "test%3i : estimate CDict size : ", testNb++); { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); - size_t const estimatedSize = ZSTD_estimateCDictSize(cParams, dictSize, 1); + size_t const estimatedSize = ZSTD_estimateCDictSize(cParams, dictSize, 1 /*byReference*/); DISPLAYLEVEL(4, "OK : %u \n", (U32)estimatedSize); } - DISPLAYLEVEL(4, "test%3i : compress with preprocessed dictionary : ", testNb++); + DISPLAYLEVEL(4, "test%3i : compress with CDict ", testNb++); { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); - ZSTD_customMem const customMem = { NULL, NULL, NULL }; ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, - 1 /* by Referece */, cParams, customMem); + 1 /* byReference */, ZSTD_dm_auto, + cParams, ZSTD_defaultCMem); + DISPLAYLEVEL(4, "(size : %u) : ", (U32)ZSTD_sizeof_CDict(cdict)); cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize), CNBuffer, CNBuffSize, cdict); ZSTD_freeCDict(cdict); @@ -538,7 +539,8 @@ static int basicUnitTests(U32 seed, double compressibility) void* const cdictBuffer = malloc(cdictSize); if (cdictBuffer==NULL) goto _output_error; { ZSTD_CDict* const cdict = ZSTD_initStaticCDict(cdictBuffer, cdictSize, - dictBuffer, dictSize, 0 /* by Reference */, + dictBuffer, dictSize, + 0 /* by Reference */, ZSTD_dm_auto, cParams); if (cdict == NULL) { DISPLAY("ZSTD_initStaticCDict failed "); @@ -558,8 +560,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "test%3i : ZSTD_compress_usingCDict_advanced, no contentSize, no dictID : ", testNb++); { ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ }; ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); - ZSTD_customMem const customMem = { NULL, NULL, NULL }; - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1, cParams, customMem); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1 /*byRef*/, ZSTD_dm_auto, cParams, ZSTD_defaultCMem); cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize), CNBuffer, CNBuffSize, cdict, fParams); ZSTD_freeCDict(cdict); @@ -608,6 +609,22 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(4, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a good dictionary : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, 1 /*byRef*/, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem); + if (cdict==NULL) goto _output_error; + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(4, "OK \n"); + + DISPLAYLEVEL(4, "test%3i : Building cdict w/ ZSTD_dm_fullDict on a rawContent (must fail) : ", testNb++); + { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced((const char*)dictBuffer+1, dictSize-1, 1 /*byRef*/, ZSTD_dm_fullDict, cParams, ZSTD_defaultCMem); + if (cdict!=NULL) goto _output_error; + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(4, "OK \n"); + ZSTD_freeCCtx(cctx); free(dictBuffer); free(samplesSizes); @@ -686,9 +703,7 @@ static int basicUnitTests(U32 seed, double compressibility) size_t const wrongSrcSize = (srcSize + 1000); ZSTD_parameters params = ZSTD_getParams(1, wrongSrcSize, 0); params.fParams.contentSizeFlag = 1; - { size_t const result = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize); - if (ZSTD_isError(result)) goto _output_error; - } + CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) ); { size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize); if (!ZSTD_isError(result)) goto _output_error; if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error; @@ -870,8 +885,23 @@ static size_t FUZ_randomLength(U32* seed, U32 maxLog) } #undef CHECK -#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ - DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); goto _output_error; } +#define CHECK(cond, ...) { \ + if (cond) { \ + DISPLAY("Error => "); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ + goto _output_error; \ +} } + +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); \ + goto _output_error; \ +} } + static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxDurationS, double compressibility, int bigTests) { @@ -984,8 +1014,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD /* frame header decompression test */ { ZSTD_frameHeader dParams; - size_t const check = ZSTD_getFrameHeader(&dParams, cBuffer, cSize); - CHECK(ZSTD_isError(check), "Frame Parameters extraction failed"); + CHECK_Z( ZSTD_getFrameHeader(&dParams, cBuffer, cSize) ); CHECK(dParams.frameContentSize != sampleSize, "Frame content size incorrect"); } @@ -1072,20 +1101,17 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize)); if (FUZ_rand(&lseed) & 0xF) { - size_t const errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel); - CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode)); + CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) ); } else { ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize); ZSTD_frameParameters const fPar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */, !(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/, 0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */ ZSTD_parameters const p = FUZ_makeParams(cPar, fPar); - size_t const errorCode = ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0); - CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_advanced error : %s", ZSTD_getErrorName(errorCode)); + CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) ); } - { size_t const errorCode = ZSTD_copyCCtx(ctx, refCtx, 0); - CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode)); - } } + CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) ); + } ZSTD_setCCtxParameter(ctx, ZSTD_p_forceWindow, FUZ_rand(&lseed) & 1); { U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2; @@ -1117,8 +1143,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD /* streaming decompression test */ if (dictSize<8) dictSize=0, dict=NULL; /* disable dictionary */ - { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); - CHECK (ZSTD_isError(errorCode), "ZSTD_decompressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode)); } + CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, dict, dictSize) ); totalCSize = 0; totalGenSize = 0; while (totalCSize < cSize) { diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 95b3add0b..7229f7031 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -486,7 +486,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo DISPLAYLEVEL(3, "test%3i : ZSTD_initCStream_usingCDict_advanced with masked dictID : ", testNb++); { ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBufferSize, dictionary.filled); ZSTD_frameParameters const fParams = { 1 /* contentSize */, 1 /* checksum */, 1 /* noDictID */}; - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, 1 /* byReference */, cParams, customMem); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictionary.start, dictionary.filled, 1 /* byReference */, ZSTD_dm_auto, cParams, customMem); size_t const initError = ZSTD_initCStream_usingCDict_advanced(zc, cdict, fParams, CNBufferSize); if (ZSTD_isError(initError)) goto _output_error; cSize = 0; diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c index a57ed51ec..1fc2117f6 100644 --- a/zlibWrapper/examples/zwrapbench.c +++ b/zlibWrapper/examples/zwrapbench.c @@ -90,7 +90,7 @@ static clock_t g_time = 0; #ifndef DEBUG # define DEBUG 0 #endif -#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } #define EXM_THROW(error, ...) \ { \ DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ @@ -236,7 +236,7 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize, if (compressor == BMK_ZSTD) { ZSTD_parameters const zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); ZSTD_customMem const cmem = { NULL, NULL, NULL }; - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1, zparams.cParams, cmem); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, 1 /*byRef*/, ZSTD_dm_auto, zparams.cParams, cmem); if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure"); do {