diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 9b743c34b..7c55363f0 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -753,7 +753,9 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
/* compression parameters */
ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
* Default level is ZSTD_CLEVEL_DEFAULT==3.
- * Special: value 0 means "do not change cLevel". */
+ * Special: value 0 means "do not change cLevel".
+ * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
+ * Note 2 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
ZSTD_p_windowLog, /* Maximum allowed back-reference distance, expressed as power of 2.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "do not change windowLog".
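The notes added above make it possible to request a negative compression level through the unsigned parameter interface. A minimal sketch of how a caller could do so, assuming the experimental ZSTD_CCtx_setParameter() API exposed under ZSTD_STATIC_LINKING_ONLY; the helper name is illustrative:

#define ZSTD_STATIC_LINKING_ONLY   /* advanced / experimental API */
#include <zstd.h>

/* Illustrative helper : request a negative ("ultra-fast") compression level.
 * Casting to unsigned carries the sign through the parameter interface,
 * as Note 1 above describes; the value is cast back to int internally. */
static size_t setNegativeLevel(ZSTD_CCtx* cctx, int negLevel)   /* e.g. negLevel = -5 */
{
    return ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, (unsigned)negLevel);
}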
@@ -780,9 +782,13 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
* Note that currently, for all strategies < btopt, effective minimum is 4.
* Note that currently, for all strategies > fast, effective maximum is 6.
* Special: value 0 means "do not change minMatchLength". */
- ZSTD_p_targetLength, /* Only useful for strategies >= btopt.
- * Length of Match considered "good enough" to stop search.
- * Larger values make compression stronger and slower.
+ ZSTD_p_targetLength, /* Impact of this field depends on strategy.
+ * For strategies btopt & btultra:
+ * Length of Match considered "good enough" to stop search.
+ * Larger values make compression stronger, and slower.
+ * For strategy fast:
+ * Distance between match sampling.
+ * Larger values make compression faster, and weaker.
* Special: value 0 means "do not change targetLength". */
ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
* Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
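Because targetLength now behaves differently per strategy, a short sketch of both uses may help; it assumes the same experimental-API includes as the sketch above, and the values chosen are arbitrary:

/* Illustrative : one parameter, two behaviours depending on the selected strategy */
static void tuneTargetLength(ZSTD_CCtx* cctx, int wantFast)
{
    if (wantFast) {
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionStrategy, (unsigned)ZSTD_fast);
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_targetLength, 8);    /* sampling distance : faster, weaker */
    } else {
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionStrategy, (unsigned)ZSTD_btultra);
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_targetLength, 256);  /* "good enough" match length : stronger, slower */
    }
}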
@@ -807,17 +813,25 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
* (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
* More workers improve speed, but also increase memory usage.
* Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
- ZSTD_p_jobSize, /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode.
- * Each compression job is completed in parallel, so indirectly controls the nb of active threads.
+ ZSTD_p_jobSize, /* Size of a compression job. This value is enforced only in non-blocking mode.
+ * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
- * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+ * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced */
ZSTD_p_overlapSizeLog, /* Size of previous input reloaded at the beginning of each job.
* 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
/* advanced parameters - may not remain available after API update */
+
+ ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled by default).
+ * disabling it improves speed and decreases compression ratio by a large amount.
+ * note : this setting is automatically updated when changing compression level.
+ *        positive compression levels set ZSTD_p_compressLiterals to 1.
+ *        negative compression levels set ZSTD_p_compressLiterals to 0. */
+
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
+
ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching.
* This parameter is designed to improve the compression
* ratio for large inputs with long distance matches.
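ZSTD_p_compressLiterals interacts with the compression level as described above: changing the level resets it. A hedged sketch of the intended usage order (function name illustrative, same includes as the first sketch):

/* Illustrative : favour speed by leaving literals uncompressed.
 * Order matters : setting ZSTD_p_compressionLevel updates ZSTD_p_compressLiterals,
 * so the literals setting is applied last. */
static void preferSpeed(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 1);
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressLiterals, 0);   /* literals emitted raw */
}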
@@ -877,23 +891,21 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
Create an internal CDict from dict buffer.
- Decompression will have to use same buffer.
+size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode);
-
  Create an internal CDict from `dict` buffer.
+ Decompression will have to use same dictionary.
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
- Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
-           meaning "return to no-dictionary mode".
- Note 1 : `dict` content will be copied internally. Use
-          ZSTD_CCtx_loadDictionary_byReference() to reference dictionary
-          content instead. The dictionary buffer must then outlive its
-          users.
+ Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary,
+          meaning "return to no-dictionary mode".
+ Note 1 : Dictionary will be used for all future compression jobs.
+          To return to "no-dictionary" situation, load a NULL dictionary
  Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
           For this reason, compression parameters cannot be changed anymore after loading a dictionary.
-          It's also a CPU-heavy operation, with non-negligible impact on latency.
- Note 3 : Dictionary will be used for all future compression jobs.
-          To return to "no-dictionary" situation, load a NULL dictionary
- Note 5 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary
-          content will be interpreted.
-
+          It's also a CPU consuming operation, with non-negligible impact on latency.
+ Note 3 :`dict` content will be copied internally.
+          Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead.
+          In such a case, dictionary buffer must outlive its users.
+ Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+          to precisely select how dictionary content must be interpreted.
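The reordered dictionary notes amount to a fixed call sequence. A sketch under the same assumptions as the earlier examples, with error checking omitted:

/* Illustrative : parameters first, then dictionary, because loading the
 * dictionary builds tables that depend on the compression parameters (Note 2). */
static void compressWithDict(ZSTD_CCtx* cctx, const void* dictBuffer, size_t dictSize)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 3);
    ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize);   /* content copied internally (Note 3) */
    /* ... compress one or more frames ... */
    ZSTD_CCtx_loadDictionary(cctx, NULL, 0);                 /* return to "no-dictionary" mode (Note 1) */
}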
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
@@ -905,8 +917,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size
   Special : adding a NULL CDict means "return to no-dictionary mode".
   Note 1 : Currently, only one dictionary can be managed.
            Adding a new dictionary effectively "discards" any previous one.
-  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
-
+  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
@@ -917,13 +928,12 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
   Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
   If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
   @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
   Note 1 : Prefix buffer is referenced. It must outlive compression job.
   Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
-           It's a CPU-heavy operation, with non-negligible impact on latency.
-  Note 3 : By default, the prefix is treated as raw content
-           (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter
-           dictMode.
+           It's a CPU consuming operation, with non-negligible impact on latency.
+  Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode.
typedef enum { diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 706845eb7..c461fa443 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -146,7 +146,7 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStor static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize) { - DEBUGLOG(4, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u", + DEBUGLOG(5, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u", (U32)srcSizeHint, (U32)dictSize); return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ? CCtxParams.cParams : @@ -252,6 +252,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_p_minMatch: case ZSTD_p_targetLength: case ZSTD_p_compressionStrategy: + case ZSTD_p_compressLiterals: return 1; case ZSTD_p_format: @@ -305,6 +306,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v } return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_p_compressLiterals: case ZSTD_p_contentSizeFlag: case ZSTD_p_checksumFlag: case ZSTD_p_dictIDFlag: @@ -354,11 +356,16 @@ size_t ZSTD_CCtxParam_setParameter( CCtxParams->format = (ZSTD_format_e)value; return (size_t)CCtxParams->format; - case ZSTD_p_compressionLevel : - if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel(); - if (value) /* 0 : does not change current level */ - CCtxParams->compressionLevel = value; - return CCtxParams->compressionLevel; + case ZSTD_p_compressionLevel : { + int cLevel = (int)value; /* cast expected to restore negative sign */ + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevel) { /* 0 : does not change current level */ + CCtxParams->disableLiteralCompression = (cLevel<0); /* negative levels disable huffman */ + CCtxParams->compressionLevel = cLevel; + } + if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } case ZSTD_p_windowLog : DEBUGLOG(4, "ZSTD_CCtxParam_setParameter: set windowLog=%u", value); @@ -417,6 +424,10 @@ size_t ZSTD_CCtxParam_setParameter( } return (size_t)CCtxParams->cParams.strategy; + case ZSTD_p_compressLiterals: + CCtxParams->disableLiteralCompression = !value; + return !!value; + case ZSTD_p_contentSizeFlag : /* Content size written in frame header _when known_ (default:1) */ DEBUGLOG(4, "set content size flag = %u", (value>0)); @@ -1376,7 +1387,7 @@ static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, - ZSTD_strategy strategy, + ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32* workspace, const int bmi2) @@ -1388,15 +1399,22 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy, symbolEncodingType_e hType = set_compressed; size_t cLitSize; + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", + disableLiteralCompression); + /* Prepare nextEntropy assuming reusing the existing table */ nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode; memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable)); + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + /* small ? 
don't even attempt compression (speed opt) */ # define LITERAL_NOENTROPY 63 - { size_t const minLitSize = prevEntropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : LITERAL_NOENTROPY; + if (srcSize <= minLitSize) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ @@ -1604,11 +1622,11 @@ size_t ZSTD_encodeSequences( MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_entropyCTables_t const* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, - ZSTD_compressionParameters const* cParams, + ZSTD_CCtx_params const* cctxParams, void* dst, size_t dstCapacity, U32* workspace, const int bmi2) { - const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN; + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; U32 count[MaxSeq+1]; FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; @@ -1629,9 +1647,12 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; - size_t const cSize = ZSTD_compressLiterals(prevEntropy, nextEntropy, - cParams->strategy, op, dstCapacity, literals, litSize, - workspace, bmi2); + size_t const cSize = ZSTD_compressLiterals( + prevEntropy, nextEntropy, + cctxParams->cParams.strategy, cctxParams->disableLiteralCompression, + op, dstCapacity, + literals, litSize, + workspace, bmi2); if (ZSTD_isError(cSize)) return cSize; assert(cSize <= dstCapacity); @@ -1722,28 +1743,24 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, ZSTD_entropyCTables_t const* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, - ZSTD_compressionParameters const* cParams, + ZSTD_CCtx_params const* cctxParams, void* dst, size_t dstCapacity, size_t srcSize, U32* workspace, int bmi2) { size_t const cSize = ZSTD_compressSequences_internal( - seqStorePtr, prevEntropy, nextEntropy, cParams, dst, dstCapacity, + seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, workspace, bmi2); - /* If the srcSize <= dstCapacity, then there is enough space to write a - * raw uncompressed block. Since we ran out of space, the block must not - * be compressible, so fall back to a raw uncompressed block. + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
*/ - int const uncompressibleError = (cSize == ERROR(dstSize_tooSmall)) && (srcSize <= dstCapacity); - if (ZSTD_isError(cSize) && !uncompressibleError) - return cSize; + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + if (ZSTD_isError(cSize)) return cSize; /* Check compressibility */ - { size_t const minGain = ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */ - size_t const maxCSize = srcSize - minGain; - if (cSize >= maxCSize || uncompressibleError) { - return 0; /* block not compressed */ - } } - assert(!ZSTD_isError(cSize)); + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */ + if (cSize >= maxCSize) return 0; /* block not compressed */ + } /* We check that dictionaries have offset codes available for the first * block. After the first block, the offcode table might not have large @@ -1813,7 +1830,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, /* select and store sequences */ { U32 const extDict = ZSTD_window_hasExtDict(ms->window); size_t lastLLSize; - { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; } + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } if (zc->appliedParams.ldmParams.enableLdm) { size_t const nbSeq = ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences, @@ -1834,7 +1854,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, } } /* encode sequences and literals */ - { size_t const cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize, zc->entropyWorkspace, zc->bmi2); + { size_t const cSize = ZSTD_compressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, zc->entropyWorkspace, zc->bmi2); if (ZSTD_isError(cSize) || cSize == 0) return cSize; /* confirm repcodes and entropy tables */ { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; @@ -2422,20 +2446,20 @@ size_t ZSTD_compress_advanced_internal( return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } -size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0); - params.fParams.contentSizeFlag = 1; - DEBUGLOG(4, "ZSTD_compress_usingDict (level=%i, srcSize=%u, dictSize=%u)", - compressionLevel, (U32)srcSize, (U32)dictSize); - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? 
dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); } -size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) { DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize); - return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); } size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) @@ -2743,29 +2767,30 @@ size_t ZSTD_CStreamOutSize(void) return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; } -static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, +static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, const void* const dict, size_t const dictSize, ZSTD_dictMode_e const dictMode, const ZSTD_CDict* const cdict, ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize) { - DEBUGLOG(4, "ZSTD_resetCStream_internal"); + DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)", + params.disableLiteralCompression); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - CHECK_F( ZSTD_compressBegin_internal(zcs, + CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, dictMode, cdict, params, pledgedSrcSize, ZSTDb_buffered) ); - zcs->inToCompress = 0; - zcs->inBuffPos = 0; - zcs->inBuffTarget = zcs->blockSize - + (zcs->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->streamStage = zcss_load; - zcs->frameEnded = 0; + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + cctx->inBuffTarget = cctx->blockSize + + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; return 0; /* ready to go */ } @@ -3099,10 +3124,10 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, cctx->appliedParams.nbWorkers = params.nbWorkers; } else #endif - { CHECK_F( ZSTD_resetCStream_internal( - cctx, prefixDict.dict, prefixDict.dictSize, - prefixDict.dictMode, cctx->cdict, params, - cctx->pledgedSrcSizePlusOne-1) ); + { CHECK_F( ZSTD_resetCStream_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictMode, + cctx->cdict, + params, cctx->pledgedSrcSizePlusOne-1) ); assert(cctx->streamStage == zcss_load); assert(cctx->appliedParams.nbWorkers == 0); } } @@ -3178,11 +3203,11 @@ int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { { /* "default" - guarantees a monotonically increasing memory budget */ /* W, C, H, S, L, TL, 
strat */ - { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */ - { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */ - { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */ - { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3 */ - { 20, 17, 18, 1, 5, 16, ZSTD_dfast }, /* level 4 */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */ + { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */ + { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */ + { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */ { 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */ { 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */ { 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */ @@ -3191,9 +3216,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 4, 5, 16, ZSTD_btlazy2 }, /* level 13 */ - { 22, 21, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 14 */ - { 22, 22, 22, 6, 5, 16, ZSTD_btlazy2 }, /* level 15 */ + { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */ { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */ { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */ @@ -3204,8 +3229,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, { /* for srcSize <= 256 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */ - { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */ { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */ { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */ { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/ @@ -3230,9 +3255,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, { /* for srcSize <= 128 KB */ /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */ - { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */ - { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* level 0 - not used */ + { 17, 12, 13, 1, 6, 1, ZSTD_fast }, /* level 1 */ + { 17, 13, 16, 1, 5, 1, ZSTD_fast }, /* level 2 */ { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */ { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */ { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */ @@ -3256,9 +3281,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, { /* for srcSize <= 16 KB */ /* W, C, H, S, L, T, strat */ - { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */ - { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */ - { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */ + { 14, 14, 14, 1, 4, 1, ZSTD_fast }, /* level 2 */ { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/ { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/ { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/ @@ -3290,12 +3315,14 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l size_t const addedSize = srcSizeHint ? 0 : 500; U64 const rSize = srcSizeHint+dictSize ? 
srcSizeHint+dictSize+addedSize : (U64)-1; U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ - DEBUGLOG(4, "ZSTD_getCParams: cLevel=%i, srcSize=%u, dictSize=%u => table %u", - compressionLevel, (U32)srcSizeHint, (U32)dictSize, tableID); - if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */ - if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; - { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel]; - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } + int row = compressionLevel; + DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } } @@ -3305,6 +3332,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { ZSTD_parameters params; ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); memset(¶ms, 0, sizeof(params)); params.cParams = cParams; params.fParams.contentSizeFlag = 1; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 92bd0c707..65e99cd90 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -158,6 +158,7 @@ struct ZSTD_CCtx_params_s { ZSTD_frameParameters fParams; int compressionLevel; + int disableLiteralCompression; int forceWindow; /* force back-references to respect limit of * 1<> (32-h) ; } MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ @@ -453,9 +456,9 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) #define ZSTD_LOWLIMIT_MAX (3U << 29) /* Max lowLimit allowed */ /* Maximum chunk size before overflow correction needs to be called again */ -#define ZSTD_CHUNKSIZE_MAX \ - ( ((U32)-1) /* Maximum ending current index */ \ - - (1U << ZSTD_WINDOWLOG_MAX) /* Max distance from lowLimit to current */ \ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - (1U << ZSTD_WINDOWLOG_MAX) /* Max distance from lowLimit to current */ \ - ZSTD_LOWLIMIT_MAX) /* Maximum beginning lowLimit */ /** diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index d2f50488e..df4d28b34 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -42,7 +42,7 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const hlog, U32 const mls) + U32 const hlog, U32 const stepSize, U32 const mls) { U32* const hashTable = ms->hashTable; const BYTE* const base = ms->window.base; @@ -77,19 +77,19 @@ size_t ZSTD_compressBlock_fast_generic( ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else { - U32 offset; - if ( 
(matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> kSearchStrength) + 1; + if ( (matchIndex <= lowestIndex) + || (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; } mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } + { U32 const offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } /* match found */ ip += mLength; @@ -128,17 +128,18 @@ size_t ZSTD_compressBlock_fast( { U32 const hlog = cParams->hashLog; U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; switch(mls) { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 4); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 5); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 6); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 7); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); } } @@ -146,7 +147,7 @@ size_t ZSTD_compressBlock_fast( static size_t ZSTD_compressBlock_fast_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const hlog, U32 const mls) + U32 const hlog, U32 const stepSize, U32 const mls) { U32* hashTable = ms->hashTable; const BYTE* const base = ms->window.base; @@ -185,7 +186,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( } else { if ( (matchIndex < lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> kSearchStrength) + 1; + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; } { const BYTE* matchEnd = matchIndex < dictLimit ? 
dictEnd : iend; @@ -241,16 +243,17 @@ size_t ZSTD_compressBlock_fast_extDict( { U32 const hlog = cParams->hashLog; U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; switch(mls) { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 4); + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 5); + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 6); + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 7); + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); } } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 4d58e2e2f..a5b6e0143 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -660,6 +660,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) jobParams.cParams = params.cParams; jobParams.fParams = params.fParams; jobParams.compressionLevel = params.compressionLevel; + jobParams.disableLiteralCompression = params.disableLiteralCompression; jobParams.ldmParams = params.ldmParams; return jobParams; @@ -767,6 +768,7 @@ static size_t ZSTDMT_compress_advanced_internal( if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */ ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; + DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); } @@ -909,8 +911,8 @@ size_t ZSTDMT_initCStream_internal( const ZSTD_CDict* cdict, ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)", - (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx); + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)", + (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ @@ -928,7 +930,7 @@ size_t ZSTDMT_initCStream_internal( mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ if (mtctx->singleBlockingThread) { ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); - DEBUGLOG(4, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); + DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); assert(singleThreadParams.nbWorkers == 0); return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], dict, dictSize, cdict, diff --git a/lib/zstd.h b/lib/zstd.h index deedd3643..3ea049eb6 100644 --- a/lib/zstd.h +++ 
b/lib/zstd.h @@ -390,8 +390,8 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MIN 4 /* only useful for btopt */ -#define ZSTD_TARGETLENGTH_MAX 999 /* only useful for btopt */ +#define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */ +#define ZSTD_TARGETLENGTH_MAX 999 /* only used by btopt, btultra and btfast */ #define ZSTD_LDM_MINMATCH_MIN 4 #define ZSTD_LDM_MINMATCH_MAX 4096 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8 @@ -947,7 +947,9 @@ typedef enum { /* compression parameters */ ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means "do not change cLevel". */ + * Special: value 0 means "do not change cLevel". + * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type. + * Note 2 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */ ZSTD_p_windowLog, /* Maximum allowed back-reference distance, expressed as power of 2. * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. * Special: value 0 means "do not change windowLog". @@ -974,9 +976,13 @@ typedef enum { * Note that currently, for all strategies < btopt, effective minimum is 4. * Note that currently, for all strategies > fast, effective maximum is 6. * Special: value 0 means "do not change minMatchLength". */ - ZSTD_p_targetLength, /* Only useful for strategies >= btopt. - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger and slower. + ZSTD_p_targetLength, /* Impact of this field depends on strategy. + * For strategies btopt & btultra: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. * Special: value 0 means "do not change targetLength". */ ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition. * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility. @@ -1001,17 +1007,25 @@ typedef enum { * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call). * More workers improve speed, but also increase memory usage. * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ - ZSTD_p_jobSize, /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode. - * Each compression job is completed in parallel, so indirectly controls the nb of active threads. + ZSTD_p_jobSize, /* Size of a compression job. This value is enforced only in non-blocking mode. + * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest + * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest. 
* The minimum size is automatically and transparently enforced */ ZSTD_p_overlapSizeLog, /* Size of previous input reloaded at the beginning of each job. * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */ /* advanced parameters - may not remain available after API update */ + + ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default. + * disabling it improves speed and decreases compression ratio by a large amount. + * note : this setting is automatically updated when changing compression level. + * positive compression levels set ZSTD_p_compressLiterals to 1. + * negative compression levels set ZSTD_p_compressLiterals to 0. */ + ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize, * even when referencing into Dictionary content (default:0) */ + ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching. * This parameter is designed to improve the compression * ratio for large inputs with long distance matches. @@ -1070,23 +1084,21 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); /*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from dict buffer. - * Decompression will have to use same buffer. + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : `dict` content will be copied internally. Use - * ZSTD_CCtx_loadDictionary_byReference() to reference dictionary - * content instead. The dictionary buffer must then outlive its - * users. + * Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary will be used for all future compression jobs. + * To return to "no-dictionary" situation, load a NULL dictionary * Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters. * For this reason, compression parameters cannot be changed anymore after loading a dictionary. - * It's also a CPU-heavy operation, with non-negligible impact on latency. - * Note 3 : Dictionary will be used for all future compression jobs. - * To return to "no-dictionary" situation, load a NULL dictionary - * Note 5 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary - * content will be interpreted. - */ + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Note 3 :`dict` content will be copied internally. + * Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. 
*/ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); @@ -1101,8 +1113,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void * Special : adding a NULL CDict means "return to no-dictionary mode". * Note 1 : Currently, only one dictionary can be managed. * Adding a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive CCtx. - */ + * Note 2 : CDict is just referenced, its lifetime must outlive CCtx. */ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*! ZSTD_CCtx_refPrefix() : @@ -1112,13 +1123,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); * Subsequent compression jobs will be done without prefix (if none is explicitly referenced). * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead. * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary * Note 1 : Prefix buffer is referenced. It must outlive compression job. * Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU-heavy operation, with non-negligible impact on latency. - * Note 3 : By default, the prefix is treated as raw content - * (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter - * dictMode. */ + * It's a CPU consuming operation, with non-negligible impact on latency. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); diff --git a/programs/bench.c b/programs/bench.c index 66f7d69a5..014a4fd41 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -214,6 +214,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ + if (g_nbWorkers==1) g_nbWorkers=0; /* prefer synchronous mode */ if (g_decodeOnly) { /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; @@ -533,9 +534,8 @@ static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, if (g_displayLevel == 1 && !g_additionalParam) DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); - if (cLevelLast < cLevel) cLevelLast = cLevel; - for (l=cLevel; l <= cLevelLast; l++) { + if (l==0) continue; /* skip level 0 */ BMK_benchMem(srcBuffer, benchedSize, displayName, l, fileSizes, nbFiles, @@ -545,8 +545,8 @@ static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, /*! 
BMK_loadFiles() : - Loads `buffer` with content of files listed within `fileNamesTable`. - At most, fills `buffer` entirely */ + * Loads `buffer` with content of files listed within `fileNamesTable`. + * At most, fills `buffer` entirely. */ static void BMK_loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes, const char* const * const fileNamesTable, unsigned nbFiles) @@ -677,7 +677,6 @@ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, { double const compressibility = (double)g_compressibilityDefault / 100; - if (cLevel < 1) cLevel = 1; /* minimum compression level */ if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); if (cLevelLast < cLevel) cLevelLast = cLevel; diff --git a/programs/fileio.c b/programs/fileio.c index 94dcb916d..d3d8b7dc5 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -420,6 +420,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, cRess_t ress; memset(&ress, 0, sizeof(ress)); + DISPLAYLEVEL(6, "FIO_createCResources \n"); ress.cctx = ZSTD_createCCtx(); if (ress.cctx == NULL) EXM_THROW(30, "allocation error : can't create ZSTD_CCtx"); @@ -440,7 +441,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) ); /* compression level */ - CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)cLevel) ); /* long distance matching */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) ); @@ -908,6 +909,7 @@ static int FIO_compressFilename_dstFile(cRess_t ress, stat_t statbuf; int stat_result = 0; + DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName); ress.dstFile = FIO_openDstFile(dstFileName); if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ /* Must ony be added after FIO_openDstFile() succeeds. diff --git a/programs/zstd.1.md b/programs/zstd.1.md index c970c5cbb..2e2dc54f8 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -115,6 +115,14 @@ the last one takes effect. Note: If `windowLog` is set to larger than 27, `--long=windowLog` or `--memory=windowSize` needs to be passed to the decompressor. +* `--fast[=#]`: + switch to ultra-fast compression levels. + If `=#` is not present, it defaults to `1`. + The higher the value, the faster the compression speed, + at the cost of some compression ratio. + This setting overwrites compression level if one was set previously. + Similarly, if a compression level is set after `--fast`, it overrides it. + * `-T#`, `--threads=#`: Compress using `#` working threads (default: 1). If `#` is 0, attempt to detect and use the number of physical CPU cores. @@ -339,14 +347,21 @@ The list of available _options_: The minimum _slen_ is 3 and the maximum is 7. - `targetLen`=_tlen_, `tlen`=_tlen_: - Specify the minimum match length that causes a match finder to stop - searching for better matches. + The impact of this field vary depending on selected strategy. - A larger minimum match length usually improves compression ratio but - decreases compression speed. - This option is only used with strategies ZSTD_btopt and ZSTD_btultra. 
+ For ZSTD\_btopt and ZSTD\_btultra, it specifies the minimum match length + that causes match finder to stop searching for better matches. + A larger `targetLen` usually improves compression ratio + but decreases compression speed. - The minimum _tlen_ is 4 and the maximum is 999. + For ZSTD\_fast, it specifies + the amount of data skipped between match sampling. + Impact is reversed : a larger `targetLen` increases compression speed + but decreases compression ratio. + + For all other strategies, this field has no impact. + + The minimum _tlen_ is 1 and the maximum is 999. - `overlapLog`=_ovlog_, `ovlog`=_ovlog_: Determine `overlapSize`, amount of data reloaded from previous job. diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 9f8d4dea7..bf82843aa 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -133,7 +133,8 @@ static int usage_advanced(const char* programName) DISPLAY( " -l : print information about zstd compressed files \n"); #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); - DISPLAY( "--long[=#] : enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); + DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); + DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); DISPLAY( " -B# : select size of each job (default: 0==automatic) \n"); @@ -219,10 +220,10 @@ static int exeNameMatch(const char* exeName, const char* test) } /*! readU32FromChar() : - @return : unsigned integer value read from input in `char` format - allows and interprets K, KB, KiB, M, MB and MiB suffix. - Will also modify `*stringPtr`, advancing it to position where it stopped reading. - Note : function result can overflow if digit string > MAX_UINT */ + * @return : unsigned integer value read from input in `char` format. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * Note : function result can overflow if digit string > MAX_UINT */ static unsigned readU32FromChar(const char** stringPtr) { unsigned result = 0; @@ -241,7 +242,7 @@ static unsigned readU32FromChar(const char** stringPtr) /** longCommandWArg() : * check if *stringPtr is the same as longCommand. * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. - * @return 0 and doesn't modify *stringPtr otherwise. + * @return 0 and doesn't modify *stringPtr otherwise. 
*/ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) { @@ -387,7 +388,7 @@ int main(int argCount, const char* argv[]) zstd_operation_mode operation = zom_compress; ZSTD_compressionParameters compressionParams; int cLevel = ZSTDCLI_CLEVEL_DEFAULT; - int cLevelLast = 1; + int cLevelLast = -10000; unsigned recursive = 0; unsigned memLimit = 0; const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ @@ -543,6 +544,21 @@ int main(int argCount, const char* argv[]) compressionParams.windowLog = ldmWindowLog; continue; } + if (longCommandWArg(&argument, "--fast")) { + /* Parse optional window log */ + if (*argument == '=') { + U32 fastLevel; + ++argument; + fastLevel = readU32FromChar(&argument); + if (fastLevel) cLevel = - (int)fastLevel; + } else if (*argument != 0) { + /* Invalid character following --fast */ + CLEAN_RETURN(badusage(programName)); + } else { + cLevel = -1; /* default for --fast */ + } + continue; + } /* fall-through, will trigger bad_usage() later on */ } @@ -813,16 +829,22 @@ int main(int argCount, const char* argv[]) } #ifndef ZSTD_NODECOMPRESS - if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */ + if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */ #endif /* No input filename ==> use stdin and stdout */ filenameIdx += !filenameIdx; /* filenameTable[0] is stdin by default */ - if (!strcmp(filenameTable[0], stdinmark) && !outFileName) outFileName = stdoutmark; /* when input is stdin, default output is stdout */ + if (!strcmp(filenameTable[0], stdinmark) && !outFileName) + outFileName = stdoutmark; /* when input is stdin, default output is stdout */ /* Check if input/output defined as console; trigger an error in this case */ - if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName)); - if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !strcmp(filenameTable[0], stdinmark) && !forceStdout && operation!=zom_decompress) + if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) + CLEAN_RETURN(badusage(programName)); + if ( outFileName && !strcmp(outFileName, stdoutmark) + && IS_CONSOLE(stdout) + && !strcmp(filenameTable[0], stdinmark) + && !forceStdout + && operation!=zom_decompress ) CLEAN_RETURN(badusage(programName)); #ifndef ZSTD_NOCOMPRESS diff --git a/tests/fuzzer.c b/tests/fuzzer.c index aecc06449..8d8e547c2 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -34,7 +34,7 @@ #include "zdict.h" /* ZDICT_trainFromBuffer */ #include "datagen.h" /* RDG_genBuffer */ #include "mem.h" -#define XXH_STATIC_LINKING_ONLY +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ #include "xxhash.h" /* XXH64 */ #include "util.h" @@ -1382,10 +1382,13 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD crcOrig = XXH64(sampleBuffer, sampleSize, 0); /* compression tests */ - { int const cLevel = + { int const cLevelPositive = ( FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / cLevelLimiter)) ) + 1; + int const cLevel = ((FUZ_rand(&lseed) & 15) == 3) ? 
+ - (int)((FUZ_rand(&lseed) & 7) + 1) : /* test negative cLevel */ + cLevelPositive; DISPLAYLEVEL(5, "fuzzer t%u: Simple compression test (level %i) \n", testNb, cLevel); cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel); CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize)); diff --git a/tests/playTests.sh b/tests/playTests.sh index 258c9c75b..c170ba9f4 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -101,8 +101,11 @@ $ECHO "test : basic compression " $ZSTD -f tmp # trivial compression case, creates tmp.zst $ECHO "test : basic decompression" $ZSTD -df tmp.zst # trivial decompression case (overwrites tmp) -$ECHO "test : too large compression level (must fail)" +$ECHO "test : too large compression level => auto-fix" $ZSTD -99 -f tmp # too large compression level, automatic sized down +$ECHO "test : --fast aka negative compression levels" +$ZSTD --fast -f tmp # == -1 +$ZSTD --fast=3 -f tmp # == -3 $ECHO "test : compress to stdout" $ZSTD tmp -c > tmpCompressed $ZSTD tmp --stdout > tmpCompressed # long command format @@ -201,7 +204,6 @@ fi rm tmp* - $ECHO "\n===> Advanced compression parameters " $ECHO "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!" $ECHO "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!" @@ -474,6 +476,8 @@ $ECHO "bench one file" $ZSTD -bi0 tmp1 $ECHO "bench multiple levels" $ZSTD -i0b0e3 tmp1 +$ECHO "bench negative level" +$ZSTD -bi0 --fast tmp1 $ECHO "with recursive and quiet modes" $ZSTD -rqi1b1e2 tmp1