From 6a546efb8c90adf7865801e313da433ce8406ce1 Mon Sep 17 00:00:00 2001 From: Stella Lau Date: Fri, 28 Jul 2017 15:51:33 -0700 Subject: [PATCH] Add long distance matcher Move last literals section to ZSTD_block_internal --- lib/common/zstd_internal.h | 14 + lib/compress/zstd_compress.c | 934 +++++++++++++++++++++++++++++++---- lib/compress/zstd_opt.h | 24 +- lib/zstd.h | 4 +- programs/bench.c | 6 +- programs/bench.h | 1 + programs/fileio.c | 6 + programs/fileio.h | 1 + programs/zstdcli.c | 7 +- tests/fuzzer.c | 4 +- tests/playTests.sh | 22 + tests/zstreamtest.c | 2 + 12 files changed, 901 insertions(+), 124 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 19c0a6261..49b21c2db 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -274,6 +274,20 @@ typedef struct { const BYTE* cachedLiterals; } optState_t; +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + ldmEntry_t* hashTable; + BYTE* bucketOffsets; + U32 ldmEnable; /* 1 if enable long distance matching */ + U32 hashLog; /* log size of hashTable */ + U32 bucketLog; /* log number of buckets, at most 4 */ + U32 hashEveryLog; +} ldmState_t; + typedef struct { U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5adeb480b..331b21207 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -36,6 +36,13 @@ static const U32 g_searchStrength = 8; /* control skip over incompressible dat #define HASH_READ_SIZE 8 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_BUCKET_SIZE_LOG_MAX 4 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_WINDOW_LOG 27 +#define LDM_HASH_LOG 20 +#define LDM_HASH_CHAR_OFFSET 10 + /*-************************************* * Helper functions @@ -46,7 +53,6 @@ size_t ZSTD_compressBound(size_t srcSize) { return srcSize + (srcSize >> 8) + margin; } - /*-************************************* * Sequence storage ***************************************/ @@ -101,6 +107,7 @@ struct ZSTD_CCtx_s { seqStore_t seqStore; /* sequences storage ptrs */ optState_t optState; + ldmState_t ldmState; /* long distance matching state */ U32* hashTable; U32* hashTable3; U32* chainTable; @@ -354,6 +361,16 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->requestedParams.nbThreads); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_p_longDistanceMatching: + /* TODO */ + if (cctx->cdict) return ERROR(stage_wrong); + cctx->ldmState.ldmEnable = value>0; + if (value != 0) { + ZSTD_cLevelToCParams(cctx); + cctx->requestedParams.cParams.windowLog = LDM_WINDOW_LOG; + } + return 0; + default: return ERROR(parameter_unsupported); } } @@ -453,6 +470,10 @@ size_t ZSTD_CCtxParam_setParameter( if (params->nbThreads <= 1) return ERROR(parameter_unsupported); return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value); + case ZSTD_p_longDistanceMatching : + /* TODO */ + return ERROR(parameter_unsupported); + default: return ERROR(parameter_unsupported); } } @@ -677,7 +698,11 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<appliedParams)) { + /* TODO: For now, reset if long distance matching is enabled */ + 
if (ZSTD_equivalentParams(params, zc->appliedParams) && + !zc->ldmState.ldmEnable) { DEBUGLOG(5, "ZSTD_equivalentParams()==1"); zc->entropy->hufCTable_repeatMode = HUF_repeat_none; zc->entropy->offcode_repeatMode = FSE_repeat_none; @@ -787,6 +814,15 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, return ZSTD_continueCCtx(zc, params, pledgedSrcSize); } } + { + zc->ldmState.hashLog = LDM_HASH_LOG; + zc->ldmState.bucketLog = + MIN(LDM_BUCKET_SIZE_LOG, LDM_BUCKET_SIZE_LOG_MAX); + zc->ldmState.hashEveryLog = + params.cParams.windowLog < zc->ldmState.hashLog ? + 0 : params.cParams.windowLog - zc->ldmState.hashLog; + } + { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog); U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; @@ -802,6 +838,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0; void* ptr; + size_t const ldmHSize = ((size_t)1) << zc->ldmState.hashLog; + size_t const ldmBucketSize = + ((size_t)1) << (zc->ldmState.hashLog - zc->ldmState.bucketLog); + size_t const ldmPotentialSpace = + ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t))); + size_t const ldmSpace = zc->ldmState.ldmEnable ? + ldmPotentialSpace : 0; + /* Check if workSpace is large enough, alloc a new one if needed */ { size_t const entropySpace = sizeof(ZSTD_entropyCTables_t); size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { /* too small : resize /*/ + if (zc->workSpaceSize < neededSpace) { /* too small : resize */ DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n", (unsigned)zc->workSpaceSize>>10, (unsigned)neededSpace>>10); @@ -878,6 +922,16 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1; } + /* ldm space */ + if (zc->ldmState.ldmEnable) { + if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, ldmSpace); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->ldmState.hashTable = (ldmEntry_t*)ptr; + ptr = zc->ldmState.hashTable + ldmHSize; + zc->ldmState.bucketOffsets = (BYTE*)ptr; + ptr = zc->ldmState.bucketOffsets + ldmBucketSize; + } + /* table Space */ if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ @@ -990,6 +1044,18 @@ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reduce } } +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + /*! ZSTD_reduceIndex() : * rescale all indexes to avoid future overflow (indexes are U32) */ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) @@ -1002,6 +1068,12 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) { U32 const h3Size = (zc->hashLog3) ? 
1 << zc->hashLog3 : 0; ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } + + { if (zc->ldmState.ldmEnable) { + U32 const ldmHSize = 1 << LDM_HASH_LOG; + ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue); + } + } } @@ -1611,7 +1683,6 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) } } - /*-************************************* * Fast Scan ***************************************/ @@ -1630,11 +1701,10 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) } } - FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, - const void* src, size_t srcSize, - const U32 mls) +size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) { U32* const hashTable = cctx->hashTable; U32 const hBits = cctx->appliedParams.cParams.hashLog; @@ -1681,7 +1751,6 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } @@ -1711,15 +1780,11 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } - -static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { const U32 mls = ctx->appliedParams.cParams.searchLength; @@ -1727,18 +1792,19 @@ static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); } } -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 mls) { @@ -1825,15 +1891,11 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, /* save reps for next block */ seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } - -static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { U32 const mls = ctx->appliedParams.cParams.searchLength; @@ -1841,13 +1903,13 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + return 
ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); } } @@ -1875,7 +1937,7 @@ static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U3 FORCE_INLINE -void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, +size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) { @@ -1921,6 +1983,7 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, ip++; ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); } else { + U32 offset; if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; @@ -1982,33 +2045,32 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) { const U32 mls = ctx->appliedParams.cParams.searchLength; switch(mls) { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); } } -static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 mls) { @@ -2131,15 +2193,12 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, /* save reps for next block */ seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { U32 const mls = ctx->appliedParams.cParams.searchLength; @@ -2147,13 +2206,13 @@ static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; + return 
ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); } } @@ -2546,9 +2605,9 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( * Common parser - lazy strategy *********************************/ FORCE_INLINE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) +size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -2678,37 +2737,38 @@ _storeSequence: seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, + size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); } -static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, + size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); } -static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, + size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); } -static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, + size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); } FORCE_INLINE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) { @@ -2774,7 +2834,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, /* let's try to find a better solution */ if (depth>=1) while (iprepToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - 
ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); } -static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); } -static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); } -static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); } /* The optimal parser */ #include "zstd_opt.h" -static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { #ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); #else (void)ctx; (void)src; (void)srcSize; - return; + return 0; #endif } -static void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { #ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); #else (void)ctx; (void)src; (void)srcSize; - return; + return 0; #endif } -static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { #ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); + return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); #else (void)ctx; (void)src; (void)srcSize; - return; + return 0; #endif } -static void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +static size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { #ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); #else (void)ctx; (void)src; (void)srcSize; - return; + return 0; #endif } - /* ZSTD_selectBlockCompressor() : * assumption : strat is a valid strategy */ -typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); +typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = { @@ -2967,18 +3023,687 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int return blockCompressor[extDict!=0][(U32)strat]; } +/*-************************************* +* Long distance matching 
+***************************************/ + +/** ZSTD_ldm_getSmallHash() : + * numBits should be <= 32 + * @return : the most significant numBits of value */ +static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) +{ + assert(numBits <= 32); + return (U32)(value >> (64 - numBits)); +} + +/** ZSTD_ldm_getChecksum() : + * numBitsToDiscard should be <= 32 + * @return : the next most significant 32 bits after numBitsToDiscard */ +static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) +{ + assert(numBitsToDiscard <= 32); + return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; +} + +/** ZSTD_ldm_getTag() ; + * Given the hash, returns the most significant numTagBits bits + * after (32 + hbits) bits. + * + * If there are not enough bits remaining, return the last + * numTagBits bits. */ +static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) +{ + if (32 - hbits < numTagBits) { + return hash & ((1 << numTagBits) - 1); + } else { + return (hash >> (32 - hbits - numTagBits)) & ((1 << numTagBits) - 1); + } +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket(ldmState_t* ldmState, size_t hash) +{ + return ldmState->hashTable + (hash << ldmState->bucketLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry) +{ + BYTE* const bucketOffsets = ldmState->bucketOffsets; + *(ZSTD_ldm_getBucket(ldmState, hash) + bucketOffsets[hash]) = entry; + bucketOffsets[hash]++; + bucketOffsets[hash] &= (1 << ldmState->bucketLog) - 1; +} + +/** ZSTD_ldm_makeEntryAndInsertByTag() : + * + * Gets the small hash, checksum, and tag from the rollingHash. + * + * If the tag matches (1 << ldmState->hashEveryLog)-1, then + * creates an ldmEntry from the offset, and inserts it into the hash table. + * + * hBits is the length of the small hash, which is the most significant hBits + * of rollingHash. The checksum is the next 32 most significant bits, followed + * by ldmState->hashEveryLog bits that make up the tag. */ +static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, + U64 rollingHash, U32 hBits, + U32 const offset) +{ + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog); + U32 const tagMask = (1 << ldmState->hashEveryLog) - 1; + if (tag == tagMask) { + U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + ldmEntry_t entry; + entry.offset = offset; + entry.checksum = checksum; + ZSTD_ldm_insertEntry(ldmState, hash, entry); + } +} + +/** ZSTD_ldm_getRollingHash() : + * Get a 64-bit hash using the first len bytes from buf. + * + * Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be + * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0) + * + * where the constant a is defined to be prime8bytes. + * + * The implementation adds an offset to each byte, so + * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */ +static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len) +{ + U64 ret = 0; + U32 i; + for (i = 0; i < len; i++) { + ret *= prime8bytes; + ret += buf[i] + LDM_HASH_CHAR_OFFSET; + } + return ret; +} + +/** ZSTD_ldm_ipow() : + * Return base^exp. 
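+ *  Computed by exponentiation by squaring (square-and-multiply), so it
+ *  needs only O(log(exp)) multiplications.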
*/ +static U64 ZSTD_ldm_ipow(U64 base, U64 exp) +{ + U64 ret = 1; + while (exp) { + if (exp & 1) { ret *= base; } + exp >>= 1; + base *= base; + } + return ret; +} + +/** ZSTD_ldm_updateHash() : + * Updates hash by removing toRemove and adding toAdd. + * + * Note: this currently relies on compiler optimization to avoid + * recalculating hashPower. */ +static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd) +{ + U64 const hashPower = ZSTD_ldm_ipow(prime8bytes, LDM_MIN_MATCH_LENGTH - 1); + hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower); + hash *= prime8bytes; + hash += toAdd + LDM_HASH_CHAR_OFFSET; + return hash; +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end) +{ + const BYTE* const iend = (const BYTE*)end; + const U32 mls = zc->appliedParams.cParams.searchLength; + + switch(zc->appliedParams.cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(zc, iend, mls); + zc->nextToUpdate = (U32)(iend - zc->base); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(zc, iend, mls); + zc->nextToUpdate = (U32)(iend - zc->base); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +/** ZSTD_ldm_fillLdmHashTable() : + * + * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). + * lastHash is the rolling hash that corresponds to lastHashed. + * + * Returns the rolling hash corresponding to position iend-1. */ +static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, + U64 lastHash, const BYTE* lastHashed, + const BYTE* iend, const BYTE* base, + U32 hBits) +{ + U64 rollingHash = lastHash; + const BYTE* cur = lastHashed + 1; + + while (cur < iend) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1], + cur[LDM_MIN_MATCH_LENGTH-1]); + ZSTD_ldm_makeEntryAndInsertByTag(state, + rollingHash, hBits, + (U32)(cur - base)); + ++cur; + } + return rollingHash; +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor) +{ + U32 const current = (U32)(anchor - cctx->base); + if (current > cctx->nextToUpdate + 1024) { + cctx->nextToUpdate = + current - MIN(512, current - cctx->nextToUpdate - 1024); + } +} + +/** ZSTD_compressBlock_ldm_generic() : + * + * This is a block compressor intended for long distance matching. + * + * The function searches for matches of length at least LDM_MIN_MATCH_LENGTH + * using a hash table in cctx->ldmState. Matches can be at a distance of + * up to LDM_WINDOW_LOG. 
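+ *  Only positions whose rolling-hash tag equals the tag mask are inserted
+ *  into the table, so on average one in (1 << hashEveryLog) positions is
+ *  indexed as a candidate.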
+ * + * Upon finding a match, the unmatched literals are compressed using a + * ZSTD_blockCompressor (depending on the strategy in the compression + * parameters), which stores the matched sequences. The "long distance" + * match is then stored with the remaining literals from the + * ZSTD_blockCompressor. */ +FORCE_INLINE +size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize) +{ + ldmState_t* const ldmState = &(cctx->ldmState); + const U32 hBits = ldmState->hashLog - ldmState->bucketLog; + const U32 ldmBucketSize = (1 << ldmState->bucketLog); + const U32 ldmTagMask = (1 << ldmState->hashEveryLog) - 1; + seqStore_t* const seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - LDM_MIN_MATCH_LENGTH; + + const ZSTD_blockCompressor blockCompressor = + ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0); + U32* const repToConfirm = seqStorePtr->repToConfirm; + U32 savedRep[ZSTD_REP_NUM]; + U64 rollingHash = 0; + const BYTE* lastHashed = NULL; + size_t i, lastLiterals; + + /* Save seqStorePtr->rep and copy repToConfirm */ + for (i = 0; i < ZSTD_REP_NUM; i++) + savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i]; + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 const current = (U32)(ip - base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], + lastHashed[LDM_MIN_MATCH_LENGTH]); + } else { + rollingHash = ZSTD_ldm_getRollingHash(ip, LDM_MIN_MATCH_LENGTH); + } + lastHashed = ip; + + /* Do not insert and do not look for a match */ + if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) != + ldmTagMask) { + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits)); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + const BYTE* const pMatch = cur->offset + base; + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + + curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + if (curForwardMatchLength < LDM_MIN_MATCH_LENGTH) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch( + ip, anchor, pMatch, lowest); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, + hBits, current); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + /* Call the block compressor on the remaining literals */ + { + U32 const matchIndex = 
bestEntry->offset; + const BYTE* const match = base + matchIndex - backwardMatchLength; + U32 const offset = (U32)(ip - match); + + /* Overwrite rep codes */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(cctx, anchor); + ZSTD_ldm_fillFastTables(cctx, anchor); + + /* Call block compressor and get remaining literals */ + lastLiterals = blockCompressor(cctx, anchor, ip - anchor); + cctx->nextToUpdate = (U32)(ip - base); + + /* Update repToConfirm with the new offset */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + repToConfirm[i] = repToConfirm[i-1]; + repToConfirm[0] = offset; + + /* Store the sequence with the leftover literals */ + ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals, + offset + ZSTD_REP_MOVE, mLength - MINMATCH); + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base)); + + assert(ip + backwardMatchLength == lastHashed); + + /* Fill the hash table from lastHashed+1 to ip+mLength*/ + /* Heuristic: don't need to fill the entire table at end of block */ + if (ip + mLength < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + + /* Check immediate repcode */ + while ( (ip < ilimit) + && ( (repToConfirm[1] > 0) + && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) { + + size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1], + iend) + 4; + /* Swap repToConfirm[1] <=> repToConfirm[0] */ + { + U32 const tmpOff = repToConfirm[1]; + repToConfirm[1] = repToConfirm[0]; + repToConfirm[0] = tmpOff; + } + + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + + /* Fill the hash table from lastHashed+1 to ip+rLength*/ + if (ip + rLength < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + rLength, base, hBits); + lastHashed = ip + rLength - 1; + } + ip += rLength; + anchor = ip; + + continue; /* faster when present ... (?) 
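(same immediate-repcode shortcut used by the fast and double-fast block compressors)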
*/ + } + } + + /* Overwrite rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + ZSTD_ldm_limitTableUpdate(cctx, anchor); + ZSTD_ldm_fillFastTables(cctx, anchor); + + lastLiterals = blockCompressor(cctx, anchor, iend - anchor); + cctx->nextToUpdate = (U32)(ip - base); + + /* Restore seqStorePtr->rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = savedRep[i]; + + /* Return the last literals size */ + return lastLiterals; +} + +static size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize); +} + +static size_t ZSTD_compressBlock_ldm_extDict_generic( + ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + ldmState_t* ldmState = &(ctx->ldmState); + const U32 hBits = ldmState->hashLog - ldmState->bucketLog; + const U32 ldmBucketSize = (1 << ldmState->bucketLog); + const U32 ldmTagMask = (1 << ldmState->hashEveryLog) - 1; + seqStore_t* const seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - LDM_MIN_MATCH_LENGTH; + + const ZSTD_blockCompressor blockCompressor = + ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1); + U32* const repToConfirm = seqStorePtr->repToConfirm; + U32 savedRep[ZSTD_REP_NUM]; + U64 rollingHash = 0; + const BYTE* lastHashed = NULL; + size_t i, lastLiterals; + + /* Save seqStorePtr->rep and copy repToConfirm */ + for (i = 0; i < ZSTD_REP_NUM; i++) { + savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i]; + } + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + size_t mLength; + const U32 current = (U32)(ip-base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], + lastHashed[LDM_MIN_MATCH_LENGTH]); + } else { + rollingHash = ZSTD_ldm_getRollingHash(ip, LDM_MIN_MATCH_LENGTH); + } + lastHashed = ip; + + if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) != + ldmTagMask) { + /* Don't insert and don't look for a match */ + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits)); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + const BYTE* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + const BYTE* const pMatch = curMatchBase + cur->offset; + const BYTE* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + const BYTE* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + + curForwardMatchLength = ZSTD_count_2segments( + ip, pMatch, iend, + matchEnd, lowPrefixPtr); + if (curForwardMatchLength < LDM_MIN_MATCH_LENGTH) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch( + ip, anchor, pMatch, lowMatchPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base)); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + /* Call the block compressor on the remaining literals */ + { + U32 const matchIndex = bestEntry->offset; + U32 const offset = current - matchIndex; + + /* Overwrite rep codes */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + /* Fill the hash table for the block compressor */ + ZSTD_ldm_limitTableUpdate(ctx, anchor); + ZSTD_ldm_fillFastTables(ctx, anchor); + + /* Call block compressor and get remaining literals */ + lastLiterals = blockCompressor(ctx, anchor, ip - anchor); + ctx->nextToUpdate = (U32)(ip - base); + + /* Update repToConfirm with the new offset */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + repToConfirm[i] = repToConfirm[i-1]; + repToConfirm[0] = offset; + + /* Store the sequence with the leftover literals */ + ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals, + offset + ZSTD_REP_MOVE, mLength - MINMATCH); + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base)); + + /* Fill the hash table from lastHashed+1 to ip+mLength */ + assert(ip + backwardMatchLength == lastHashed); + if (ip + mLength < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + + /* check immediate repcode */ + while (ip < ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - repToConfirm[1]; + const BYTE* repMatch2 = repIndex2 < dictLimit ? + dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & + (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? 
+ dictEnd : iend; + size_t const repLength2 = + ZSTD_count_2segments(ip+4, repMatch2+4, iend, + repEnd2, lowPrefixPtr) + 4; + + U32 tmpOffset = repToConfirm[1]; + repToConfirm[1] = repToConfirm[0]; + repToConfirm[0] = tmpOffset; + + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + + /* Fill the hash table from lastHashed+1 to ip+repLength2*/ + if (ip + repLength2 < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + repLength2, base, hBits); + lastHashed = ip + repLength2 - 1; + } + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + /* Overwrite rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + ZSTD_ldm_limitTableUpdate(ctx, anchor); + ZSTD_ldm_fillFastTables(ctx, anchor); + + /* Call the block compressor one last time on the last literals */ + lastLiterals = blockCompressor(ctx, anchor, iend - anchor); + ctx->nextToUpdate = (U32)(ip - base); + + /* Restore seqStorePtr->rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = savedRep[i]; + + /* Return the last literals size */ + return lastLiterals; +} + +static size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize); +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->lowLimit < zc->dictLimit); const BYTE* const base = zc->base; const BYTE* const istart = (const BYTE*)src; const U32 current = (U32)(istart-base); + size_t lastLLSize; + const BYTE* anchor; + const ZSTD_blockCompressor blockCompressor = + zc->ldmState.ldmEnable ? + (zc->lowLimit < zc->dictLimit ? 
ZSTD_compressBlock_ldm_extDict : + ZSTD_compressBlock_ldm) : + ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->lowLimit < zc->dictLimit); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ ZSTD_resetSeqStore(&(zc->seqStore)); if (current > zc->nextToUpdate + 384) zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */ - blockCompressor(zc, src, srcSize); + + lastLLSize = blockCompressor(zc, src, srcSize); + + /* Last literals */ + anchor = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize); + return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize); } @@ -3203,7 +3928,6 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t case ZSTD_fast: ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength); break; - case ZSTD_dfast: ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength); break; diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 53e806eb7..575cfa661 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -413,8 +413,9 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( * Optimal parser *********************************/ FORCE_INLINE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) +size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); optState_t* optStatePtr = &(ctx->optState); @@ -654,17 +655,15 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ /* Save reps for next block */ { int i; for (i=0; irepToConfirm[i] = rep[i]; } - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } FORCE_INLINE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) +size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); optState_t* optStatePtr = &(ctx->optState); @@ -928,11 +927,8 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ /* Save reps for next block */ { int i; for (i=0; irepToConfirm[i] = rep[i]; } - /* Last Literals */ - { size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } #endif /* ZSTD_OPT_H_91842398743 */ diff --git a/lib/zstd.h b/lib/zstd.h index c11964408..c9fa34f36 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -978,7 +978,9 @@ typedef enum { /* advanced parameters - may not remain available after API update */ ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize, * even when referencing into Dictionary content (default:0) */ - + ZSTD_p_longDistanceMatching, /* Enable long distance matching. + * This increases the memory usage as well as the + * window size. 
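+                                    * Note: enabling it derives cParams from the current
+                                    * compression level and raises windowLog to
+                                    * LDM_WINDOW_LOG (27).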
*/ } ZSTD_cParameter; diff --git a/programs/bench.c b/programs/bench.c index 7731d079e..a2c4efcf3 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -129,7 +129,10 @@ void BMK_setNbThreads(unsigned nbThreads) { #endif g_nbThreads = nbThreads; } - +static U32 g_ldmFlag = 0; +void BMK_setLdmFlag(unsigned ldmFlag) { + g_ldmFlag = ldmFlag; +} /* ******************************************************** * Bench functions @@ -271,6 +274,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength); ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag); ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy); ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); #else diff --git a/programs/bench.h b/programs/bench.h index 77a527f8f..03f56d06b 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -25,5 +25,6 @@ void BMK_setNbThreads(unsigned nbThreads); void BMK_setNotificationLevel(unsigned level); void BMK_setAdditionalParam(int additionalParam); void BMK_setDecodeOnlyMode(unsigned decodeFlag); +void BMK_setLdmFlag(unsigned ldmFlag); #endif /* BENCH_H_121279284357 */ diff --git a/programs/fileio.c b/programs/fileio.c index 1dd8008e8..8d024a9b3 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -213,6 +213,10 @@ void FIO_setOverlapLog(unsigned overlapLog){ DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); g_overlapLog = overlapLog; } +static U32 g_ldmFlag = 0; +void FIO_setLdmFlag(unsigned ldmFlag) { + g_ldmFlag = (ldmFlag>0); +} /*-************************************* @@ -407,6 +411,8 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) ); + /* long distance matching */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) ); /* multi-threading */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) ); /* dictionary */ diff --git a/programs/fileio.h b/programs/fileio.h index 9d9167df9..06cf414df 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -56,6 +56,7 @@ void FIO_setMemLimit(unsigned memLimit); void FIO_setNbThreads(unsigned nbThreads); void FIO_setBlockSize(unsigned blockSize); void FIO_setOverlapLog(unsigned overlapLog); +void FIO_setLdmFlag(unsigned ldmFlag); /*-************************************* diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b1268c1f3..cf0710f7c 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -152,6 +152,7 @@ static int usage_advanced(const char* programName) #endif DISPLAY( " -M# : Set a memory usage limit for decompression \n"); DISPLAY( "--list : list information about a zstd compressed file \n"); + DISPLAY( "--long : enable long distance matching\n"); DISPLAY( "-- : All arguments after \"--\" are treated as files \n"); #ifndef ZSTD_NODICT DISPLAY( "\n"); @@ -333,7 +334,8 @@ int main(int argCount, const char* argv[]) ultra=0, lastCommand = 0, nbThreads = 1, - setRealTimePrio = 0; + setRealTimePrio = 0, + ldmFlag = 0; unsigned bench_nbSeconds = 3; /* would be better if this value was 
synchronized from bench */ size_t blockSize = 0; zstd_operation_mode operation = zom_compress; @@ -440,6 +442,7 @@ int main(int argCount, const char* argv[]) #ifdef ZSTD_LZ4COMPRESS if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(FIO_lz4Compression); continue; } #endif + if (!strcmp(argument, "--long")) { ldmFlag = 1; continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT @@ -690,6 +693,7 @@ int main(int argCount, const char* argv[]) BMK_setBlockSize(blockSize); BMK_setNbThreads(nbThreads); BMK_setNbSeconds(bench_nbSeconds); + BMK_setLdmFlag(ldmFlag); BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio); #endif (void)bench_nbSeconds; @@ -757,6 +761,7 @@ int main(int argCount, const char* argv[]) #ifndef ZSTD_NOCOMPRESS FIO_setNbThreads(nbThreads); FIO_setBlockSize((U32)blockSize); + FIO_setLdmFlag(ldmFlag); if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog); if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 439ab39d9..ca67483dd 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -440,8 +440,6 @@ static int basicUnitTests(U32 seed, double compressibility) free(staticDCtxBuffer); } - - /* ZSTDMT simple MT compression test */ DISPLAYLEVEL(4, "test%3i : create ZSTDMT CCtx : ", testNb++); { ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2); @@ -1342,6 +1340,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */ dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize)); + if (FUZ_rand(&lseed) & 0xF) { CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) ); } else { @@ -1350,6 +1349,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD !(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/, 0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */ ZSTD_parameters const p = FUZ_makeParams(cPar, fPar); + CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) ); } CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) ); diff --git a/tests/playTests.sh b/tests/playTests.sh index 77853b1a4..8fb2768c2 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -544,6 +544,15 @@ roundTripTest -g516K 19 # btopt fileRoundTripTest -g500K +$ECHO "\n**** zstd long distance matching round-trip tests **** " +roundTripTest -g0 "2 --long" +roundTripTest -g1000K "1 --long" +roundTripTest -g517K "6 --long" +roundTripTest -g516K "16 --long" +roundTripTest -g518K "19 --long" +fileRoundTripTest -g5M "3 --long" + + if [ -n "$hasMT" ] then $ECHO "\n**** zstdmt round-trip tests **** " @@ -551,6 +560,9 @@ then roundTripTest -g8M "3 -T2" roundTripTest -g8000K "2 --threads=2" fileRoundTripTest -g4M "19 -T2 -B1M" + + $ECHO "\n**** zstdmt long distance matching round-trip tests **** " + roundTripTest -g8M "3 --long -T2" else $ECHO "\n**** no multithreading, skipping zstdmt tests **** " fi @@ -639,6 +651,15 @@ roundTripTest -g6000000000 -P99 1 fileRoundTripTest -g4193M -P99 1 +$ECHO "\n**** zstd long, long distance matching round-trip tests **** " +roundTripTest -g0 "2 --long" +roundTripTest -g270000000 "1 --long" +roundTripTest -g140000000 -P60 "5 --long" +roundTripTest -g70000000 -P70 "8 --long" +roundTripTest -g18000001 -P80 "18 --long" +fileRoundTripTest -g4100M -P99 "1 
--long" + + if [ -n "$hasMT" ] then $ECHO "\n**** zstdmt long round-trip tests **** " @@ -646,6 +667,7 @@ then roundTripTest -g6000000000 -P99 "1 -T2" roundTripTest -g1500000000 -P97 "1 -T999" fileRoundTripTest -g4195M -P98 " -T0" + roundTripTest -g1500000000 -P97 "1 --long -T999" else $ECHO "\n**** no multithreading, skipping zstdmt tests **** " fi diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 8be6a5910..3baa10ef0 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1380,6 +1380,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) ); if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63) ); + /* unconditionally set, to be sync with decoder */ /* mess with frame parameters */ if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_checksumFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) );