diff --git a/lib/zstdhc.c b/lib/zstdhc.c
index fb10b4b10..efe0c2bf3 100644
--- a/lib/zstdhc.c
+++ b/lib/zstdhc.c
@@ -385,7 +385,261 @@ FORCE_INLINE size_t ZSTD_HC_BtFindBestMatch_selectMLS (
 }
 
 
-size_t ZSTD_HC_compressBlock_btLazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+/* ***********************
+* Hash Chain
+*************************/
+
+/* Update chains up to ip (excluded) */
+static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
+{
+    U32* const hashTable = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    U32* const chainTable = zc->chainTable;
+    const U32 chainMask = (1 << zc->params.chainLog) - 1;
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = zc->nextToUpdate;
+
+    while(idx < target)
+    {
+        size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    zc->nextToUpdate = target;
+    return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
+}
+
+
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_HC_insertAndFindBestMatch (
+                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    U32* const chainTable = zc->chainTable;
+    const U32 chainSize = (1 << zc->params.chainLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const U32 maxDistance = (1 << zc->params.windowLog);
+    const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1);
+    U32 matchIndex;
+    const BYTE* match;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=0;
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
+
+    while ((matchIndex>lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            match = base + matchIndex;
+            if ( (match[ml] == ip[ml])
+                && (MEM_read32(match) == MEM_read32(ip)) )   /* ensures minimum match of 4 */
+            {
+                const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml)
+                //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
+                {
+                    ml = mlt; *offsetPtr = ip-match;
+                    if (ip+ml >= iLimit) break;
+                }
+            }
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            if (MEM_read32(match) == MEM_read32(ip))
+            {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))
+                    mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }
+            }
+        }
+
+        if (base + matchIndex <= ip - chainSize) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    return ml;
+}
+
+
+FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
+                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)
+    {
+    default :
+    case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+    case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+    }
+}
+
+
+#if 1
+
+FORCE_INLINE
+size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx,
+                                     void* dst, size_t maxDstSize, const void* src, size_t srcSize,
+                                     const U32 searchMethod, const U32 deep)   /* 0 : hc; 1 : bt */
+{
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+
+    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+    const U32 maxSearches = 1 << ctx->params.searchLog;
+    const U32 mls = ctx->params.searchLength;
+
+    typedef size_t (*searchMax_f)(ZSTD_HC_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
+                        size_t* offsetPtr,
+                        U32 maxNbAttempts, U32 matchLengthSearch);
+    searchMax_f searchMax = searchMethod ? ZSTD_HC_BtFindBestMatch_selectMLS : ZSTD_HC_insertAndFindBestMatch_selectMLS;
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
+
+    /* Match Loop */
+    while (ip <= ilimit)
+    {
+        size_t matchLength;
+        size_t offset=999999;
+        const BYTE* start;
+
+        /* try to find a first match */
+        if (MEM_read32(ip) == MEM_read32(ip - offset_2))
+        {
+            /* repcode : we take it*/
+            size_t offtmp = offset_2;
+            size_t litLength = ip - anchor;
+            matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
+            offset_2 = offset_1;
+            offset_1 = offtmp;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
+            ip += matchLength+MINMATCH;
+            anchor = ip;
+            continue;
+        }
+
+        offset_2 = offset_1;
+        matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls);
+        if (!matchLength) { ip++; continue; }
+
+        /* let's try to find a better solution */
+        start = ip;
+
+        while (ip<ilimit)
+        {
+            ip ++;
+            if (MEM_read32(ip) == MEM_read32(ip - offset_1))
+            {
+                size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                int gain2 = (int)(ml2 * 3);
+                int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
+                if (gain2 > gain1)
+                    matchLength = ml2, offset = 0, start = ip;
+            }
+            {
+                size_t offset2=999999;
+                size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                int gain2 = (int)(ml2*(3+deep) - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                int gain1 = (int)(matchLength*(3+deep) - ZSTD_highbit((U32)offset+1) + (3+deep));
+                if (gain2 > gain1)
+                {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+                }
+            }
+
+            /* let's find an even better one */
+            if (deep && (ip<ilimit))
+            {
+                ip ++;
+                if (MEM_read32(ip) == MEM_read32(ip - offset_1))
+                {
+                    size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                    int gain2 = (int)(ml2 * 4);
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+                    if (gain2 > gain1)
+                        matchLength = ml2, offset = 0, start = ip;
+                }
+                {
+                    size_t offset2=999999;
+                    size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                    int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
+                    if (gain2 > gain1)
+                    {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+                    }
+                }
+            }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* store sequence */
+        {
+            size_t litLength = start - anchor;
+            if (offset) offset_1 = offset;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            ip = start + matchLength;
+            anchor = ip;
+        }
+
+    }
+
+    /* Last Literals */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Final compression stage */
+    return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 1);
+}
+
+size_t ZSTD_HC_compressBlock_hclazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);
+}
+
+size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);
+}
+
+#else
+
+size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -505,116 +759,7 @@ size_t ZSTD_HC_compressBlock_btLazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDst
 }
 
 
-
-/* ***********************
-* Hash Chain
-*************************/
-
-/* Update chains up to ip (excluded) */
-static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
-{
-    U32* const hashTable = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
-    U32* const chainTable = zc->chainTable;
-    const U32 chainMask = (1 << zc->params.chainLog) - 1;
-    const BYTE* const base = zc->base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = zc->nextToUpdate;
-
-    while(idx < target)
-    {
-        size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
-        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    zc->nextToUpdate = target;
-    return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
-}
-
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_HC_insertAndFindBestMatch (
-                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* const ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-    U32* const chainTable = zc->chainTable;
-    const U32 chainSize = (1 << zc->params.chainLog);
-    const U32 chainMask = chainSize-1;
-    const BYTE* const base = zc->base;
-    const BYTE* const dictBase = zc->dictBase;
-    const U32 dictLimit = zc->dictLimit;
-    const U32 maxDistance = (1 << zc->params.windowLog);
-    const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1);
-    U32 matchIndex;
-    const BYTE* match;
-    int nbAttempts=maxNbAttempts;
-    size_t ml=0;
-
-    /* HC4 match finder */
-    matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
-
-    while ((matchIndex>lowLimit) && (nbAttempts))
-    {
-        nbAttempts--;
-        if (matchIndex >= dictLimit)
-        {
-            match = base + matchIndex;
-            if ( (match[ml] == ip[ml])
-                && (MEM_read32(match) == MEM_read32(ip)) )   /* ensures minimum match of 4 */
-            {
-                const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
-                if (mlt > ml)
-                //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
-                {
-                    ml = mlt; *offsetPtr = ip-match;
-                    if (ip+ml >= iLimit) break;
-                }
-            }
-        }
-        else
-        {
-            match = dictBase + matchIndex;
-            if (MEM_read32(match) == MEM_read32(ip))
-            {
-                size_t mlt;
-                const BYTE* vLimit = ip + (dictLimit - matchIndex);
-                if (vLimit > iLimit) vLimit = iLimit;
-                mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
-                if ((ip+mlt == vLimit) && (vLimit < iLimit))
-                    mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
-                if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }
-            }
-        }
-
-        if (base + matchIndex <= ip - chainSize) break;
-        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-    }
-
-    return ml;
-}
-
-
-FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
-                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-    switch(matchLengthSearch)
-    {
-    default :
-    case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-    case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-    case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-    }
-}
-
-
-size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+size_t ZSTD_HC_compressBlock_hclazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -733,7 +878,6 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
                                   seqStorePtr, srcSize);
 }
 
-
 size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
@@ -833,6 +977,11 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
 }
 
 
+#endif
+
+
+
+
 size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
@@ -923,10 +1072,10 @@ static ZSTD_HC_blockCompressor ZSTD_HC_selectBlockCompressor(ZSTD_HC_strategy st
         return ZSTD_HC_compressBlock_greedy;
     case ZSTD_HC_lazy:
        return ZSTD_HC_compressBlock_lazy;
-    case ZSTD_HC_lazydeep:
-        return ZSTD_HC_compressBlock_lazydeep;
+    case ZSTD_HC_hclazy2:
+        return ZSTD_HC_compressBlock_hclazy2;
     case ZSTD_HC_btlazy2:
-        return ZSTD_HC_compressBlock_btLazy2;
+        return ZSTD_HC_compressBlock_btlazy2;
     }
 }
 
diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h
index 1525b35f8..b49aef208 100644
--- a/lib/zstdhc_static.h
+++ b/lib/zstdhc_static.h
@@ -45,7 +45,9 @@ extern "C" {
 /* *************************************
 * Types
 ***************************************/
-typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazydeep, ZSTD_HC_btlazy2 } ZSTD_HC_strategy;
+/** from faster to stronger */
+typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_hclazy2, ZSTD_HC_btlazy2 } ZSTD_HC_strategy;
+
 typedef struct
 {
     U32 windowLog;     /* largest match distance : impact decompression buffer size */
@@ -53,7 +55,7 @@ typedef struct
     U32 hashLog;       /* dispatch table : larger == more memory, faster*/
     U32 searchLog;     /* nb of searches : larger == more compression, slower*/
     U32 searchLength;  /* size of matches : larger == faster decompression */
-    ZSTD_HC_strategy strategy;   /* faster to stronger : greedy, lazy, lazydeep, btlazy2 */
+    ZSTD_HC_strategy strategy;
 } ZSTD_HC_parameters;
 
 /* parameters boundaries */
@@ -110,11 +112,11 @@ static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1]
     { 21, 19, 20,  4,  5, ZSTD_HC_lazy     },   /* level  8 */
     { 21, 19, 20,  5,  5, ZSTD_HC_lazy     },   /* level  9 */
     { 21, 20, 20,  5,  5, ZSTD_HC_lazy     },   /* level 10 */
-    { 21, 20, 20,  5,  5, ZSTD_HC_lazydeep },   /* level 11 */
-    { 22, 20, 22,  5,  5, ZSTD_HC_lazydeep },   /* level 12 */
-    { 22, 20, 22,  6,  5, ZSTD_HC_lazydeep },   /* level 13 */
-    { 22, 21, 22,  6,  5, ZSTD_HC_lazydeep },   /* level 14 */
-    { 22, 21, 22,  6,  5, ZSTD_HC_lazydeep },   /* level 15 */
+    { 21, 20, 20,  5,  5, ZSTD_HC_hclazy2  },   /* level 11 */
+    { 22, 20, 22,  5,  5, ZSTD_HC_hclazy2  },   /* level 12 */
+    { 22, 20, 22,  6,  5, ZSTD_HC_hclazy2  },   /* level 13 */
+    { 22, 21, 22,  6,  5, ZSTD_HC_hclazy2  },   /* level 14 */
+    { 22, 21, 22,  6,  5, ZSTD_HC_hclazy2  },   /* level 15 */
     { 22, 21, 22,  4,  5, ZSTD_HC_btlazy2  },   /* level 16 */
     { 23, 23, 23,  4,  5, ZSTD_HC_btlazy2  },   /* level 17 */
     { 23, 23, 23,  5,  5, ZSTD_HC_btlazy2  },   /* level 18 */
diff --git a/programs/paramgrill.c b/programs/paramgrill.c
index 40033a6a3..b133a9497 100644
--- a/programs/paramgrill.c
+++ b/programs/paramgrill.c
@@ -429,7 +429,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
 }
 
 
-const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_lazydeep", "ZSTD_HC_btlazy2 " };
+const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_hclazy2", "ZSTD_HC_btlazy2" };
 
 static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize)
 {
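
Note (not part of the patch): the gain1/gain2 comparison inside ZSTD_HC_compressBlock_lazy_generic weighs a match found one position later against the match already in hand, crediting roughly 3 or 4 units per matched byte and charging the log2 cost of the offset, with a small bonus that favours keeping the current match. The standalone C sketch below illustrates that cost model; highbit(), matchGain() and the sample lengths/offsets are illustrative stand-ins written for this note, not code from the patch.

/* Standalone sketch of the lazy parser's match-selection heuristic.
 * highbit() is a plain-C stand-in for ZSTD_highbit (floor(log2) of a U32). */
#include <stddef.h>
#include <stdio.h>

typedef unsigned int U32;

/* position of the highest set bit, i.e. floor(log2(v)); v must be non-zero */
static U32 highbit(U32 v)
{
    U32 r = 0;
    while (v >>= 1) r++;
    return r;
}

/* Approximate gain of encoding a match: 4 units per matched byte,
 * minus the log2 cost of transmitting the offset, plus an optional
 * bonus that makes the parser reluctant to abandon its current match. */
static int matchGain(size_t matchLength, size_t offset, int keepBonus)
{
    return (int)(matchLength * 4) - (int)highbit((U32)offset + 1) + keepBonus;
}

int main(void)
{
    size_t curLen = 6, curOff = 1 << 16;   /* current candidate: longer, but far away */
    size_t newLen = 5, newOff = 64;        /* candidate at ip+1: shorter, but close   */

    int gain1 = matchGain(curLen, curOff, 7);   /* current match keeps a +7 bonus */
    int gain2 = matchGain(newLen, newOff, 0);   /* new match must beat it outright */

    printf("gain1 (keep current) = %d\n", gain1);
    printf("gain2 (take new)     = %d\n", gain2);
    printf("parser would %s\n",
           (gain2 > gain1) ? "switch to the new match" : "keep the current match");
    return 0;
}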