mirror of https://github.com/facebook/zstd.git synced 2025-08-05 19:15:58 +03:00

merged strats

This commit is contained in:
Yann Collet
2015-11-05 15:00:24 +01:00
parent 43ae161fe9
commit 5106a76dc9
3 changed files with 274 additions and 123 deletions

View File

@@ -385,7 +385,261 @@ FORCE_INLINE size_t ZSTD_HC_BtFindBestMatch_selectMLS (
}
-size_t ZSTD_HC_compressBlock_btLazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+/* ***********************
* Hash Chain
*************************/
/* Update chains up to ip (excluded) */
static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
{
U32* const hashTable = zc->hashTable;
const U32 hashLog = zc->params.hashLog;
U32* const chainTable = zc->chainTable;
const U32 chainMask = (1 << zc->params.chainLog) - 1;
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
while(idx < target)
{
size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++;
}
zc->nextToUpdate = target;
return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
}
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HC_insertAndFindBestMatch (
ZSTD_HC_CCtx* zc, /* Index table will be updated */
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
U32* const chainTable = zc->chainTable;
const U32 chainSize = (1 << zc->params.chainLog);
const U32 chainMask = chainSize-1;
const BYTE* const base = zc->base;
const BYTE* const dictBase = zc->dictBase;
const U32 dictLimit = zc->dictLimit;
const U32 maxDistance = (1 << zc->params.windowLog);
const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1);
U32 matchIndex;
const BYTE* match;
int nbAttempts=maxNbAttempts;
size_t ml=0;
/* HC4 match finder */
matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
while ((matchIndex>lowLimit) && (nbAttempts))
{
nbAttempts--;
if (matchIndex >= dictLimit)
{
match = base + matchIndex;
if ( (match[ml] == ip[ml])
&& (MEM_read32(match) == MEM_read32(ip)) ) /* ensures minimum match of 4 */
{
const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
if (mlt > ml)
//if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
{
ml = mlt; *offsetPtr = ip-match;
if (ip+ml >= iLimit) break;
}
}
}
else
{
match = dictBase + matchIndex;
if (MEM_read32(match) == MEM_read32(ip))
{
size_t mlt;
const BYTE* vLimit = ip + (dictLimit - matchIndex);
if (vLimit > iLimit) vLimit = iLimit;
mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
if ((ip+mlt == vLimit) && (vLimit < iLimit))
mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }
}
}
if (base + matchIndex <= ip - chainSize) break;
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
return ml;
}
FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
ZSTD_HC_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default :
case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
}
}
#if 1
FORCE_INLINE
size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx,
void* dst, size_t maxDstSize, const void* src, size_t srcSize,
const U32 searchMethod, const U32 deep) /* 0 : hc; 1 : bt */
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
const U32 maxSearches = 1 << ctx->params.searchLog;
const U32 mls = ctx->params.searchLength;
typedef size_t (*searchMax_f)(ZSTD_HC_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
size_t* offsetPtr,
U32 maxNbAttempts, U32 matchLengthSearch);
searchMax_f searchMax = searchMethod ? ZSTD_HC_BtFindBestMatch_selectMLS : ZSTD_HC_insertAndFindBestMatch_selectMLS;
/* init */
ZSTD_resetSeqStore(seqStorePtr);
if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
/* Match Loop */
while (ip <= ilimit)
{
size_t matchLength;
size_t offset=999999;
const BYTE* start;
/* try to find a first match */
if (MEM_read32(ip) == MEM_read32(ip - offset_2))
{
/* repcode : we take it*/
size_t offtmp = offset_2;
size_t litLength = ip - anchor;
matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
offset_2 = offset_1;
offset_1 = offtmp;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
ip += matchLength+MINMATCH;
anchor = ip;
continue;
}
offset_2 = offset_1;
matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls);
if (!matchLength) { ip++; continue; }
/* let's try to find a better solution */
start = ip;
while (ip<ilimit)
{
ip ++;
if (MEM_read32(ip) == MEM_read32(ip - offset_1))
{
size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
int gain2 = (int)(ml2 * 3);
int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);
if (gain2 > gain1)
matchLength = ml2, offset = 0, start = ip;
}
{
size_t offset2=999999;
size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int gain2 = (int)(ml2*(3+deep) - ZSTD_highbit((U32)offset2+1)); /* raw approx */
int gain1 = (int)(matchLength*(3+deep) - ZSTD_highbit((U32)offset+1) + (3+deep));
if (gain2 > gain1)
{
matchLength = ml2, offset = offset2, start = ip;
continue; /* search a better one */
}
}
/* let's find an even better one */
if (deep && (ip<ilimit))
{
ip ++;
if (MEM_read32(ip) == MEM_read32(ip - offset_1))
{
size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
int gain2 = (int)(ml2 * 4);
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
if (gain2 > gain1)
matchLength = ml2, offset = 0, start = ip;
}
{
size_t offset2=999999;
size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
if (gain2 > gain1)
{
matchLength = ml2, offset = offset2, start = ip;
continue;
}
}
}
break; /* nothing found : store previous solution */
}
/* store sequence */
{
size_t litLength = start - anchor;
if (offset) offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
ip = start + matchLength;
anchor = ip;
}
}
/* Last Literals */
{
size_t lastLLSize = iend - anchor;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
/* Final compression stage */
return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
seqStorePtr, srcSize);
}
size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 1);
}
size_t ZSTD_HC_compressBlock_hclazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);
}
size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);
}
#else
size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
@@ -505,116 +759,7 @@ size_t ZSTD_HC_compressBlock_btLazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDst
}
size_t ZSTD_HC_compressBlock_hclazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
/* ***********************
* Hash Chain
*************************/
/* Update chains up to ip (excluded) */
static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
{
U32* const hashTable = zc->hashTable;
const U32 hashLog = zc->params.hashLog;
U32* const chainTable = zc->chainTable;
const U32 chainMask = (1 << zc->params.chainLog) - 1;
const BYTE* const base = zc->base;
const U32 target = (U32)(ip - base);
U32 idx = zc->nextToUpdate;
while(idx < target)
{
size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++;
}
zc->nextToUpdate = target;
return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
}
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HC_insertAndFindBestMatch (
ZSTD_HC_CCtx* zc, /* Index table will be updated */
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
U32* const chainTable = zc->chainTable;
const U32 chainSize = (1 << zc->params.chainLog);
const U32 chainMask = chainSize-1;
const BYTE* const base = zc->base;
const BYTE* const dictBase = zc->dictBase;
const U32 dictLimit = zc->dictLimit;
const U32 maxDistance = (1 << zc->params.windowLog);
const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1);
U32 matchIndex;
const BYTE* match;
int nbAttempts=maxNbAttempts;
size_t ml=0;
/* HC4 match finder */
matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
while ((matchIndex>lowLimit) && (nbAttempts))
{
nbAttempts--;
if (matchIndex >= dictLimit)
{
match = base + matchIndex;
if ( (match[ml] == ip[ml])
&& (MEM_read32(match) == MEM_read32(ip)) ) /* ensures minimum match of 4 */
{
const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
if (mlt > ml)
//if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
{
ml = mlt; *offsetPtr = ip-match;
if (ip+ml >= iLimit) break;
}
}
}
else
{
match = dictBase + matchIndex;
if (MEM_read32(match) == MEM_read32(ip))
{
size_t mlt;
const BYTE* vLimit = ip + (dictLimit - matchIndex);
if (vLimit > iLimit) vLimit = iLimit;
mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
if ((ip+mlt == vLimit) && (vLimit < iLimit))
mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }
}
}
if (base + matchIndex <= ip - chainSize) break;
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
return ml;
}
FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
ZSTD_HC_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default :
case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
}
}
size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
@@ -733,7 +878,6 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
seqStorePtr, srcSize);
}
size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
@@ -833,6 +977,11 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
}
#endif
size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
seqStore_t* seqStorePtr = &(ctx->seqStore);
@@ -923,10 +1072,10 @@ static ZSTD_HC_blockCompressor ZSTD_HC_selectBlockCompressor(ZSTD_HC_strategy st
return ZSTD_HC_compressBlock_greedy;
case ZSTD_HC_lazy:
return ZSTD_HC_compressBlock_lazy;
-case ZSTD_HC_lazydeep:
-return ZSTD_HC_compressBlock_lazydeep;
+case ZSTD_HC_hclazy2:
+return ZSTD_HC_compressBlock_hclazy2;
case ZSTD_HC_btlazy2:
-return ZSTD_HC_compressBlock_btLazy2;
+return ZSTD_HC_compressBlock_btlazy2;
}
}
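The block-compressor diff above hinges on one pattern: ZSTD_HC_compressBlock_lazy_generic is FORCE_INLINE and takes searchMethod and deep as parameters, while every public wrapper passes them as literal constants, so the compiler specializes the inlined body for each strategy and prunes the dead branches ("template emulation", as the comment on ZSTD_HC_insertAndFindBestMatch puts it). A minimal self-contained sketch of that pattern follows; every name in it is illustrative, not a zstd symbol.

#include <stdio.h>

/* zstd's FORCE_INLINE maps to a compiler-specific always-inline hint;
 * plain static inline is enough to show the idea in this sketch.      */
#define FORCE_INLINE static inline

/* Two interchangeable match finders, standing in for the hash-chain
 * and binary-tree searches selected by `searchMethod` above.          */
static size_t find_cheap(const char* p, size_t len)  { return len ? (size_t)(p[0] == 'x') : 0; }
static size_t find_costly(const char* p, size_t len) { size_t n = 0; while (n < len && p[n] == 'x') n++; return n; }

/* Generic body: `useCostly` and `deep` are literal constants at every
 * call site, so after inlining the dead branch disappears and each
 * wrapper below compiles down to its own specialized routine.         */
FORCE_INLINE size_t scan_generic(const char* src, size_t srcSize,
                                 const int useCostly, const int deep)
{
    size_t best = useCostly ? find_costly(src, srcSize)
                            : find_cheap(src, srcSize);
    if (deep && srcSize > 1)            /* pretend a second, deeper pass */
        best += find_cheap(src + 1, srcSize - 1);
    return best;
}

/* Thin wrappers: one exported entry point per strategy. */
size_t scan_fast(const char* s, size_t n)   { return scan_generic(s, n, 0, 0); }
size_t scan_strong(const char* s, size_t n) { return scan_generic(s, n, 1, 1); }

int main(void)
{
    const char sample[] = "xxxa";
    printf("fast=%zu strong=%zu\n",
           scan_fast(sample, sizeof(sample) - 1),
           scan_strong(sample, sizeof(sample) - 1));
    return 0;
}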

View File

@@ -45,7 +45,9 @@ extern "C" {
/* *************************************
* Types
***************************************/
-typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazydeep, ZSTD_HC_btlazy2 } ZSTD_HC_strategy;
+/** from faster to stronger */
+typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_hclazy2, ZSTD_HC_btlazy2 } ZSTD_HC_strategy;
typedef struct
{
U32 windowLog; /* largest match distance : impact decompression buffer size */
@@ -53,7 +55,7 @@ typedef struct
U32 hashLog; /* dispatch table : larger == more memory, faster*/
U32 searchLog; /* nb of searches : larger == more compression, slower*/
U32 searchLength; /* size of matches : larger == faster decompression */
-ZSTD_HC_strategy strategy; /* faster to stronger : greedy, lazy, lazydeep, btlazy2 */
+ZSTD_HC_strategy strategy;
} ZSTD_HC_parameters;
/* parameters boundaries */
@@ -110,11 +112,11 @@ static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1]
{ 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */
{ 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */
{ 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */
-{ 21, 20, 20, 5, 5, ZSTD_HC_lazydeep }, /* level 11 */
-{ 22, 20, 22, 5, 5, ZSTD_HC_lazydeep }, /* level 12 */
-{ 22, 20, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 13 */
-{ 22, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 14 */
-{ 22, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 15 */
+{ 21, 20, 20, 5, 5, ZSTD_HC_hclazy2 }, /* level 11 */
+{ 22, 20, 22, 5, 5, ZSTD_HC_hclazy2 }, /* level 12 */
+{ 22, 20, 22, 6, 5, ZSTD_HC_hclazy2 }, /* level 13 */
+{ 22, 21, 22, 6, 5, ZSTD_HC_hclazy2 }, /* level 14 */
+{ 22, 21, 22, 6, 5, ZSTD_HC_hclazy2 }, /* level 15 */
{ 22, 21, 22, 4, 5, ZSTD_HC_btlazy2 }, /* level 16 */
{ 23, 23, 23, 4, 5, ZSTD_HC_btlazy2 }, /* level 17 */
{ 23, 23, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 18 */
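For context on the table above: each row of ZSTD_HC_defaultParameters bundles the knobs declared in ZSTD_HC_parameters (the columns appear to follow the struct order windowLog, chainLog, hashLog, searchLog, searchLength, strategy), and a compression level is simply a clamped index into that table. The sketch below shows such a lookup with stand-in names; the numeric rows are illustrative, not zstd's.

#include <stdio.h>

/* Stand-in mirroring the shape of ZSTD_HC_parameters and its default
 * table; the type names, bound and values here are illustrative.      */
typedef enum { HC_greedy, HC_lazy, HC_hclazy2, HC_btlazy2 } hc_strategy;

typedef struct {
    unsigned windowLog, chainLog, hashLog, searchLog, searchLength;
    hc_strategy strategy;
} hc_parameters;

#define HC_MAX_CLEVEL 4
static const hc_parameters hc_defaultParameters[HC_MAX_CLEVEL + 1] = {
    { 19, 13, 14, 1, 7, HC_greedy  },   /* level 0 (placeholder values) */
    { 20, 18, 19, 2, 6, HC_greedy  },   /* level 1                      */
    { 21, 19, 20, 4, 5, HC_lazy    },   /* level 2                      */
    { 21, 20, 20, 5, 5, HC_hclazy2 },   /* level 3                      */
    { 22, 21, 22, 4, 5, HC_btlazy2 },   /* level 4                      */
};

/* Clamp the requested level into the table's range, then index it. */
static hc_parameters params_for_level(int cLevel)
{
    if (cLevel < 0) cLevel = 0;
    if (cLevel > HC_MAX_CLEVEL) cLevel = HC_MAX_CLEVEL;
    return hc_defaultParameters[cLevel];
}

int main(void)
{
    hc_parameters p = params_for_level(99);   /* clamps to the strongest row */
    printf("windowLog=%u searchLog=%u strategy=%d\n",
           p.windowLog, p.searchLog, (int)p.strategy);
    return 0;
}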

View File

@@ -429,7 +429,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
}
-const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_lazydeep", "ZSTD_HC_btlazy2 " };
+const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_hclazy2", "ZSTD_HC_btlazy2" };
static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize)
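g_stratName is presumably indexed by the ZSTD_HC_strategy value when the winner is printed, so its entries must stay in the enum's order; that is why the lazydeep -> hclazy2 rename is mirrored here as well. A tiny self-contained illustration of that coupling, using stand-in names only:

#include <stdio.h>

/* Stand-in for the benchmark's name table; entries must keep the same
 * order as the strategy enum because they are indexed by its value.   */
static const char* const stratName[] =
    { "ZSTD_HC_greedy", "ZSTD_HC_lazy", "ZSTD_HC_hclazy2", "ZSTD_HC_btlazy2" };

int main(void)
{
    int strategy = 2;                      /* hclazy2's position in the enum */
    printf("%s\n", stratName[strategy]);   /* prints "ZSTD_HC_hclazy2"        */
    return 0;
}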
{ {