From be2010ea1ba97e8a0b1954ac8f6b70feff73fea6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 31 Oct 2015 12:57:14 +0100 Subject: [PATCH] lazy strategy --- lib/zstd_internal.h | 2 + lib/zstdhc.c | 118 +++++++++++++++++++++++++++++++++++++++--- lib/zstdhc_static.h | 56 ++++++++++---------- programs/bench.c | 2 +- programs/paramgrill.c | 27 +++++++--- programs/zstdcli.c | 2 +- 6 files changed, 165 insertions(+), 42 deletions(-) diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 09fa1923e..807725923 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -49,6 +49,8 @@ extern "C" { ****************************************/ static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } +#define MIN(a,b) ((a)<(b) ? (a) : (b)) + static unsigned ZSTD_highbit(U32 val) { # if defined(_MSC_VER) /* Visual */ diff --git a/lib/zstdhc.c b/lib/zstdhc.c index 5c67fc3d7..008820766 100644 --- a/lib/zstdhc.c +++ b/lib/zstdhc.c @@ -191,7 +191,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 h, U32 mls) * HC Compression ***************************************/ /* Update chains up to ip (excluded) */ -static void ZSTD_HC_insert (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) +static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; @@ -210,6 +210,7 @@ static void ZSTD_HC_insert (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) } zc->nextToUpdate = target; + return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)]; } @@ -220,8 +221,6 @@ size_t ZSTD_HC_insertAndFindBestMatch ( const BYTE** matchpos, const U32 maxNbAttempts, const U32 matchLengthSearch) { - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->params.hashLog; U32* const chainTable = zc->chainTable; const U32 chainSize = (1 << zc->params.chainLog); const U32 chainMask = chainSize-1; @@ -236,8 +235,7 @@ size_t ZSTD_HC_insertAndFindBestMatch ( size_t ml=0; /* HC4 match finder */ - ZSTD_HC_insert(zc, ip, matchLengthSearch); - matchIndex = hashTable[ZSTD_HC_hashPtr(ip, hashLog, matchLengthSearch)]; + matchIndex = ZSTD_HC_insertAndFindFirstIndex(zc, ip, matchLengthSearch); while ((matchIndex>=lowLimit) && (nbAttempts)) { @@ -291,7 +289,7 @@ static size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( } -static size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -367,6 +365,114 @@ static size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS seqStorePtr, srcSize); } +size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* match = istart; + + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + const U32 maxSearches = 1 << ctx->params.searchLog; + const U32 mls = ctx->params.searchLength; + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip <= ilimit) + { + size_t matchLength; + size_t offset; + const BYTE* start; + + /* try to find a first match */ + if (MEM_read32(ip) == MEM_read32(ip - offset_2)) + { + /* repcode : we take it*/ + size_t offtmp = offset_2; + size_t litLength = ip - anchor; + matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); + offset_2 = offset_1; + offset_1 = offtmp; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); + ip += matchLength+MINMATCH; + anchor = ip; + continue; + } + + offset_2 = offset_1; + matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + if (!matchLength) { ip++; continue; } + + /* let's try to find a better solution */ + offset = ip - match; + start = ip; + + while (ip gain1) + { + matchLength = ml2, offset = 0, start = ip; + break; + } + } + { + size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls); + size_t offset2 = ip - match; + int gain2 = ml2 - (ZSTD_highbit((U32)offset2) / 4); /* raw approx */ + int gain1 = matchLength - (ZSTD_highbit((U32)offset) / 4); + if (gain2 > gain1) + { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } + } + + break; /* nothing found : store previous one */ + } + + /* store sequence */ + { + size_t litLength = start - anchor; + if (offset) offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + ip = start + matchLength; + anchor = ip; + } + + } + + /* Last Literals */ + { + size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } + + /* Final compression stage */ + return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + seqStorePtr, srcSize); +} + + +size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + if (ctx->params.strategy==ZSTD_HC_greedy) + return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize); + return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize); +} + + static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, void* dst, size_t maxDstSize, const void* src, size_t srcSize) diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h index 96f0d7cde..c67894276 100644 --- a/lib/zstdhc_static.h +++ b/lib/zstdhc_static.h @@ -45,6 +45,7 @@ extern "C" { /* ************************************* * Types ***************************************/ +typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy } ZSTD_HC_strategy; typedef struct { U32 windowLog; /* largest match distance : impact decompression buffer size */ @@ -52,6 +53,7 @@ typedef struct U32 hashLog; /* dispatch table : larger == more memory, faster*/ U32 searchLog; /* nb of searches : larger == more compression, slower*/ U32 searchLength; /* size of matches : larger == faster decompression */ + ZSTD_HC_strategy strategy; /* greedy, lazy (stronger, slower) */ } ZSTD_HC_parameters; /* parameters boundaries */ @@ -92,33 +94,33 @@ size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize); #define ZSTD_HC_MAX_CLEVEL 26 static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = { /* W, C, H, S */ - { 18, 12, 14, 1, 4 }, /* level 0 - never used */ - { 18, 12, 14, 1, 4 }, /* level 1 - in fact redirected towards zstd fast */ - { 18, 12, 15, 2, 4 }, /* level 2 */ - { 19, 14, 16, 3, 4 }, /* level 3 */ - { 20, 15, 17, 4, 5 }, /* level 4 */ - { 20, 17, 19, 4, 5 }, /* level 5 */ - { 20, 19, 19, 4, 5 }, /* level 6 */ - { 20, 19, 19, 5, 5 }, /* level 7 */ - { 20, 20, 20, 5, 5 }, /* level 8 */ - { 20, 20, 20, 6, 5 }, /* level 9 */ - { 21, 21, 20, 5, 5 }, /* level 10 */ - { 22, 21, 22, 6, 5 }, /* level 11 */ - { 23, 21, 22, 6, 5 }, /* level 12 */ - { 23, 21, 22, 7, 5 }, /* level 13 */ - { 22, 22, 23, 7, 5 }, /* level 14 */ - { 22, 22, 23, 7, 5 }, /* level 15 */ - { 22, 22, 23, 8, 5 }, /* level 16 */ - { 22, 22, 23, 8, 5 }, /* level 17 */ - { 22, 22, 23, 9, 5 }, /* level 18 */ - { 22, 22, 23, 9, 5 }, /* level 19 */ - { 23, 23, 23, 9, 5 }, /* level 20 */ - { 23, 23, 23, 9, 5 }, /* level 21 */ - { 23, 23, 23, 10, 5 }, /* level 22 */ - { 23, 23, 23, 10, 5 }, /* level 23 */ - { 23, 23, 23, 11, 5 }, /* level 24 */ - { 23, 23, 23, 12, 5 }, /* level 25 */ - { 23, 23, 23, 13, 5 }, /* level 26 */ /* ZSTD_HC_MAX_CLEVEL */ + { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */ + { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */ + { 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */ + { 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */ + { 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */ + { 20, 19, 19, 3, 5, ZSTD_HC_greedy }, /* level 5 */ + { 20, 19, 19, 4, 5, ZSTD_HC_greedy }, /* level 6 */ + { 20, 19, 19, 5, 5, ZSTD_HC_greedy }, /* level 7 */ + { 20, 20, 20, 5, 5, ZSTD_HC_greedy }, /* level 8 */ + { 20, 20, 20, 6, 5, ZSTD_HC_greedy }, /* level 9 */ + { 21, 20, 21, 6, 5, ZSTD_HC_greedy }, /* level 10 */ + { 22, 21, 22, 6, 5, ZSTD_HC_greedy }, /* level 11 */ + { 23, 21, 22, 6, 5, ZSTD_HC_greedy }, /* level 12 */ + { 22, 22, 22, 6, 5, ZSTD_HC_greedy }, /* level 13 */ + { 22, 22, 23, 7, 5, ZSTD_HC_greedy }, /* level 14 */ + { 22, 22, 23, 7, 5, ZSTD_HC_greedy }, /* level 15 */ + { 22, 22, 23, 8, 5, ZSTD_HC_greedy }, /* level 16 */ + { 22, 22, 23, 8, 5, ZSTD_HC_greedy }, /* level 17 */ + { 22, 22, 23, 9, 5, ZSTD_HC_greedy }, /* level 18 */ + { 22, 22, 23, 10, 5, ZSTD_HC_greedy }, /* level 19 */ + { 23, 23, 23, 9, 5, ZSTD_HC_greedy }, /* level 20 */ + { 23, 23, 23, 9, 5, ZSTD_HC_greedy }, /* level 21 */ + { 23, 23, 23, 10, 5, ZSTD_HC_greedy }, /* level 22 */ + { 23, 23, 23, 11, 5, ZSTD_HC_greedy }, /* level 23 */ + { 23, 23, 23, 11, 5, ZSTD_HC_greedy }, /* level 24 */ + { 24, 24, 23, 11, 5, ZSTD_HC_greedy }, /* level 25 */ + { 24, 24, 24, 12, 5, ZSTD_HC_greedy }, /* level 26 */ /* ZSTD_HC_MAX_CLEVEL */ }; #if defined (__cplusplus) diff --git a/programs/bench.c b/programs/bench.c index 5c410f39a..85c3700b4 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -237,7 +237,7 @@ static size_t local_compress_fast (void* dst, size_t maxDstSize, const void* src return ZSTD_compress(dst, maxDstSize, src, srcSize); } -#define MIN(a,b) (a g_maxParamTime) break; /* Compression */ - DISPLAY("%1u-W%02uC%02uH%02uS%02uL%02u : %9u ->\r", loopNb, Wlog, Clog, Hlog, Slog, Slength, (U32)srcSize); + DISPLAY("%1u-%s : %9u ->\r", loopNb, name, (U32)srcSize); memset(compressedBuffer, 0xE5, maxCompressedSize); nbLoops = 0; @@ -367,7 +370,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, cSize += blockTable[blockNb].cSize; if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops; ratio = (double)srcSize / (double)cSize; - DISPLAY("%1u-W%02uC%02uH%02uS%02uL%02u : %9u ->", loopNb, Wlog, Clog, Hlog, Slog, Slength, (U32)srcSize); + DISPLAY("%1u-%s : %9u ->", loopNb, name, (U32)srcSize); DISPLAY(" %9u (%4.3f),%7.1f MB/s\r", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.); resultPtr->cSize = cSize; resultPtr->cSpeed = (U32)((double)srcSize / fastestC); @@ -389,7 +392,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, milliTime = BMK_GetMilliSpan(milliTime); if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops; - DISPLAY("%1u-W%02uC%02uH%02uS%02uL%02u : %9u -> ", loopNb, Wlog, Clog, Hlog, Slog, Slength, (U32)srcSize); + DISPLAY("%1u-%s : %9u -> ", loopNb, name, (U32)srcSize); DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.); DISPLAY("%7.1f MB/s\r", (double)srcSize / fastestD / 1000.); resultPtr->dSpeed = (U32)((double)srcSize / fastestD); @@ -421,11 +424,14 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, return 0; } +const char* g_stratName[2] = { "ZSTD_HC_greedy", "ZSTD_HC_lazy " }; static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize) { DISPLAY("\r%79s\r", ""); - fprintf(f," {%3u,%3u,%3u,%3u,%3u }, ", params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength); + fprintf(f," {%3u,%3u,%3u,%3u,%3u, %s }, ", + params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, + g_stratName[params.strategy]); fprintf(f, "/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */ \n", cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.); @@ -446,7 +452,7 @@ static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSi fprintf(f, "\n /* Selected configurations : */ \n"); fprintf(f, "#define ZSTD_HC_MAX_CLEVEL %2u \n", ZSTD_HC_MAX_CLEVEL); fprintf(f, "static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = {\n"); - fprintf(f, " /* W, C, H, S, L */ \n"); + fprintf(f, " /* W, C, H, S, L, strat */ \n"); for (cLevel=0; cLevel <= ZSTD_HC_MAX_CLEVEL; cLevel++) BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize); @@ -593,7 +599,7 @@ static void playAround(FILE* f, winnerInfo_t* winners, for (; nbChanges; nbChanges--) { - const U32 changeID = FUZ_rand(&g_rand) % 9; + const U32 changeID = FUZ_rand(&g_rand) % 12; switch(changeID) { case 0: @@ -616,6 +622,10 @@ static void playAround(FILE* f, winnerInfo_t* winners, p.searchLength++; break; case 9: p.searchLength--; break; + case 10: + p.strategy++; break; + case 11: + p.strategy--; break; } } @@ -631,6 +641,8 @@ static void playAround(FILE* f, winnerInfo_t* winners, if (p.searchLog < ZSTD_HC_SEARCHLOG_MIN) continue; if (p.searchLength > ZSTD_HC_SEARCHLENGTH_MAX) continue; if (p.searchLength < ZSTD_HC_SEARCHLENGTH_MIN) continue; + if (p.strategy < ZSTD_HC_greedy) continue; + if (p.strategy > ZSTD_HC_lazy) continue; /* exclude faster if already played params */ if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1)) continue; @@ -662,6 +674,7 @@ static void BMK_selectRandomStart( p.searchLog = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN; p.windowLog = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN; p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN; + p.strategy = FUZ_rand(&g_rand) & 1; playAround(f, winners, p, srcBuffer, srcSize, ctx); } else diff --git a/programs/zstdcli.c b/programs/zstdcli.c index c6f7b5530..5bfe25b62 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -70,7 +70,7 @@ **************************************/ #define COMPRESSOR_NAME "zstd command line interface" #ifndef ZSTD_VERSION -# define ZSTD_VERSION "v0.0.1" +# define ZSTD_VERSION "v0.3.0" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__