From 38654988f3d568df7e07700622da2397de314b3d Mon Sep 17 00:00:00 2001 From: inikep Date: Thu, 21 Apr 2016 12:18:47 +0200 Subject: [PATCH] minor speed improvements 2 bench.c: block size has to be bigger than 32 bytes zstdcli.c: support for e.g. -B16k -B16m --- lib/zstd_opt.h | 41 ++++++++++++++++++++--------------------- programs/bench.c | 8 ++++---- programs/zstdcli.c | 10 +++++----- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h index 122818523..ae09a2693 100644 --- a/lib/zstd_opt.h +++ b/lib/zstd_opt.h @@ -260,7 +260,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, U32 nbCompares, const U32 mls, - U32 extDict, ZSTD_match_t* matches) + U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) { const BYTE* const base = zc->base; const U32 current = (U32)(ip-base); @@ -285,7 +285,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( U32 mnum = 0; const U32 minMatch = (mls == 3) ? 3 : 4; - size_t bestLength = minMatch-1; + size_t bestLength = minMatchLen-1; if (minMatch == 3) { /* HC3 match finder */ U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); @@ -385,26 +385,26 @@ update: static U32 ZSTD_BtGetAllMatches ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); } static U32 ZSTD_BtGetAllMatches_selectMLS ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* 
matches) + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) { switch(matchLengthSearch) { - case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches); + case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches); - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches); + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); } } @@ -412,26 +412,26 @@ static U32 ZSTD_BtGetAllMatches_selectMLS ( static U32 ZSTD_BtGetAllMatches_extDict ( ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches) + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) { if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); } static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( ZSTD_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches) + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) { switch(matchLengthSearch) { - case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, 
matches); + case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches); - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches); + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); } } @@ -499,7 +499,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } } - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -604,8 +604,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { @@ -799,7 +798,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } } } - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches); /* first search (depth 0) */ + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, 
iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); if (!last_pos && !match_num) { ip++; continue; } @@ -913,7 +912,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, } while (mlen >= minMatch); } } } - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches); + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); if (match_num > 0 && matches[match_num-1].len > sufficient_len) { diff --git a/programs/bench.c b/programs/bench.c index 2892ef742..8470ed324 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -248,7 +248,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, U32 nbFiles, const void* dictBuffer, size_t dictBufferSize, benchResult_t *result) { - size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ + size_t const blockSize = (g_blockSize>=32 ? 
g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ @@ -488,9 +488,9 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, dictBuffer, dictBufferSize, &result); if (g_displayLevel == 1) { if (g_additionalParam) - DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam); + DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName, g_additionalParam); else - DISPLAY("%-3i%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); + DISPLAY("%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", -l, (int)result.cSize, result.ratio, result.cSpeed, result.dSpeed, displayName); total.cSize += result.cSize; total.cSpeed += result.cSpeed; total.dSpeed += result.dSpeed; @@ -501,7 +501,7 @@ static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, total.cSpeed /= 1+cLevelLast-cLevel; total.dSpeed /= 1+cLevelLast-cLevel; total.ratio /= 1+cLevelLast-cLevel; - DISPLAY("avg%11i (%5.3f) %6.1f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName); + DISPLAY("avg%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", (int)total.cSize, total.ratio, total.cSpeed, total.dSpeed, displayName); } } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 3019edcf2..5529cf8e1 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -41,6 +41,7 @@ #include <stdio.h> /* fprintf, getchar */ #include <stdlib.h> /* exit, calloc, free */ #include <string.h> /* strcmp, strlen */ +#include <ctype.h> /* toupper */ #include "fileio.h" #ifndef 
ZSTD_NOBENCH # include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */ @@ -304,9 +305,9 @@ int main(int argCount, const char** argv) argument++; while ((*argument >='0') && (*argument <='9')) bSize *= 10, bSize += *argument++ - '0'; - if (*argument=='K') bSize<<=10, argument++; /* allows using KB notation */ - if (*argument=='M') bSize<<=20, argument++; - if (*argument=='B') argument++; + if (toupper(*argument)=='K') bSize<<=10, argument++; /* allows using KB notation */ + if (toupper(*argument)=='M') bSize<<=20, argument++; + if (toupper(*argument)=='B') argument++; BMK_setNotificationLevel(displayLevel); BMK_SetBlockSize(bSize); } @@ -368,8 +369,7 @@ int main(int argCount, const char** argv) maxDictSize = 0; while ((*argument>='0') && (*argument<='9')) maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++; - if (*argument=='k' || *argument=='K') - maxDictSize <<= 10; + if (toupper(*argument)=='K') maxDictSize <<= 10; continue; }