From e148db366ea424bbc2dc7269c566adb97cc11ec6 Mon Sep 17 00:00:00 2001 From: George Lu Date: Fri, 20 Jul 2018 14:35:09 -0700 Subject: [PATCH] Separate capacity vs size Also: Make suggested fixes -varInds_t -reorder some arguments -remove code duplication -update README / -h -Fix memory leaks --- programs/bench.c | 63 +- programs/bench.h | 8 +- tests/README.md | 32 + tests/fullbench.c | 2 +- tests/paramgrill.c | 1965 ++++++++++++++++---------------------------- 5 files changed, 780 insertions(+), 1290 deletions(-) mode change 100755 => 100644 tests/paramgrill.c diff --git a/programs/bench.c b/programs/bench.c index a34b74cf1..177dbe0e3 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -171,6 +171,8 @@ struct BMK_timeState_t{ static void BMK_initCCtx(ZSTD_CCtx* ctx, const void* dictBuffer, size_t dictBufferSize, int cLevel, const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) { + ZSTD_CCtx_reset(ctx); + ZSTD_CCtx_resetParameters(ctx); if (adv->nbWorkers==1) { ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0); } else { @@ -195,6 +197,7 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx, static void BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize) { + ZSTD_DCtx_reset(dctx); ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); } @@ -291,9 +294,9 @@ BMK_customReturn_t BMK_benchFunction( BMK_initFn_t initFn, void* initPayload, size_t blockCount, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, - void* const * const dstBlockBuffers, size_t* dstBlockCapacitiesToSizes, + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, size_t* cSizes, unsigned nbLoops) { - size_t srcSize = 0, dstSize = 0, ind = 0; + size_t dstSize = 0; U64 totalTime; BMK_customReturn_t retval; @@ -303,36 +306,37 @@ BMK_customReturn_t BMK_benchFunction( EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); } - for(ind = 0; ind < blockCount; ind++) { - srcSize += srcBlockSizes[ind]; - } - { size_t i; for(i = 0; i < blockCount; i++) { - memset(dstBlockBuffers[i], 0xE5, dstBlockCapacitiesToSizes[i]); /* warm up and erase result buffer */ + memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ } - - //UTIL_sleepMilli(5); /* give processor time to other processes */ - //UTIL_waitForNextTick(); +#if 0 + /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops + * (Makes former slower) + */ + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); +#endif } { - unsigned i, j, firstIter = 1; + unsigned i, j; clockStart = UTIL_getTime(); if(initFn != NULL) { initFn(initPayload); } for(i = 0; i < nbLoops; i++) { for(j = 0; j < blockCount; j++) { - size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacitiesToSizes[j], benchPayload); + size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); if(ZSTD_isError(res)) { EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", - j, (U32)dstBlockCapacitiesToSizes[j], ZSTD_getErrorName(res)); - } else if(firstIter) { + j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); + } else if(i == nbLoops - 1) { dstSize += res; - dstBlockCapacitiesToSizes[j] = res; + if(cSizes != NULL) { + cSizes[j] = res; + } } } - firstIter = 0; } totalTime = UTIL_clockSpanNano(clockStart); } @@ -369,7 +373,7 @@ BMK_customTimedReturn_t 
BMK_benchFunctionTimed( BMK_initFn_t initFn, void* initPayload, size_t blockCount, const void* const* const srcBlockBuffers, const size_t* srcBlockSizes, - void * const * const dstBlockBuffers, size_t * dstBlockCapacitiesToSizes) + void * const * const dstBlockBuffers, const size_t * dstBlockCapacities, size_t* dstSizes) { U64 fastest = cont->fastestTime; int completed = 0; @@ -384,9 +388,9 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed( UTIL_sleep(COOLPERIOD_SEC); cont->coolTime = UTIL_getTime(); } - + /* reinitialize capacity */ r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, - blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacitiesToSizes, cont->nbLoops); + blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, dstSizes, cont->nbLoops); if(r.result.error) { /* completed w/ error */ r.completed = 1; return r; @@ -420,7 +424,7 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed( /* benchMem with no allocation */ static BMK_return_t BMK_benchMemAdvancedNoAlloc( const void ** const srcPtrs, size_t* const srcSizes, - void** const cPtrs, size_t* const cSizes, + void** const cPtrs, size_t* const cCapacities, size_t* const cSizes, void** const resPtrs, size_t* const resSizes, void** resultBufferPtr, void* compressedBuffer, const size_t maxCompressedSize, @@ -485,11 +489,11 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( srcPtrs[nbBlocks] = (const void*)srcPtr; srcSizes[nbBlocks] = thisBlockSize; cPtrs[nbBlocks] = (void*)cPtr; - cSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); + cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); resPtrs[nbBlocks] = (void*)resPtr; resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? 
(size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize; - cPtr += cSizes[nbBlocks]; + cPtr += cCapacities[nbBlocks]; resPtr += thisBlockSize; remaining -= thisBlockSize; } @@ -540,7 +544,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) { if(!intermediateResultCompress.completed) { intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes); + nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes); if(intermediateResultCompress.result.error) { results.error = intermediateResultCompress.result.error; return results; @@ -564,7 +568,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( if(!intermediateResultDecompress.completed) { intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes); + nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL); if(intermediateResultDecompress.result.error) { results.error = intermediateResultDecompress.result.error; return results; @@ -590,7 +594,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( if(adv->mode != BMK_decodeOnly) { BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); + nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes, adv->nbSeconds); if(compressionResults.error) { results.error = compressionResults.error; return results; @@ -617,7 +621,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( BMK_customReturn_t decompressionResults = BMK_benchFunction( &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void* const*)cPtrs, cSizes, resPtrs, resSizes, + (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL, adv->nbSeconds); if(decompressionResults.error) { results.error = decompressionResults.error; @@ -717,8 +721,10 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*)); size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + size_t* const cCapacities = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); @@ -744,13 +750,11 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; if (!allocationincomplete) { - results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, + results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cCapacities, cSizes, resPtrs, resSizes, &resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress, srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); } - - /* clean up */ BMK_freeTimeState(timeStateCompress); @@ -764,6 +768,7 @@ BMK_return_t 
BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
     free(srcSizes);
     free(cPtrs);
     free(cSizes);
+    free(cCapacities);
     free(resPtrs);
     free(resSizes);
diff --git a/programs/bench.h b/programs/bench.h
index 25d9f24a7..2a9945ac8 100644
--- a/programs/bench.h
+++ b/programs/bench.h
@@ -177,6 +177,7 @@ typedef size_t (*BMK_initFn_t)(void*);
  * dstBuffers - an array of buffers to be written into by benchFn
  * dstCapacitiesToSizes - an array of the capacities of above buffers. Output modified to compressed sizes of those blocks.
  * nbLoops - defines number of times benchFn is run.
+ * cSizes - (optional) assumed to be an array of size blockCount; the compressed size of each block will be written to it.
  * return
  * .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return
  * of the calls to initFn and benchFn, or if benchFunction errors internally
@@ -187,12 +188,11 @@ typedef size_t (*BMK_initFn_t)(void*);
  * into dstBuffer, hence this value will be the total amount of bytes written to
  * dstBuffer.
  */
-BMK_customReturn_t BMK_benchFunction(
-    BMK_benchFn_t benchFn, void* benchPayload,
+BMK_customReturn_t BMK_benchFunction(BMK_benchFn_t benchFn, void* benchPayload,
     BMK_initFn_t initFn, void* initPayload,
     size_t blockCount,
     const void* const * const srcBuffers, const size_t* srcSizes,
-    void * const * const dstBuffers, size_t* dstCapacitiesToSizes,
+    void * const * const dstBuffers, const size_t* dstCapacities, size_t* cSizes,
     unsigned nbLoops);
@@ -221,7 +221,7 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed(BMK_timedFnState_t* cont,
     BMK_initFn_t initFn, void* initPayload,
     size_t blockCount,
     const void* const * const srcBlockBuffers, const size_t* srcBlockSizes,
-    void* const * const dstBlockBuffers, size_t* dstBlockCapacitiesToSizes);
+    void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, size_t* cSizes);

 #endif /* BENCH_H_121279284357 */
diff --git a/tests/README.md b/tests/README.md
index 24a28ab7b..8bedd0a3c 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -88,3 +88,35 @@ as well as the 10,000 original files for more detailed comparison of decompressi
 will choose a random seed, and for 1 minute,
 generate random test frames and ensure that the
 zstd library correctly decompresses them in both simple and streaming modes.
+
+#### `paramgrill` - a tool for generating compression table parameters and for optimizing parameters on a file given constraints
+
+Full list of arguments
+```
+ -T#          : set level 1 speed objective
+ -B#          : cut input into blocks of size # (default : single block)
+ -i#          : iteration loops
+ -S           : benchmarks a single run (example command: -Sl3w10h12)
+    w# - windowLog
+    h# - hashLog
+    c# - chainLog
+    s# - searchLog
+    l# - searchLength
+    t# - targetLength
+    S# - strategy
+    L# - level
+ --zstd=      : Single run, parameter selection syntax same as zstdcli
+ --optimize=  : find parameters that maximize compression ratio under the given constraints
+                Can use all --zstd= commands to constrain the type of solution found, in addition to the following constraints
+    cSpeed=   - Minimum compression speed
+    dSpeed=   - Minimum decompression speed
+    cMem=     - Maximum compression memory
+    lvl=      - Automatically sets the compression speed constraint to the speed of that level
+ -O#          : same as --optimize= with less verbose syntax
+ -P#          : generated sample compressibility
+ -t#          : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours))
+ -v           : Prints benchmarking output
+ -D           : the next argument is interpreted as a dictionary file
+
+```
+Any inputs afterwards are treated as files to benchmark.
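
A minimal caller sketch of the reworked BMK_benchFunction() prototype above, assuming a single-block setup and a plain ZSTD_compressCCtx() wrapper; the wrapper function, buffer names and loop count are illustrative and not part of the patch:

/* hypothetical caller of the new (capacities in, sizes out) interface */
#include <stdio.h>
#include <stdlib.h>
#include "bench.h"   /* BMK_benchFunction, BMK_customReturn_t */
#include "zstd.h"

/* matches BMK_benchFn_t : (src, srcSize, dst, dstCapacity, payload) -> size or error code */
static size_t rawCompress(const void* src, size_t srcSize,
                          void* dst, size_t dstCapacity, void* payload)
{
    ZSTD_CCtx* const cctx = (ZSTD_CCtx*)payload;
    return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 3);
}

static int benchOneBlock(const void* src, size_t srcSize)
{
    size_t const capacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc(capacity);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    const void* srcPtrs[1];
    size_t srcSizes[1];
    void* dstPtrs[1];
    size_t dstCapacities[1];   /* read-only input: never overwritten by the benchmark */
    size_t cSizes[1];          /* output: compressed size per block; NULL is also accepted */
    BMK_customReturn_t r;
    if (dst == NULL || cctx == NULL) { free(dst); ZSTD_freeCCtx(cctx); return 1; }
    srcPtrs[0] = src;   srcSizes[0] = srcSize;
    dstPtrs[0] = dst;   dstCapacities[0] = capacity;
    cSizes[0] = 0;
    r = BMK_benchFunction(rawCompress, cctx, NULL, NULL,
                          1 /* blockCount */, srcPtrs, srcSizes,
                          dstPtrs, dstCapacities, cSizes,
                          4 /* nbLoops */);
    if (!r.error)
        printf("compressed to %u bytes, %.0f ns per run\n",
               (unsigned)cSizes[0], (double)r.result.nanoSecPerRun);
    ZSTD_freeCCtx(cctx);
    free(dst);
    return r.error;
}
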
diff --git a/tests/fullbench.c b/tests/fullbench.c index 9e7639f92..270cac86a 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -516,7 +516,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb, int cLevel, { r = BMK_benchFunction(benchFunction, buff2, NULL, NULL, 1, &src, &srcSize, - (void **)&dstBuff, &dstBuffSize, g_nbIterations); + (void **)&dstBuff, &dstBuffSize, NULL, g_nbIterations); if(r.error) { DISPLAY("ERROR %d ! ! \n", r.error); errorcode = r.error; diff --git a/tests/paramgrill.c b/tests/paramgrill.c old mode 100755 new mode 100644 index 02811b9c1..099be3688 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -56,7 +56,7 @@ static const int g_maxNbVariations = 64; #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define TIMED 0 #ifndef DEBUG -# define DEBUG 1 +# define DEBUG 0 #endif #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } @@ -67,14 +67,15 @@ static const int g_maxNbVariations = 64; #define CUSTOM_LEVEL 99 /* indices for each of the variables */ -#define WLOG_IND 0 -#define CLOG_IND 1 -#define HLOG_IND 2 -#define SLOG_IND 3 -#define SLEN_IND 4 -#define TLEN_IND 5 -//#define STRT_IND 6 -//#define NUM_PARAMS 7 +typedef enum { + wlog_ind = 0, + clog_ind = 1, + hlog_ind = 2, + slog_ind = 3, + slen_ind = 4, + tlen_ind = 5 +} varInds_t; + #define NUM_PARAMS 6 //just don't use strategy as a param. @@ -85,20 +86,16 @@ static const int g_maxNbVariations = 64; #define ZSTD_TARGETLENGTH_MIN 0 #define ZSTD_TARGETLENGTH_MAX 999 -//#define ZSTD_TARGETLENGTH_MAX 1024 #define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1) #define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1) #define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1) #define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1) #define SLEN_RANGE (ZSTD_SEARCHLENGTH_MAX - ZSTD_SEARCHLENGTH_MIN + 1) -#define TLEN_RANGE 12 -//TLEN_RANGE = 0, 2^0 to 2^10; -//hard coded since we only use powers of 2 (and 999 ~ 1024) +#define TLEN_RANGE 17 +/* TLEN_RANGE picked manually */ -//static const int mintable[NUM_PARAMS] = { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLENGTH_MIN, ZSTD_TARGETLENGTH_MIN }; -//static const int maxtable[NUM_PARAMS] = { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_SEARCHLENGTH_MAX, ZSTD_TARGETLENGTH_MAX }; static const int rangetable[NUM_PARAMS] = { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, SLEN_RANGE, TLEN_RANGE }; - +static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 }; /*-************************************ * Benchmark Parameters **************************************/ @@ -179,9 +176,6 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) return result; } -//assume that clock can at least measure .01 second intervals? -//make this a settable global initialized with fn? 
-//#define CLOCK_GRANULARITY 100000000ULL static U64 g_clockGranularity = 100000000ULL; static void findClockGranularity(void) { @@ -253,7 +247,7 @@ static void BMK_translateAdvancedParams(const ZSTD_compressionParameters params) /* checks results are feasible */ static int feasible(const BMK_result_t results, const constraint_t target) { - return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem || !target.cMem); + return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem); } #define EPSILON 0.001 @@ -268,13 +262,6 @@ static int eqZero(const double c1) { return !(memcmp(&c1, &z1, sizeof(double))) || !(memcmp(&c1, &z2, sizeof(double))); } -/* returns 1 if result2 is strictly 'better' than result1 */ -/* strict comparison / cutoff based */ -static int objective_lt(const BMK_result_t result1, const BMK_result_t result2) { - return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed) - || (result1.cSize == result2.cSize && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed); -} - /* hill climbing value for part 1 */ static double resultScore(const BMK_result_t res, const size_t srcSize, const constraint_t target) { double cs = 0., ds = 0., rt, cm = 0.; @@ -291,6 +278,16 @@ static double resultScore(const BMK_result_t res, const size_t srcSize, const co return ret; } +/* return true if r2 strictly better than r1 */ +static int compareResultLT(const BMK_result_t result1, const BMK_result_t result2, const constraint_t target, size_t srcSize) { + if(feasible(result1, target) && feasible(result2, target)) { + return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed) + || (result1.cSize == result2.cSize && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed); + } + return feasible(result2, target) || (!feasible(result1, target) && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target))); + +} + /* factor sort of arbitrary */ static constraint_t relaxTarget(constraint_t target) { target.cMem = (U32)-1; @@ -303,35 +300,11 @@ static constraint_t relaxTarget(constraint_t target) { * Bench functions *********************************************************/ -typedef struct -{ - const char* srcPtr; - size_t srcSize; - char* cPtr; - size_t cRoom; - size_t cSize; - char* resPtr; - size_t resSize; -} blockParam_t; - - const char* g_stratName[ZSTD_btultra+1] = { "(none) ", "ZSTD_fast ", "ZSTD_dfast ", "ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ", "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra "}; -static int -BMK_benchParam(BMK_result_t* resultPtr, - const void* srcBuffer, const size_t srcSize, - const size_t* fileSizes, const unsigned nbFiles, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - const ZSTD_compressionParameters cParams) { - - BMK_return_t res = BMK_benchMem(srcBuffer,srcSize, fileSizes, nbFiles, 0, &cParams, NULL, 0, ctx, dctx, 0, "File"); - *resultPtr = res.result; - return res.error; -} - /* benchParam but only takes in one file. 
*/ static int BMK_benchParam1(BMK_result_t* resultPtr, @@ -349,6 +322,49 @@ typedef struct { ZSTD_compressionParameters params; } winnerInfo_t; +static ZSTD_compressionParameters emptyParams(void) { + ZSTD_compressionParameters p = { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }; + return p; +} + +static winnerInfo_t initWinnerInfo(ZSTD_compressionParameters p) { + winnerInfo_t w1; + w1.result.cSpeed = 0.; + w1.result.dSpeed = 0.; + w1.result.cMem = (size_t)-1; + w1.result.cSize = (size_t)-1; + w1.params = p; + return w1; +} + +typedef struct { + size_t srcSize; + void** srcPtrs; + size_t* srcSizes; + void** dstPtrs; + size_t* dstCapacities; + size_t* dstSizes; + void** resPtrs; + size_t* resSizes; + size_t nbBlocks; +} buffers_t; + +typedef struct { + size_t dictSize; + void* dictBuffer; + ZSTD_CCtx* cctx; + ZSTD_DCtx* dctx; +} contexts_t; + +static int +BMK_benchParam(BMK_result_t* resultPtr, + buffers_t buf, contexts_t ctx, + const ZSTD_compressionParameters cParams) { + BMK_return_t res = BMK_benchMem(buf.srcPtrs[0], buf.srcSize, buf.srcSizes, (unsigned)buf.nbBlocks, 0, &cParams, ctx.dictBuffer, ctx.dictSize, ctx.cctx, ctx.dctx, 0, "Files"); + *resultPtr = res.result; + return res.error; +} + /*-******************************************************* * From Paramgrill *********************************************************/ @@ -356,6 +372,8 @@ typedef struct { static void BMK_initCCtx(ZSTD_CCtx* ctx, const void* dictBuffer, size_t dictBufferSize, int cLevel, const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) { + ZSTD_CCtx_reset(ctx); + ZSTD_CCtx_resetParameters(ctx); if (adv->nbWorkers==1) { ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0); } else { @@ -380,6 +398,7 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx, static void BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize) { + ZSTD_DCtx_reset(dctx); ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); } @@ -425,6 +444,7 @@ static size_t local_defaultCompress( out.dst = dstBuffer; out.size = dstSize; out.pos = 0; + assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */ while (moreToFlush) { if(out.pos == out.size) { return (size_t)-ZSTD_error_dstSize_tooSmall; @@ -471,249 +491,164 @@ static size_t local_defaultDecompress( *********************************************************/ /* Replicate function of benchMemAdvanced, but with pre-split src / dst buffers, with relevant info to invert it (compressedSizes) passed out. */ -/*BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */ +/* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */ /* nbSeconds used in same way as in BMK_advancedParams_t, as nbIters when in iterMode */ /* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs? */ /* dictionary nullable, nothing else though. 
*/ -static BMK_return_t BMK_benchMemInvertible(const void * const * const srcPtrs, size_t const * const srcSizes, - void** dstPtrs, size_t* dstCapacityToSizes, U32 const nbBlocks, +static BMK_return_t BMK_benchMemInvertible(buffers_t buf, contexts_t ctx, const int cLevel, const ZSTD_compressionParameters* comprParams, - const void* dictBuffer, const size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, const BMK_mode_t mode, const BMK_loopMode_t loopMode, const unsigned nbSeconds) { + U32 i; BMK_return_t results = { { 0, 0., 0., 0 }, 0 } ; - size_t srcSize = 0; - void** const resPtrs = malloc(sizeof(void*) * nbBlocks); /* only really needed in both mode. */ - size_t* const resSizes = malloc(sizeof(size_t) * nbBlocks); - int freeDST = 0; + const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs; + size_t const *const srcSizes = buf.srcSizes; + void** dstPtrs = buf.dstPtrs; + size_t* dstCapacities = buf.dstCapacities; + size_t* dstSizes = buf.dstSizes; + void** resPtrs = buf.resPtrs; + size_t* resSizes = buf.resSizes; + const void* dictBuffer = ctx.dictBuffer; + const size_t dictBufferSize = ctx.dictSize; + const size_t nbBlocks = buf.nbBlocks; + const size_t srcSize = buf.srcSize; + ZSTD_CCtx* cctx = ctx.cctx; + ZSTD_DCtx* dctx = ctx.dctx; BMK_advancedParams_t adv = BMK_initAdvancedParams(); adv.mode = mode; adv.loopMode = loopMode; adv.nbSeconds = nbSeconds; - /* resSizes == srcSizes, but modifiable */ - memcpy(resSizes, srcSizes, sizeof(size_t) * nbBlocks); - - for(i = 0; i < nbBlocks; i++) { - srcSize += srcSizes[i]; - } - - if(!ctx || !dctx || !srcPtrs || ! srcSizes) - { - results.error = 31; - DISPLAY("error: passed in null argument\n"); - free(resPtrs); - free(resSizes); - return results; - } - if(!resPtrs || !resSizes) { - results.error = 32; - DISPLAY("error: allocation failed\n"); - free(resPtrs); - free(resSizes); - return results; - } - - /* so resPtr is continuous */ - resPtrs[0] = malloc(srcSize); - - if(!(resPtrs[0])) { - results.error = 32; - DISPLAY("error: allocation failed\n"); - free(resPtrs); - free(resSizes); - return results; - } - - for(i = 1; i < nbBlocks; i++) { - resPtrs[i] = (void*)(((char*)resPtrs[i-1]) + srcSizes[i-1]); - } - - /* allocate own dst if NULL */ - if(dstPtrs == NULL) { - freeDST = 1; - dstPtrs = malloc(nbBlocks * sizeof(void*)); - dstCapacityToSizes = malloc(nbBlocks * sizeof(size_t)); - if(dstPtrs == NULL) { - results.error = 33; - DISPLAY("error: allocation failed\n"); - free(resPtrs); - free(resSizes); - return results; - } - - if(mode == BMK_decodeOnly) { //dst is src - size_t dstSize = 0; - for(i = 0; i < nbBlocks; i++) { - dstCapacityToSizes[i] = ZSTD_getDecompressedSize(srcPtrs[i], srcSizes[i]); - dstSize += dstCapacityToSizes[i]; - } - dstPtrs[0] = malloc(dstSize); - if(dstPtrs[0] == NULL) { - results.error = 34; - DISPLAY("error: allocation failed\n"); - goto _cleanUp; - } - for(i = 1; i < nbBlocks; i++) { - dstPtrs[i] = (void*)(((char*)dstPtrs[i-1]) + ZSTD_getDecompressedSize(srcPtrs[i-1], srcSizes[i-1])); - } - } else { - dstPtrs[0] = malloc(ZSTD_compressBound(srcSize) + (nbBlocks * 1024)); - if(dstPtrs[0] == NULL) { - results.error = 35; - DISPLAY("error: allocation failed\n"); - goto _cleanUp; - } - dstCapacityToSizes[0] = ZSTD_compressBound(srcSizes[0]); - for(i = 1; i < nbBlocks; i++) { - dstPtrs[i] = (void*)(((char*)dstPtrs[i-1]) + dstCapacityToSizes[i-1]); - dstCapacityToSizes[i] = ZSTD_compressBound(srcSizes[i]); - } - } - } - /* warmimg up memory */ - for(i = 0; i < nbBlocks; i++) { - RDG_genBuffer(dstPtrs[i], 
dstCapacityToSizes[i], 0.10, 0.50, 1); + /* can't do this if decode only */ + for(i = 0; i < buf.nbBlocks; i++) { + if(mode != BMK_decodeOnly) { + RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1); + } else { + RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1); + } } /* Bench */ - { - { - BMK_initCCtxArgs cctxprep; - BMK_initDCtxArgs dctxprep; - cctxprep.ctx = ctx; - cctxprep.dictBuffer = dictBuffer; - cctxprep.dictBufferSize = dictBufferSize; - cctxprep.cLevel = cLevel; - cctxprep.comprParams = comprParams; - cctxprep.adv = &adv; - dctxprep.dctx = dctx; - dctxprep.dictBuffer = dictBuffer; - dctxprep.dictBufferSize = dictBufferSize; - if(loopMode == BMK_timeMode) { - BMK_customTimedReturn_t intermediateResultCompress; - BMK_customTimedReturn_t intermediateResultDecompress; - BMK_timedFnState_t* timeStateCompress = BMK_createTimeState(nbSeconds); - BMK_timedFnState_t* timeStateDecompress = BMK_createTimeState(nbSeconds); - if(mode == BMK_compressOnly) { - intermediateResultCompress.completed = 0; - intermediateResultDecompress.completed = 1; - } else if (mode == BMK_decodeOnly) { - intermediateResultCompress.completed = 1; - intermediateResultDecompress.completed = 0; - } else { /* both */ - intermediateResultCompress.completed = 0; - intermediateResultDecompress.completed = 0; + + { + /* init args */ + BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; + cctxprep.ctx = cctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + cctxprep.adv = &adv; + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; + + if(loopMode == BMK_timeMode) { + BMK_customTimedReturn_t intermediateResultCompress; + BMK_customTimedReturn_t intermediateResultDecompress; + BMK_timedFnState_t* timeStateCompress = BMK_createTimeState(nbSeconds); + BMK_timedFnState_t* timeStateDecompress = BMK_createTimeState(nbSeconds); + if(mode == BMK_compressOnly) { + intermediateResultCompress.completed = 0; + intermediateResultDecompress.completed = 1; + } else if (mode == BMK_decodeOnly) { + intermediateResultCompress.completed = 1; + intermediateResultDecompress.completed = 0; + } else { /* both */ + intermediateResultCompress.completed = 0; + intermediateResultDecompress.completed = 0; + } + + while(!intermediateResultCompress.completed) { + intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)cctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, dstPtrs, dstCapacities, dstSizes); + + if(intermediateResultCompress.result.error) { + results.error = intermediateResultCompress.result.error; + BMK_freeTimeState(timeStateCompress); + BMK_freeTimeState(timeStateDecompress); + return results; } - while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) { - if(!intermediateResultCompress.completed) { - intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes); - if(intermediateResultCompress.result.error) { - results.error = intermediateResultCompress.result.error; - BMK_freeTimeState(timeStateCompress); - BMK_freeTimeState(timeStateDecompress); - goto _cleanUp; - } - results.result.cSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * 1000000000; - results.result.cSize = 
intermediateResultCompress.result.result.sumOfReturn; - } + results.result.cSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * TIMELOOP_NANOSEC; + results.result.cSize = intermediateResultCompress.result.result.sumOfReturn; + } - if(!intermediateResultDecompress.completed) { - if(mode == BMK_decodeOnly) { - intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void* const*)srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes); - } else { /* both, decompressed result already written to dstPtr */ - intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void* const*)dstPtrs, dstCapacityToSizes, resPtrs, resSizes); - } + while(!intermediateResultDecompress.completed) { + intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, + nbBlocks, (const void* const*)dstPtrs, dstSizes, resPtrs, resSizes, NULL); - if(intermediateResultDecompress.result.error) { - results.error = intermediateResultDecompress.result.error; - BMK_freeTimeState(timeStateCompress); - BMK_freeTimeState(timeStateDecompress); - goto _cleanUp; - } - results.result.dSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000000000; - } + if(intermediateResultDecompress.result.error) { + results.error = intermediateResultDecompress.result.error; + BMK_freeTimeState(timeStateCompress); + BMK_freeTimeState(timeStateDecompress); + return results; } - BMK_freeTimeState(timeStateCompress); - BMK_freeTimeState(timeStateDecompress); - } else { //iterMode; - if(mode != BMK_decodeOnly) { + results.result.dSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * TIMELOOP_NANOSEC; + } - BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbSeconds); - if(compressionResults.error) { - results.error = compressionResults.error; - goto _cleanUp; - } - if(compressionResults.result.nanoSecPerRun == 0) { - results.result.cSpeed = 0; - } else { - results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; - } - results.result.cSize = compressionResults.result.sumOfReturn; + BMK_freeTimeState(timeStateCompress); + BMK_freeTimeState(timeStateDecompress); + + } else { //iterMode; + if(mode != BMK_decodeOnly) { + + BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)cctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, dstPtrs, dstCapacities, dstSizes, nbSeconds); + if(compressionResults.error) { + results.error = compressionResults.error; + return results; } - if(mode != BMK_compressOnly) { - BMK_customReturn_t decompressionResults; - if(mode == BMK_decodeOnly) { - decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void* const*)srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, - nbSeconds); - } else { - decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void* const*)dstPtrs, dstCapacityToSizes, resPtrs, resSizes, - nbSeconds); - } + 
if(compressionResults.result.nanoSecPerRun == 0) { + results.result.cSpeed = 0; + } else { + results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + results.result.cSize = compressionResults.result.sumOfReturn; + } - if(decompressionResults.error) { - results.error = decompressionResults.error; - goto _cleanUp; - } + if(mode != BMK_compressOnly) { + BMK_customReturn_t decompressionResults; + decompressionResults = BMK_benchFunction( + &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, nbBlocks, + (const void* const*)dstPtrs, dstSizes, resPtrs, resSizes, NULL, + nbSeconds); - if(decompressionResults.result.nanoSecPerRun == 0) { - results.result.dSpeed = 0; - } else { - results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; - } + if(decompressionResults.error) { + results.error = decompressionResults.error; + return results; + } + + if(decompressionResults.result.nanoSecPerRun == 0) { + results.result.dSpeed = 0; + } else { + results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; } } } - } /* Bench */ - results.result.cMem = (1 << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(ctx); - -_cleanUp: - free(resPtrs[0]); - free(resPtrs); - free(resSizes); - if(freeDST) { - free(dstPtrs[0]); - free(dstPtrs); } + /* Bench */ + results.result.cMem = (1 << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx); return results; } /* global winner used for display. */ //Should be totally 0 initialized? -static winnerInfo_t g_winner; +static winnerInfo_t g_winner = { { 0, 0, (size_t)-1, (size_t)-1 } , { 0, 0, 0, 0, 0, 0, ZSTD_fast } }; static constraint_t g_targetConstraints; static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result, const ZSTD_compressionParameters params, const size_t srcSize) { - if(DEBUG || (objective_lt(g_winner.result, result) && feasible(result, g_targetConstraints))) { + if(DEBUG || compareResultLT(g_winner.result, result, g_targetConstraints, srcSize)) { char lvlstr[15] = "Custom Level"; const U64 time = UTIL_clockSpanNano(g_time); const U64 minutes = time / (60ULL * TIMELOOP_NANOSEC); - if(DEBUG && (objective_lt(g_winner.result, result) && feasible(result, g_targetConstraints))) { + + if(DEBUG && compareResultLT(g_winner.result, result, g_targetConstraints, srcSize)) { DISPLAY("New Winner: \n"); } @@ -722,27 +657,24 @@ static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result fprintf(f," {%3u,%3u,%3u,%3u,%3u,%3u, %s }, ", params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, params.targetLength, g_stratName[(U32)(params.strategy)]); + if(cLevel != CUSTOM_LEVEL) { snprintf(lvlstr, 15, " Level %2u ", cLevel); } + fprintf(f, "/* %s */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */", - lvlstr, (double)srcSize / result.cSize, result.cSpeed / 1000000., result.dSpeed / 1000000.); + lvlstr, (double)srcSize / result.cSize, result.cSpeed / (1 << 20), result.dSpeed / (1 << 20)); if(TIMED) { fprintf(f, " - %1lu:%2lu:%05.2f", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); } fprintf(f, "\n"); - if(objective_lt(g_winner.result, result) && feasible(result, g_targetConstraints)) { + + if(compareResultLT(g_winner.result, result, g_targetConstraints, srcSize)) { BMK_translateAdvancedParams(params); g_winner.result = result; g_winner.params = params; } } - //else { - // 
DISPLAY("G_WINNER: "); - // DISPLAY("/* R:%5.3f at %5.1f MB/s - %5.1f MB/s */ \n",(double)srcSize / g_winner.result.cSize , g_winner.result.cSpeed / 1000000 , g_winner.result.dSpeed / 1000000); - // DISPLAY("LOSER : "); - // DISPLAY("/* R:%5.3f at %5.1f MB/s - %5.1f MB/s */ \n",(double)srcSize / result.cSize, result.cSpeed / 1000000 , result.dSpeed / 1000000); - //} } static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize) @@ -903,23 +835,23 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para static ZSTD_compressionParameters sanitizeParams(ZSTD_compressionParameters params) { if (params.strategy == ZSTD_fast) - g_params.chainLog = 0, g_params.searchLog = 0; + params.chainLog = 0, params.searchLog = 0; if (params.strategy == ZSTD_dfast) - g_params.searchLog = 0; + params.searchLog = 0; if (params.strategy != ZSTD_btopt && params.strategy != ZSTD_btultra && params.strategy != ZSTD_fast) - g_params.targetLength = 0; + params.targetLength = 0; return params; } /* new length */ /* keep old array, will need if iter over strategy. */ -static int sanitizeVarArray(const int varLength, const U32* varArray, U32* varNew, const ZSTD_strategy strat) { +static int sanitizeVarArray(varInds_t* varNew, const int varLength, const varInds_t* varArray, const ZSTD_strategy strat) { int i, j = 0; for(i = 0; i < varLength; i++) { - if( !((varArray[i] == CLOG_IND && strat == ZSTD_fast) - || (varArray[i] == SLOG_IND && strat == ZSTD_dfast) - || (varArray[i] == TLEN_IND && strat != ZSTD_btopt && strat != ZSTD_btultra && strat != ZSTD_fast))) { + if( !((varArray[i] == clog_ind && strat == ZSTD_fast) + || (varArray[i] == slog_ind && strat == ZSTD_dfast) + || (varArray[i] == tlen_ind && strat != ZSTD_btopt && strat != ZSTD_btultra && strat != ZSTD_fast))) { varNew[j] = varArray[i]; j++; } @@ -930,69 +862,72 @@ static int sanitizeVarArray(const int varLength, const U32* varArray, U32* varNe /* res should be NUM_PARAMS size */ /* constructs varArray from ZSTD_compressionParameters style parameter */ -static int variableParams(const ZSTD_compressionParameters paramConstraints, U32* res) { +static int variableParams(const ZSTD_compressionParameters paramConstraints, varInds_t* res) { int j = 0; if(!paramConstraints.windowLog) { - res[j] = WLOG_IND; + res[j] = wlog_ind; j++; } if(!paramConstraints.chainLog) { - res[j] = CLOG_IND; + res[j] = clog_ind; j++; } if(!paramConstraints.hashLog) { - res[j] = HLOG_IND; + res[j] = hlog_ind; j++; } if(!paramConstraints.searchLog) { - res[j] = SLOG_IND; + res[j] = slog_ind; j++; } if(!paramConstraints.searchLength) { - res[j] = SLEN_IND; + res[j] = slen_ind; j++; } if(!paramConstraints.targetLength) { - res[j] = TLEN_IND; + res[j] = tlen_ind; j++; } return j; } +/* bin-search on tlen_table for correct index. */ +static int tlen_inv(U32 x) { + int lo = 0; + int hi = TLEN_RANGE; + while(lo < hi) { + int mid = (lo + hi) / 2; + if(tlen_table[mid] < x) { + lo = mid + 1; + } if(tlen_table[mid] == x) { + return mid; + } else { + hi = mid; + } + } + return lo; +} + /* amt will probably always be \pm 1? */ /* slight change from old paramVariation, targetLength can only take on powers of 2 now (999 ~= 1024?) */ /* take max/min bounds into account as well? 
*/ -static void paramVaryOnce(const U32 paramIndex, const int amt, ZSTD_compressionParameters* ptr) { +static void paramVaryOnce(const varInds_t paramIndex, const int amt, ZSTD_compressionParameters* ptr) { switch(paramIndex) { - case WLOG_IND: ptr->windowLog += amt; break; - case CLOG_IND: ptr->chainLog += amt; break; - case HLOG_IND: ptr->hashLog += amt; break; - case SLOG_IND: ptr->searchLog += amt; break; - case SLEN_IND: ptr->searchLength += amt; break; - case TLEN_IND: - if(amt >= 0) { - if(ptr->targetLength == 0) { - if(amt > 0) { - ptr->targetLength = MIN(1 << (amt - 1), 999); - } - } else { - ptr->targetLength <<= amt; - ptr->targetLength = MIN(ptr->targetLength, 999); - } - } else { - if(ptr->targetLength == 999) { - ptr->targetLength = 1024; - } - ptr->targetLength >>= -amt; - } + case wlog_ind: ptr->windowLog += amt; break; + case clog_ind: ptr->chainLog += amt; break; + case hlog_ind: ptr->hashLog += amt; break; + case slog_ind: ptr->searchLog += amt; break; + case slen_ind: ptr->searchLength += amt; break; + case tlen_ind: + ptr->targetLength = tlen_table[MAX(0, MIN(TLEN_RANGE - 1, tlen_inv(ptr->targetLength) + amt))]; break; default: break; } } /* varies ptr by nbChanges respecting varyParams*/ -static void paramVariation(ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen, const U32 nbChanges) +static void paramVariation(ZSTD_compressionParameters* ptr, const varInds_t* varyParams, const int varyLen, const U32 nbChanges) { ZSTD_compressionParameters p; U32 validated = 0; @@ -1009,7 +944,7 @@ static void paramVariation(ZSTD_compressionParameters* ptr, const U32* varyParam } /* length of memo table given free variables */ -static size_t memoTableLen(const U32* varyParams, const int varyLen) { +static size_t memoTableLen(const varInds_t* varyParams, const int varyLen) { size_t arrayLen = 1; int i; for(i = 0; i < varyLen; i++) { @@ -1018,53 +953,45 @@ static size_t memoTableLen(const U32* varyParams, const int varyLen) { return arrayLen; } -//sort of ~lg2 (replace 1024 w/ 999, and add 0 at lower end of range) for memoTableInd Tlen -static unsigned lg2(unsigned x) { - if(x == 999) { - return 11; - } - return x ? 
ZSTD_highbit32(x) + 1 : 0; -} - /* returns unique index of compression parameters */ -static unsigned memoTableInd(const ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen) { +static unsigned memoTableInd(const ZSTD_compressionParameters* ptr, const varInds_t* varyParams, const int varyLen) { int i; unsigned ind = 0; for(i = 0; i < varyLen; i++) { switch(varyParams[i]) { - case WLOG_IND: ind *= WLOG_RANGE; ind += ptr->windowLog - ZSTD_WINDOWLOG_MIN ; break; - case CLOG_IND: ind *= CLOG_RANGE; ind += ptr->chainLog - ZSTD_CHAINLOG_MIN ; break; - case HLOG_IND: ind *= HLOG_RANGE; ind += ptr->hashLog - ZSTD_HASHLOG_MIN ; break; - case SLOG_IND: ind *= SLOG_RANGE; ind += ptr->searchLog - ZSTD_SEARCHLOG_MIN ; break; - case SLEN_IND: ind *= SLEN_RANGE; ind += ptr->searchLength - ZSTD_SEARCHLENGTH_MIN; break; - case TLEN_IND: ind *= TLEN_RANGE; ind += lg2(ptr->targetLength) - ZSTD_TARGETLENGTH_MIN; break; + case wlog_ind: ind *= WLOG_RANGE; ind += ptr->windowLog - ZSTD_WINDOWLOG_MIN ; break; + case clog_ind: ind *= CLOG_RANGE; ind += ptr->chainLog - ZSTD_CHAINLOG_MIN ; break; + case hlog_ind: ind *= HLOG_RANGE; ind += ptr->hashLog - ZSTD_HASHLOG_MIN ; break; + case slog_ind: ind *= SLOG_RANGE; ind += ptr->searchLog - ZSTD_SEARCHLOG_MIN ; break; + case slen_ind: ind *= SLEN_RANGE; ind += ptr->searchLength - ZSTD_SEARCHLENGTH_MIN; break; + case tlen_ind: ind *= TLEN_RANGE; ind += tlen_inv(ptr->targetLength) - ZSTD_TARGETLENGTH_MIN; break; } } return ind; } /* inverse of above function (from index to parameters) */ -static void memoTableIndInv(ZSTD_compressionParameters* ptr, const U32* varyParams, const int varyLen, size_t ind) { +static void memoTableIndInv(ZSTD_compressionParameters* ptr, const varInds_t* varyParams, const int varyLen, size_t ind) { int i; for(i = varyLen - 1; i >= 0; i--) { switch(varyParams[i]) { - case WLOG_IND: ptr->windowLog = ind % WLOG_RANGE + ZSTD_WINDOWLOG_MIN; ind /= WLOG_RANGE; break; - case CLOG_IND: ptr->chainLog = ind % CLOG_RANGE + ZSTD_CHAINLOG_MIN; ind /= CLOG_RANGE; break; - case HLOG_IND: ptr->hashLog = ind % HLOG_RANGE + ZSTD_HASHLOG_MIN; ind /= HLOG_RANGE; break; - case SLOG_IND: ptr->searchLog = ind % SLOG_RANGE + ZSTD_SEARCHLOG_MIN; ind /= SLOG_RANGE; break; - case SLEN_IND: ptr->searchLength = ind % SLEN_RANGE + ZSTD_SEARCHLENGTH_MIN; ind /= SLEN_RANGE; break; - case TLEN_IND: ptr->targetLength = (ind % TLEN_RANGE) ? 
MIN(1 << ((ind % TLEN_RANGE) - 1), 999) : 0; ind /= TLEN_RANGE; break; + case wlog_ind: ptr->windowLog = ind % WLOG_RANGE + ZSTD_WINDOWLOG_MIN; ind /= WLOG_RANGE; break; + case clog_ind: ptr->chainLog = ind % CLOG_RANGE + ZSTD_CHAINLOG_MIN; ind /= CLOG_RANGE; break; + case hlog_ind: ptr->hashLog = ind % HLOG_RANGE + ZSTD_HASHLOG_MIN; ind /= HLOG_RANGE; break; + case slog_ind: ptr->searchLog = ind % SLOG_RANGE + ZSTD_SEARCHLOG_MIN; ind /= SLOG_RANGE; break; + case slen_ind: ptr->searchLength = ind % SLEN_RANGE + ZSTD_SEARCHLENGTH_MIN; ind /= SLEN_RANGE; break; + case tlen_ind: ptr->targetLength = tlen_table[(ind % TLEN_RANGE)]; ind /= TLEN_RANGE; break; } } } - /* Initialize memotable, immediately mark redundant / obviously infeasible params as */ -static void memoTableInit(U8* memoTable, ZSTD_compressionParameters paramConstraints, const constraint_t target, const U32* varyParams, const int varyLen, const size_t srcSize) { +static void memoTableInit(U8* memoTable, ZSTD_compressionParameters paramConstraints, const constraint_t target, const varInds_t* varyParams, const int varyLen, const size_t srcSize) { size_t i; size_t arrayLen = memoTableLen(varyParams, varyLen); int cwFixed = !paramConstraints.chainLog || !paramConstraints.windowLog; int scFixed = !paramConstraints.searchLog || !paramConstraints.chainLog; + int whFixed = !paramConstraints.windowLog || !paramConstraints.hashLog; int wFixed = !paramConstraints.windowLog; int j = 0; memset(memoTable, 0, arrayLen); @@ -1076,7 +1003,7 @@ static void memoTableInit(U8* memoTable, ZSTD_compressionParameters paramConstra memoTable[i] = 255; j++; } - if(wFixed && (1ULL << paramConstraints.windowLog) > (srcSize << 1)) { + if(wFixed && (1ULL << paramConstraints.windowLog) > (srcSize << 2)) { memoTable[i] = 255; } /* nil out parameter sets equivalent to others. 
*/ @@ -1093,12 +1020,20 @@ static void memoTableInit(U8* memoTable, ZSTD_compressionParameters paramConstra } } } + if(scFixed) { if(paramConstraints.searchLog > paramConstraints.chainLog) { if(memoTable[i] != 255) { j++; } memoTable[i] = 255; } } + + if(whFixed) { + if(paramConstraints.hashLog > paramConstraints.windowLog + 1) { + if(memoTable[i] != 255) { j++; } + memoTable[i] = 255; + } + } } DEBUGOUTPUT("%d / %d Invalid\n", j, (int)i); if((int)i == j) { @@ -1106,27 +1041,7 @@ static void memoTableInit(U8* memoTable, ZSTD_compressionParameters paramConstra } } -/* inits memotables for all (including mallocs), all strategies */ -/* takes unsanitized varyParams */ -static U8** memoTableInitAll(ZSTD_compressionParameters paramConstraints, constraint_t target, const U32* varyParams, const int varyLen, const size_t srcSize) { - U32 varNew[NUM_PARAMS]; - int varLenNew; - U8** mtAll = malloc(sizeof(U8*) * (ZSTD_btultra + 1)); - int i; - if(mtAll == NULL) { - return NULL; - } - for(i = 1; i <= (int)ZSTD_btultra; i++) { - varLenNew = sanitizeVarArray(varyLen, varyParams, varNew, i); - mtAll[i] = malloc(sizeof(U8) * memoTableLen(varNew, varLenNew)); - if(mtAll[i] == NULL) { - return NULL; - } - memoTableInit(mtAll[i], paramConstraints, target, varNew, varLenNew, srcSize); - } - return mtAll; -} - +/* frees all allocated memotables */ static void memoTableFreeAll(U8** mtAll) { int i; if(mtAll == NULL) { return; } @@ -1136,6 +1051,28 @@ static void memoTableFreeAll(U8** mtAll) { free(mtAll); } +/* inits memotables for all (including mallocs), all strategies */ +/* takes unsanitized varyParams */ +static U8** memoTableInitAll(ZSTD_compressionParameters paramConstraints, constraint_t target, const varInds_t* varyParams, const int varyLen, const size_t srcSize) { + varInds_t varNew[NUM_PARAMS]; + int varLenNew; + U8** mtAll = calloc(sizeof(U8*),(ZSTD_btultra + 1)); + int i; + if(mtAll == NULL) { + return NULL; + } + for(i = 1; i <= (int)ZSTD_btultra; i++) { + varLenNew = sanitizeVarArray(varNew, varyLen, varyParams, i); + mtAll[i] = malloc(sizeof(U8) * memoTableLen(varNew, varLenNew)); + if(mtAll[i] == NULL) { + memoTableFreeAll(mtAll); + return NULL; + } + memoTableInit(mtAll[i], paramConstraints, target, varNew, varLenNew, srcSize); + } + return mtAll; +} + #define PARAMTABLELOG 25 #define PARAMTABLESIZE (1< 0 && tries > 0); memoTableIndInv(pc, varArray, varLen, (unsigned)ind); - *pc = sanitizeParams(*pc); + //*pc = sanitizeParams(*pc); } static void BMK_selectRandomStart( @@ -1250,7 +1187,7 @@ static void BMK_benchFullTable(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, const void* src winnerInfo_t winners[NB_LEVELS_TRACKED+1]; const char* const rfName = "grillResults.txt"; FILE* const f = fopen(rfName, "w"); - const size_t blockSize = g_blockSize ? g_blockSize : srcSize; /* cut by block or not ? */ + const size_t blockSize = g_blockSize ? g_blockSize : ZSTD_BLOCKSIZE_MAX; /* cut by block or not ? */ /* init */ assert(g_singleRun==0); @@ -1390,574 +1327,204 @@ int benchFiles(const char** fileNamesTable, int nbFiles) return 0; } - - -/* -checks feasibility with uncertainty. 
--1 : certainly infeasible - 0 : uncertain - 1 : certainly feasible -*/ -static int uncertainFeasibility(double const uncertaintyConstantC, double const uncertaintyConstantD, const constraint_t paramTarget, const BMK_result_t* const results) { - if((paramTarget.cSpeed != 0 && results->cSpeed * uncertaintyConstantC < paramTarget.cSpeed) || - (paramTarget.dSpeed != 0 && results->dSpeed * uncertaintyConstantD < paramTarget.dSpeed) || - (paramTarget.cMem != 0 && results->cMem > paramTarget.cMem)) { - return -1; - } else if((paramTarget.cSpeed == 0 || results->cSpeed / uncertaintyConstantC > paramTarget.cSpeed) && - (paramTarget.dSpeed == 0 || results->dSpeed / uncertaintyConstantD > paramTarget.dSpeed) && - (paramTarget.cMem == 0 || results->cMem <= paramTarget.cMem)) { - return 1; - } else { - return 0; - } -} - -/* 1 - better than prev best - 0 - uncertain - -1 - worse - assume prev_best status is run fully? - but then we'd have to rerun any winners anyway */ -/* not as useful as initially believed */ -static int uncertainComparison(double const uncertaintyConstantC, double const uncertaintyConstantD, const BMK_result_t* candidate, const BMK_result_t* prevBest) { - (void)uncertaintyConstantD; //unused for now - if(candidate->cSpeed > prevBest->cSpeed * uncertaintyConstantC) { - return 1; - } else if (candidate->cSpeed * uncertaintyConstantC < prevBest->cSpeed) { - return -1; - } else { - return 0; - } -} - /*benchmarks and tests feasibility together 1 = true = better 0 = false = not better if true then resultPtr will give results. 2+ on error? */ -//Maybe use compress_only for benchmark -#define INFEASIBLE_RESULT 0 -#define FEASIBLE_RESULT 1 +//Maybe use compress_only for benchmark first run? +#define WORSE_RESULT 0 +#define BETTER_RESULT 1 #define ERROR_RESULT 2 -static int feasibleBench(BMK_result_t* resultPtr, - const void* const * const srcPtrs, size_t const * const srcSizes, - void** const dstPtrs, size_t* dstCapacityToSizes, U32 const nbBlocks, - void* dictBuffer, const size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - const ZSTD_compressionParameters cParams, - const constraint_t target, - BMK_result_t* winnerResult) { - BMK_return_t benchres; - U64 loopDurationC = 0, loopDurationD = 0; - double uncertaintyConstantC, uncertaintyConstantD; - size_t srcSize = 0; - U32 i; - //alternative - test 1 iter for ratio, (possibility of error 3 which is fine), - //maybe iter this until 2x measurable for better guarantee? - DEBUGOUTPUT("Feas:\n"); - benchres = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_both, BMK_iterMode, 1); - if(benchres.error) { - DISPLAY("ERROR %d !!\n", benchres.error); - } - for(i = 0; i < nbBlocks; i++) { - srcSize += srcSizes[i]; - } - BMK_printWinner(stdout, CUSTOM_LEVEL, benchres.result, cParams, srcSize); +//add worse result complete for worse results of length > 1 sec? - if(!benchres.error) { - *resultPtr = benchres.result; - /* if speed is 0 (only happens when time = 0) */ - if(eqZero(benchres.result.cSpeed)) { - loopDurationC = 0; - uncertaintyConstantC = 2; - } else { - loopDurationC = (U64)((double)(srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); - //problem - tested in fullbench, saw speed vary 3x between iters, maybe raise uncertaintyConstraint up? - //possibly has to do with initCCtx? or system stuff? - //asymmetric +/- constant needed? 
- uncertaintyConstantC = MIN((loopDurationC + (double)(2 * g_clockGranularity)/loopDurationC) * 1.1, 3); //.02 seconds - } - if(eqZero(benchres.result.dSpeed)) { - loopDurationD = 0; - uncertaintyConstantD = 2; - } else { - loopDurationD = (U64)((double)(srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); - //problem - tested in fullbench, saw speed vary 3x between iters, maybe raise uncertaintyConstraint up? - //possibly has to do with initCCtx? or system stuff? - //asymmetric +/- constant needed? - uncertaintyConstantD = MIN((loopDurationD + (double)(2 * g_clockGranularity)/loopDurationD) * 1.1, 3); //.02 seconds - } - - - if(benchres.result.cSize < winnerResult->cSize) { //better compression ratio, just needs to be feasible - int feas; - if(loopDurationC < TIMELOOP_NANOSEC / 10) { - BMK_return_t benchres2 = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_compressOnly, BMK_iterMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres = benchres2; - } - } - if(loopDurationD < TIMELOOP_NANOSEC / 10) { - BMK_return_t benchres2 = BMK_benchMemInvertible((const void* const*)dstPtrs, dstCapacityToSizes, NULL, NULL, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_decodeOnly, BMK_iterMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.dSpeed = benchres2.result.dSpeed; - } - } - *resultPtr = benchres.result; - - feas = uncertainFeasibility(uncertaintyConstantC, uncertaintyConstantD, target, &(benchres.result)); - if(feas == 0) { // uncertain feasibility - if(loopDurationC < TIMELOOP_NANOSEC) { - BMK_return_t benchres2 = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_compressOnly, BMK_timeMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.cSpeed = benchres2.result.cSpeed; - } - } - if(loopDurationD < TIMELOOP_NANOSEC) { - BMK_return_t benchres2 = BMK_benchMemInvertible((const void* const*)dstPtrs, dstCapacityToSizes, NULL, NULL, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_decodeOnly, BMK_timeMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.dSpeed = benchres2.result.dSpeed; - } - } - *resultPtr = benchres.result; - return feasible(benchres.result, target); - } else { //feas = 1 or -1 map to 1, 0 respectively - return (feas + 1) >> 1; //relies on INFEASIBLE_RESULT == 0, FEASIBLE_RESULT == 1 - } - } else if (benchres.result.cSize == winnerResult->cSize) { //equal ratio, needs to be better than winner in cSpeed/ dSpeed / cMem - int feas; - if(loopDurationC < TIMELOOP_NANOSEC / 10) { - BMK_return_t benchres2 = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_compressOnly, BMK_iterMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres = benchres2; - } - } - if(loopDurationD < TIMELOOP_NANOSEC / 10) { - BMK_return_t benchres2 = BMK_benchMemInvertible((const void* const*)dstPtrs, dstCapacityToSizes, NULL, NULL, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_decodeOnly, BMK_iterMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.dSpeed = benchres2.result.dSpeed; - } - } - feas = uncertainFeasibility(uncertaintyConstantC, uncertaintyConstantD, target, &(benchres.result)); - if(feas == 0) { // 
uncertain feasibility - if(loopDurationC < TIMELOOP_NANOSEC) { - BMK_return_t benchres2 = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_compressOnly, BMK_timeMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.cSpeed = benchres2.result.cSpeed; - } - } - if(loopDurationD < TIMELOOP_NANOSEC) { - BMK_return_t benchres2 = BMK_benchMemInvertible((const void* const*)dstPtrs, dstCapacityToSizes, NULL, NULL, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_decodeOnly, BMK_timeMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.dSpeed = benchres2.result.dSpeed; - } - } - - *resultPtr = benchres.result; - return feasible(benchres.result, target) && objective_lt(*winnerResult, benchres.result); - } else if (feas == 1) { //no need to check feasibility compares (maybe only it is chosen as a winner) - int btw = uncertainComparison(uncertaintyConstantC, uncertaintyConstantD, &(benchres.result), winnerResult); - if(btw == -1) { - return INFEASIBLE_RESULT; - } else { //possibly better, benchmark and find out - benchres = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_both, BMK_timeMode, 1); - *resultPtr = benchres.result; - return objective_lt(*winnerResult, benchres.result); - } - } else { //feas == -1 - return INFEASIBLE_RESULT; //infeasible - } - } else { - return INFEASIBLE_RESULT; //infeasible - } - } else { - return ERROR_RESULT; //BMK error - } -} - -//same as before, but +/-? -//alternative, just return comparison result, leave caller to worry about feasibility. -//have version of benchMemAdvanced which takes in dstBuffer/cap as well? 
-//(motivation: repeat tests (maybe just on decompress) don't need further compress runs) -static int infeasibleBench(BMK_result_t* resultPtr, - const void* const * const srcPtrs, size_t const * const srcSizes, - void** const dstPtrs, size_t* dstCapacityToSizes, U32 const nbBlocks, - void* dictBuffer, const size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, +/* variation between 2nd run and full second bmk */ +#define VARIANCE 1.1 +static int allBench(BMK_result_t* resultPtr, + buffers_t buf, contexts_t ctx, const ZSTD_compressionParameters cParams, const constraint_t target, - BMK_result_t* winnerResult) { + BMK_result_t* winnerResult, int feas) { BMK_return_t benchres; - BMK_result_t resultMin, resultMax; + BMK_result_t resultMax; U64 loopDurationC = 0, loopDurationD = 0; double uncertaintyConstantC, uncertaintyConstantD; double winnerRS; - size_t srcSize = 0; - U32 i; - benchres = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_both, BMK_iterMode, 1); - for(i = 0; i < nbBlocks; i++) { - srcSize += srcSizes[i]; - } - BMK_printWinner(stdout, CUSTOM_LEVEL, benchres.result, cParams, srcSize); - winnerRS = resultScore(*winnerResult, srcSize, target); + /* initial benchmarking, gives exact ratio and memory, warms up future runs */ + benchres = BMK_benchMemInvertible(buf, ctx, 0, &cParams, BMK_both, BMK_iterMode, 1); + winnerRS = resultScore(*winnerResult, buf.srcSize, target); DEBUGOUTPUT("WinnerScore: %f\n ", winnerRS); - if(!benchres.error) { - *resultPtr = benchres.result; - if(eqZero(benchres.result.cSpeed)) { - loopDurationC = 0; - uncertaintyConstantC = 2; - } else { - loopDurationC = (U64)((double)(srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); - uncertaintyConstantC = MIN((loopDurationC + (double)(2 * g_clockGranularity)/loopDurationC * 1.1), 3); //.02 seconds - } + if(benchres.error) { + DEBUGOUTPUT("Benchmarking failed\n"); + return ERROR_RESULT; + } + *resultPtr = benchres.result; - if(eqZero(benchres.result.dSpeed)) { - loopDurationD = 0; - uncertaintyConstantD = 2; - } else { - loopDurationD = (U64)((double)(srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); - uncertaintyConstantD = MIN((loopDurationD + (double)(2 * g_clockGranularity)/loopDurationD) * 1.1 , 3); //.02 seconds - } + /* calculate uncertainty in compression / decompression runs */ + if(eqZero(benchres.result.cSpeed)) { + loopDurationC = 0; + uncertaintyConstantC = 3; + } else { + loopDurationC = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); + uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE; + } + if(eqZero(benchres.result.dSpeed)) { + loopDurationD = 0; + uncertaintyConstantD = 3; + } else { + loopDurationD = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); + uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE; + } - if(loopDurationC < TIMELOOP_NANOSEC / 10) { - BMK_return_t benchres2 = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_compressOnly, BMK_iterMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres = benchres2; - } - } - if(loopDurationD < TIMELOOP_NANOSEC / 10) { - BMK_return_t benchres2 = BMK_benchMemInvertible((const void* const*)dstPtrs, dstCapacityToSizes, NULL, NULL, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, 
dctx, - BMK_decodeOnly, BMK_iterMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.dSpeed = benchres2.result.dSpeed; - } - } - *resultPtr = benchres.result; + /* anything with worse ratio in feas is definitely worse, discard */ + if(feas && benchres.result.cSize < winnerResult->cSize) { + return WORSE_RESULT; + } - /* benchres's certainty range. */ - resultMax = benchres.result; - resultMin = benchres.result; - resultMax.cSpeed *= uncertaintyConstantC; - resultMax.dSpeed *= uncertaintyConstantD; - resultMin.cSpeed /= uncertaintyConstantC; - resultMin.dSpeed /= uncertaintyConstantD; - if (winnerRS > resultScore(resultMax, srcSize, target)) { - return INFEASIBLE_RESULT; - } else { - if(loopDurationC < TIMELOOP_NANOSEC) { - BMK_return_t benchres2 = BMK_benchMemInvertible(srcPtrs, srcSizes, dstPtrs, dstCapacityToSizes, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_compressOnly, BMK_timeMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.cSpeed = benchres2.result.cSpeed; - } - } - if(loopDurationD < TIMELOOP_NANOSEC) { - BMK_return_t benchres2 = BMK_benchMemInvertible((const void* const*)dstPtrs, dstCapacityToSizes, NULL, NULL, nbBlocks, 0, &cParams, dictBuffer, dictBufferSize, ctx, dctx, - BMK_decodeOnly, BMK_timeMode, 1); - if(benchres2.error) { - return ERROR_RESULT; - } else { - benchres.result.dSpeed = benchres2.result.dSpeed; - } - } - *resultPtr = benchres.result; - return (resultScore(benchres.result, srcSize, target) > winnerRS); + /* second run, if first run is too short, gives approximate cSpeed + dSpeed */ + if(loopDurationC < TIMELOOP_NANOSEC / 10) { + BMK_return_t benchres2 = BMK_benchMemInvertible(buf, ctx, 0, &cParams, BMK_compressOnly, BMK_iterMode, 1); + if(benchres2.error) { + return ERROR_RESULT; } + benchres = benchres2; + } + if(loopDurationD < TIMELOOP_NANOSEC / 10) { + BMK_return_t benchres2 = BMK_benchMemInvertible(buf, ctx, 0, &cParams, BMK_decodeOnly, BMK_iterMode, 1); + if(benchres2.error) { + return ERROR_RESULT; + } + benchres.result.dSpeed = benchres2.result.dSpeed; + } *resultPtr = benchres.result; - } else { - return ERROR_RESULT; //BMK error + + /* optimistic assumption of benchres.result */ + resultMax = benchres.result; + resultMax.cSpeed *= uncertaintyConstantC; + resultMax.dSpeed *= uncertaintyConstantD; + + /* disregard infeasible results in feas mode */ + /* disregard if resultMax < winner in infeas mode */ + if((feas && !feasible(resultMax, target)) || + (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) { + return WORSE_RESULT; + } + + /* Final full run if estimates are unclear */ + if(loopDurationC < TIMELOOP_NANOSEC) { + BMK_return_t benchres2 = BMK_benchMemInvertible(buf, ctx, 0, &cParams, BMK_compressOnly, BMK_timeMode, 1); + if(benchres2.error) { + return ERROR_RESULT; + } + benchres.result.cSpeed = benchres2.result.cSpeed; + } + + if(loopDurationD < TIMELOOP_NANOSEC) { + BMK_return_t benchres2 = BMK_benchMemInvertible(buf, ctx, 0, &cParams, BMK_decodeOnly, BMK_timeMode, 1); + if(benchres2.error) { + return ERROR_RESULT; + } + benchres.result.dSpeed = benchres2.result.dSpeed; + } + + *resultPtr = benchres.result; + + /* compare by resultScore when in infeas */ + /* compare by compareResultLT when in feas */ + if((!feas && (resultScore(benchres.result, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) || + (feas && (compareResultLT(*winnerResult, benchres.result, target, buf.srcSize))) ) { + return BETTER_RESULT; + } 
else { + return WORSE_RESULT; } } /* wrap feasibleBench w/ memotable */ #define INFEASIBLE_THRESHOLD 200 -static int feasibleBenchMemo(BMK_result_t* resultPtr, - const void* srcBuffer, const size_t srcSize, - void* dstBuffer, const size_t dstSize, - void* dictBuffer, const size_t dictSize, - const size_t* fileSizes, const size_t nbFiles, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - const ZSTD_compressionParameters cParams, - const constraint_t target, - BMK_result_t* winnerResult, U8* memoTable, - const U32* varyParams, const int varyLen) { - - const size_t memind = memoTableInd(&cParams, varyParams, varyLen); - if(memoTable[memind] >= INFEASIBLE_THRESHOLD) { - return INFEASIBLE_RESULT; - } else { - const size_t blockSize = g_blockSize ? g_blockSize : srcSize; - U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles; - const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - void ** const dstPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const dstCapacities = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - U32 nbBlocks; - int res; - - if(!srcPtrs || !srcSizes || !dstPtrs || !dstCapacities) { - free(srcPtrs); - free(srcSizes); - free(dstPtrs); - free(dstCapacities); - DISPLAY("Allocation Error\n"); - return ERROR_RESULT; - } - - { - const char* srcPtr = (const char*)srcBuffer; - char* dstPtr = (char*)dstBuffer; - size_t dstSizeRemaining = dstSize; - U32 fileNb; - for (nbBlocks=0, fileNb=0; fileNb dstSizeRemaining) { - DEBUGOUTPUT("Warning: dstSize too small to benchmark completely \n"); - remaining = dstSizeRemaining; - dstSizeRemaining = 0; - } else { - dstSizeRemaining -= remaining; - } - for ( ; nbBlocks= INFEASIBLE_THRESHOLD) { - return INFEASIBLE_RESULT; //see feasibleBenchMemo for concerns - } else { - const size_t blockSize = g_blockSize ? g_blockSize : srcSize; - U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles; - const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - void ** const dstPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const dstCapacities = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - U32 nbBlocks; - int res; + int res; - if(!srcPtrs || !srcSizes || !dstPtrs || !dstCapacities) { - free(srcPtrs); - free(srcSizes); - free(dstPtrs); - free(dstCapacities); - DISPLAY("Allocation Error\n"); - return ERROR_RESULT; - } + if(memoTable[memind] >= INFEASIBLE_THRESHOLD) { return WORSE_RESULT; } - { - const char* srcPtr = (const char*)srcBuffer; - char* dstPtr = (char*)dstBuffer; - size_t dstSizeRemaining = dstSize; - U32 fileNb; - for (nbBlocks=0, fileNb=0; fileNb dstSizeRemaining) { - DEBUGOUTPUT("Warning: dstSize too small to benchmark completely \n"); - remaining = dstSizeRemaining; - dstSizeRemaining = 0; - } else { - dstSizeRemaining -= remaining; - } - for ( ; nbBlocks (hopefully) feasible */ - /* when nothing is found, this garbages part 2. 
*/ { - winnerInfo_t bestFeasible1; /* uses feasibleBench Metric */ - - //init these params - bestFeasible1.params = cparam; - bestFeasible1.result.cSpeed = 0; - bestFeasible1.result.dSpeed = 0; - bestFeasible1.result.cMem = (size_t)-1; - bestFeasible1.result.cSize = (size_t)-1; + winnerInfo_t bestFeasible1 = initWinnerInfo(cparam); DISPLAY("Climb Part 1\n"); while(better) { - int i, d; + int i, dist, offset; better = 0; DEBUGOUTPUT("Start\n"); cparam = winnerInfo.params; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, buf.srcSize); candidateInfo.params = cparam; //all dist-1 targets //if we early end this, we should also randomize the order these are picked. for(i = 0; i < varLen; i++) { - paramVaryOnce(varArray[i], 1, &candidateInfo.params); /* +1 */ - candidateInfo.params = sanitizeParams(candidateInfo.params); - //evaluate - if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { - //if(cParamValid(candidateInfo.params)) { - int res = infeasibleBenchMemo(&candidateInfo.result, - srcBuffer, srcSize, - dstBuffer, dstSize, - dictBuffer, dictSize, - fileSizes, nbFiles, - ctx, dctx, - candidateInfo.params, target, &winnerInfo.result, memoTable, - varArray, varLen); - if(res == FEASIBLE_RESULT) { /* synonymous with better when called w/ infeasibleBM */ - winnerInfo = candidateInfo; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); - better = 1; - if(feasible(candidateInfo.result, target) && objective_lt(bestFeasible1.result, winnerInfo.result)) { - bestFeasible1 = winnerInfo; - } - } - } - candidateInfo.params = cparam; - paramVaryOnce(varArray[i], -1, &candidateInfo.params); /* -1 */ - candidateInfo.params = sanitizeParams(candidateInfo.params); - //evaluate - if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { - //if(cParamValid(candidateInfo.params)) { - int res = infeasibleBenchMemo(&candidateInfo.result, - srcBuffer, srcSize, - dstBuffer, dstSize, - dictBuffer, dictSize, - fileSizes, nbFiles, - ctx, dctx, - candidateInfo.params, target, &winnerInfo.result, memoTable, - varArray, varLen); - if(res == FEASIBLE_RESULT) { - winnerInfo = candidateInfo; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); - better = 1; - if(feasible(candidateInfo.result, target) && objective_lt(bestFeasible1.result, winnerInfo.result)) { - bestFeasible1 = winnerInfo; + for(offset = -1; offset <= 1; offset += 2) { + candidateInfo.params = cparam; + paramVaryOnce(varArray[i], offset, &candidateInfo.params); /* +1 */ + candidateInfo.params = sanitizeParams(candidateInfo.params); + //evaluate + if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { + int res = benchMemo(&candidateInfo.result, + buf, ctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen, feas); + if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */ + winnerInfo = candidateInfo; + BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, buf.srcSize); + better = 1; + if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) { + bestFeasible1 = winnerInfo; + } } } } @@ -1966,235 +1533,255 @@ static winnerInfo_t climbOnce(const constraint_t target, if(better) { continue; } - //if 'better' enough, skip further parameter search, center there? - //possible improvement - guide direction here w/ knowledge rather than completely random variation. 
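Reviewer note: the two copies of the +1 / -1 probe removed above collapse into a single offset loop, so each tunable parameter's direct neighbours are tried before the search radius widens. A condensed sketch of that dist-1 pass, where benchCandidate() is a made-up stand-in for the benchMemo() call and its argument list:

    /* Condensed dist-1 neighbourhood pass; benchCandidate() is hypothetical. */
    cparam = winnerInfo.params;                 /* snapshot of the current winner */
    for (i = 0; i < varLen; i++) {
        int offset;
        for (offset = -1; offset <= 1; offset += 2) {
            candidateInfo.params = cparam;
            paramVaryOnce(varArray[i], offset, &candidateInfo.params);  /* one step up or down */
            candidateInfo.params = sanitizeParams(candidateInfo.params);
            if (!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))
                && benchCandidate(&candidateInfo) == BETTER_RESULT) {
                winnerInfo = candidateInfo;
                better = 1;
            }
        }
    }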
- for(d = 2; d < varLen + 2; d++) { /* varLen is # dimensions */ + + for(dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */ for(i = 0; i < 2 * varLen + 2; i++) { int res; candidateInfo.params = cparam; /* param error checking already done here */ - paramVariation(&candidateInfo.params, varArray, varLen, d); - res = infeasibleBenchMemo(&candidateInfo.result, - srcBuffer, srcSize, - dstBuffer, dstSize, - dictBuffer, dictSize, - fileSizes, nbFiles, - ctx, dctx, - candidateInfo.params, target, &winnerInfo.result, memoTable, - varArray, varLen); - if(res == FEASIBLE_RESULT) { /* synonymous with better in this case*/ + paramVariation(&candidateInfo.params, varArray, varLen, dist); + res = benchMemo(&candidateInfo.result, + buf, ctx, + candidateInfo.params, target, &winnerInfo.result, memoTable, + varArray, varLen, feas); + if(res == BETTER_RESULT) { /* synonymous with better in this case*/ winnerInfo = candidateInfo; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, buf.srcSize); better = 1; - if(feasible(candidateInfo.result, target) && objective_lt(bestFeasible1.result, winnerInfo.result)) { + if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) { bestFeasible1 = winnerInfo; } } } if(better) { - continue; + break; } } - //bias to test previous delta? - //change cparam -> candidate before restart + + if(!better) { //infeas -> feas -> stop. + if(feas) { return winnerInfo; } + + feas = 1; + better = 1; + winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */ + DISPLAY("Climb Part 2\n"); + } } winnerInfo = bestFeasible1; } - //break out if no feasible. 
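Reviewer note: with the feas flag, climbOnce() now runs the same hill climb twice. Phase 1 (feas == 0) ranks candidates by resultScore() until the feasible region is reached; phase 2 (feas == 1) restarts from bestFeasible1 and compares candidates with compareResultLT(). A condensed view of the control flow, where climbNeighbours() is an illustrative name for the probing loops above:

    /* Condensed two-phase control flow; climbNeighbours() is illustrative. */
    feas = 0;
    for (;;) {
        int const better = climbNeighbours(&winnerInfo, feas);  /* dist-1 then dist >= 2 probes */
        if (better) continue;        /* keep climbing within the current phase */
        if (feas) break;             /* phase 2 exhausted: done */
        feas = 1;                    /* switch from "reach feasible" to "optimize feasible" */
        winnerInfo = bestFeasible1;  /* restart phase 2 from the best feasible point seen */
    }
    return winnerInfo;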
- if(winnerInfo.result.cMem == (U32)-1) { - DEBUGOUTPUT("No Feasible Found\n"); - return winnerInfo; - } - DISPLAY("Climb Part 2\n"); - - better = 1; - /* feasible -> best feasible (hopefully) */ - { - while(better) { - int i, d; - better = 0; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); - //all dist-1 targets - cparam = winnerInfo.params; - candidateInfo.params = cparam; - for(i = 0; i < varLen; i++) { - paramVaryOnce(varArray[i], 1, &candidateInfo.params); - candidateInfo.params = sanitizeParams(candidateInfo.params); - - //evaluate - if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { - //if(cParamValid(candidateInfo.params)) { - int res = feasibleBenchMemo(&candidateInfo.result, - srcBuffer, srcSize, - dstBuffer, dstSize, - dictBuffer, dictSize, - fileSizes, nbFiles, - ctx, dctx, - candidateInfo.params, target, &winnerInfo.result, memoTable, - varArray, varLen); - if(res == FEASIBLE_RESULT) { - winnerInfo = candidateInfo; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); - better = 1; - } - } - candidateInfo.params = cparam; - paramVaryOnce(varArray[i], -1, &candidateInfo.params); - candidateInfo.params = sanitizeParams(candidateInfo.params); - //evaluate - if(!ZSTD_isError(ZSTD_checkCParams(candidateInfo.params))) { - int res = feasibleBenchMemo(&candidateInfo.result, - srcBuffer, srcSize, - dstBuffer, dstSize, - dictBuffer, dictSize, - fileSizes, nbFiles, - ctx, dctx, - candidateInfo.params, target, &winnerInfo.result, memoTable, - varArray, varLen); - if(res == FEASIBLE_RESULT) { - winnerInfo = candidateInfo; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); - better = 1; - } - } - } - //if 'better' enough, skip further parameter search, center there? - //possible improvement - guide direction here w/ knowledge rather than completely random variation. - for(d = 2; d < varLen + 2; d++) { /* varLen is # dimensions */ - for(i = 0; i < 2 * varLen + 2; i++) { - int res; - candidateInfo.params = cparam; - /* param error checking already done here */ - paramVariation(&candidateInfo.params, varArray, varLen, d); //info candidateInfo.params is garbage, this is too. - res = feasibleBenchMemo(&candidateInfo.result, - srcBuffer, srcSize, - dstBuffer, dstSize, - dictBuffer, dictSize, - fileSizes, nbFiles, - ctx, dctx, - candidateInfo.params, target, &winnerInfo.result, memoTable, - varArray, varLen); - if(res == FEASIBLE_RESULT) { - winnerInfo = candidateInfo; - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); - better = 1; - } - } - if(better) { - continue; - } - } - //bias to test previous delta? - //change cparam -> candidate before restart - } - } - return winnerInfo; } //optimizeForSize but with fixed strategy //place to configure/filter out strategy specific parameters. -//need args for all buffers and parameter stuff -//sanitization here. //flexible parameters: iterations of (failed?) climbing (or if we do non-random, maybe this is when everything is close to visitied) //weight more on visit for bad results, less on good results/more on later results / ones with more failures. //allocate memoTable here. //only real use for paramTarget is to get the fixed values, right? +//maybe allow giving it a first init? 
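Reviewer note: optimizeFixedStrategy() below is essentially a random-restart wrapper around climbOnce(): every try seeds the climb from a memo-table-constrained random starting point, and any improvement resets the try budget. Roughly, condensed from the function body that follows, with declarations omitted:

    /* Condensed restart loop of optimizeFixedStrategy(); declarations omitted. */
    int i = 0;
    while (i < tries) {
        randomConstrainedParams(&init, varNew, varLenNew, memoTable);   /* fresh start point */
        candidateInfo = climbOnce(target, varNew, varLenNew, memoTable, buf, ctx, init);
        if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) {
            winnerInfo = candidateInfo;   /* new best: reset the patience counter */
            i = 0;
        }
        i++;
    }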
static winnerInfo_t optimizeFixedStrategy( - const void* srcBuffer, const size_t srcSize, - void* dstBuffer, const size_t dstSize, - void* dictBuffer, const size_t dictSize, - const size_t* fileSizes, const size_t nbFiles, - const constraint_t target, ZSTD_compressionParameters paramTarget, - const ZSTD_strategy strat, const U32* varArray, const int varLen, U8* memoTable) { + buffers_t buf, contexts_t ctx, + const constraint_t target, ZSTD_compressionParameters paramTarget, + const ZSTD_strategy strat, + const varInds_t* varArray, const int varLen, + U8* memoTable, const int tries) { int i = 0; - U32* varNew = malloc(sizeof(U32) * varLen); - int varLenNew = sanitizeVarArray(varLen, varArray, varNew, strat); + varInds_t varNew[NUM_PARAMS]; + int varLenNew = sanitizeVarArray(varNew, varLen, varArray, strat); ZSTD_compressionParameters init; - ZSTD_CCtx* ctx = ZSTD_createCCtx(); - ZSTD_DCtx* dctx = ZSTD_createDCtx(); winnerInfo_t winnerInfo, candidateInfo; - winnerInfo.result.cSpeed = 0; - winnerInfo.result.dSpeed = 0; - winnerInfo.result.cMem = (size_t)(-1LL); - winnerInfo.result.cSize = (size_t)(-1LL); + winnerInfo = initWinnerInfo(emptyParams()); /* so climb is given the right fixed strategy */ paramTarget.strategy = strat; /* to pass ZSTD_checkCParams */ - //needs to happen after memoTableInit as that assumes 0 = undefined. cParamZeroMin(¶mTarget); init = paramTarget; - - if(!ctx || !dctx || !memoTable || !varNew) { - DISPLAY("NOT ENOUGH MEMORY ! ! ! \n"); - goto _cleanUp; - } - - while(i < 10) { //make i adjustable (user input?) depending on how much time they have. + while(i < tries) { //make i adjustable (user input?) depending on how much time they have. DEBUGOUTPUT("Restart\n"); //look into improving this to maximize distance from searched infeasible stuff / towards promising regions? randomConstrainedParams(&init, varNew, varLenNew, memoTable); - candidateInfo = climbOnce(target, varNew, varLenNew, memoTable, srcBuffer, srcSize, dstBuffer, dstSize, dictBuffer, dictSize, fileSizes, nbFiles, ctx, dctx, init); - if(objective_lt(winnerInfo.result, candidateInfo.result)) { + candidateInfo = climbOnce(target, varNew, varLenNew, memoTable, buf, ctx, init); + if(compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) { winnerInfo = candidateInfo; - DISPLAY("New Winner: "); - BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, srcSize); + BMK_printWinner(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, buf.srcSize); i = 0; } i++; } - -_cleanUp: - ZSTD_freeCCtx(ctx); - ZSTD_freeDCtx(dctx); - free(varNew); return winnerInfo; } -static int BMK_loadFiles(void* buffer, size_t bufferSize, - size_t* fileSizes, const char* const * const fileNamesTable, - unsigned nbFiles) +static void freeBuffers(buffers_t b) { + if(b.srcPtrs != NULL) { + free(b.srcPtrs[0]); + } + free(b.srcPtrs); + free(b.srcSizes); + + if(b.dstPtrs != NULL) { + free(b.dstPtrs[0]); + } + free(b.dstPtrs); + free(b.dstCapacities); + free(b.dstSizes); + + if(b.resPtrs != NULL) { + free(b.resPtrs[0]); + } + free(b.resPtrs); +} + +/* allocates buffer's arguments. 
returns success / failure */ +static int initBuffers(buffers_t* buff, const char* const * const fileNamesTable, + size_t nbFiles) { - size_t pos = 0, totalSize = 0; - unsigned n; - for (n=0; n srcPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); + buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); + buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); + buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) { + DISPLAY("alloc error\n"); + freeBuffers(*buff); + return 1; + } + + buff->srcPtrs[0] = malloc(benchedSize); + buff->dstPtrs[0] = malloc(ZSTD_compressBound(benchedSize) + (maxNbBlocks * 1024)); + buff->resPtrs[0] = malloc(benchedSize); + + if(!buff->srcPtrs[0] || !buff->dstPtrs[0] || !buff->resPtrs[0]) { + DISPLAY("alloc error\n"); + freeBuffers(*buff); + return 1; + } + + for(n = 0; n < nbFiles; n++) { FILE* f; U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); if (UTIL_isDirectory(fileNamesTable[n])) { DISPLAY("Ignoring %s directory... \n", fileNamesTable[n]); - fileSizes[n] = 0; continue; } if (fileSize == UTIL_FILESIZE_UNKNOWN) { DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]); - fileSizes[n] = 0; continue; } f = fopen(fileNamesTable[n], "rb"); if (f==NULL) { - DISPLAY("impossible to open file %s", fileNamesTable[n]); + DISPLAY("impossible to open file %s\n", fileNamesTable[n]); + freeBuffers(*buff); + fclose(f); return 10; } + DISPLAY("Loading %s... 
\r", fileNamesTable[n]); - if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ - { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); + + if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n; /* buffer too small - stop after this file */ + { + char* buffer = (char*)(buff->srcPtrs[0]); + size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f); + size_t blocked = 0; + while(blocked < readSize) { + buff->srcPtrs[blockNb] = (buffer) + (pos + blocked); + buff->srcSizes[blockNb] = blockSize; + blocked += blockSize; + blockNb++; + } + if(readSize > 0) { buff->srcSizes[blockNb - 1] = ((readSize - 1) % blockSize) + 1; } + if (readSize != (size_t)fileSize) { DISPLAY("could not read %s", fileNamesTable[n]); - return 11; + freeBuffers(*buff); + fclose(f); + return 1; } - pos += readSize; } - fileSizes[n] = (size_t)fileSize; - totalSize += (size_t)fileSize; + + pos += readSize; + + } fclose(f); } - if (totalSize == 0) { DISPLAY("\nno data to bench\n"); return 12; } + buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]); + buff->dstSizes[0] = buff->dstCapacities[0]; + buff->resSizes[0] = buff->srcSizes[0]; + + for(n = 1; n < blockNb; n++) { + buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1]; + buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1]; + buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]); + buff->dstSizes[n] = buff->dstCapacities[n]; + buff->resSizes[n] = buff->srcSizes[n]; + } + buff->srcSize = pos; + buff->nbBlocks = blockNb; + + if (pos == 0) { DISPLAY("\nno data to bench\n"); return 1; } + + return 0; +} + +static void freeContexts(contexts_t ctx) { + free(ctx.dictBuffer); + ZSTD_freeCCtx(ctx.cctx); + ZSTD_freeDCtx(ctx.dctx); +} + +static int initContexts(contexts_t* ctx, const char* dictFileName) { + FILE* f; + size_t readSize; + ctx->cctx = ZSTD_createCCtx(); + ctx->dctx = ZSTD_createDCtx(); + if(dictFileName == NULL) { + ctx->dictSize = 0; + ctx->dictBuffer = NULL; + return 0; + } + ctx->dictSize = UTIL_getFileSize(dictFileName); + ctx->dictBuffer = malloc(ctx->dictSize); + + f = fopen(dictFileName, "rb"); + + if(!f) { + DISPLAY("unable to open file\n"); + fclose(f); + freeContexts(*ctx); + return 1; + } + + if(ctx->dictSize > 64 MB || !(ctx->dictBuffer)) { + DISPLAY("dictionary too large\n"); + fclose(f); + freeContexts(*ctx); + return 1; + } + readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f); + if(readSize != ctx->dictSize) { + DISPLAY("unable to read file\n"); + fclose(f); + freeContexts(*ctx); + return 1; + } return 0; } @@ -2228,107 +1815,98 @@ static int nextStrategy(const int currentStrategy, const int bestStrategy) { } } +static ZSTD_compressionParameters maskParams(ZSTD_compressionParameters base, ZSTD_compressionParameters mask) { + base.windowLog = mask.windowLog ? mask.windowLog : base.windowLog; + base.chainLog = mask.chainLog ? mask.chainLog : base.chainLog; + base.hashLog = mask.hashLog ? mask.hashLog : base.hashLog; + base.searchLog = mask.searchLog ? mask.searchLog : base.searchLog; + base.searchLength = mask.searchLength ? mask.searchLength : base.searchLength; + base.targetLength = mask.targetLength ? mask.targetLength : base.targetLength; + base.strategy = mask.strategy ? mask.strategy : base.strategy; + return base; +} + +#define MAX_TRIES 8 //optimize fixed strategy. 
static int optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles, const char* dictFileName, constraint_t target, ZSTD_compressionParameters paramTarget, int cLevel) { - size_t benchedSize; - void* origBuff = NULL; - void* dictBuffer = NULL; - size_t dictBufferSize = 0; - U32 varArray [NUM_PARAMS]; + varInds_t varArray [NUM_PARAMS]; int ret = 0; - size_t* fileSizes = calloc(sizeof(size_t),nbFiles); const int varLen = variableParams(paramTarget, varArray); + winnerInfo_t winner = initWinnerInfo(emptyParams()); U8** allMT = NULL; - g_winner.result.cSize = (size_t)-1; + size_t k; + size_t maxBlockSize = 0; + contexts_t ctx; + buffers_t buf; + /* Init */ if(!cParamValid(paramTarget)) { - return 10; + return 1; } /* load dictionary*/ - if (dictFileName != NULL) { - U64 const dictFileSize = UTIL_getFileSize(dictFileName); - if (dictFileSize > 64 MB) { - DISPLAY("dictionary file %s too large", dictFileName); - ret = 10; - goto _cleanUp; - } - dictBufferSize = (size_t)dictFileSize; - dictBuffer = malloc(dictBufferSize); - if (dictBuffer==NULL) { - DISPLAY("not enough memory for dictionary (%u bytes)", - (U32)dictBufferSize); - ret = 11; - goto _cleanUp; - - } - - { - int errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, &dictBufferSize, &dictFileName, 1); - if(errorCode) { - ret = errorCode; - goto _cleanUp; - } - } + if(initBuffers(&buf, fileNamesTable, nbFiles)) { + DISPLAY("unable to load files\n"); + return 1; + } + + if(initContexts(&ctx, dictFileName)) { + DISPLAY("unable to load dictionary\n"); + freeBuffers(buf); + return 2; } - /* Fill input buffer */ if(nbFiles == 1) { DISPLAY("Loading %s... \r", fileNamesTable[0]); } else { DISPLAY("Loading %lu Files... \r", (unsigned long)nbFiles); } - { - U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles); - int ec; - unsigned i; - benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; - origBuff = malloc(benchedSize); - if(!origBuff || !fileSizes) { - DISPLAY("Not enough memory for stuff\n"); - ret = 1; - goto _cleanUp; - } - ec = BMK_loadFiles(origBuff, benchedSize, fileSizes, fileNamesTable, (U32)nbFiles); - if(ec) { - DISPLAY("Error Loading Files"); - ret = ec; - goto _cleanUp; - } - benchedSize = 0; - for(i = 0; i < nbFiles; i++) { - benchedSize += fileSizes[i]; - } - origBuff = realloc(origBuff, benchedSize); + for(k = 0; k < buf.nbBlocks; k++) { + maxBlockSize = MAX(buf.srcSizes[k], maxBlockSize); } - allMT = memoTableInitAll(paramTarget, target, varArray, varLen, benchedSize); + /* if strategy is fixed, only init that part of memotable */ + if(paramTarget.strategy) { + varInds_t varNew[NUM_PARAMS]; + int varLenNew = sanitizeVarArray(varNew, varLen, varArray, paramTarget.strategy); + allMT = calloc(sizeof(U8), (ZSTD_btultra + 1)); + if(allMT == NULL) { + ret = 57; + goto _cleanUp; + } + + allMT[paramTarget.strategy] = malloc(sizeof(U8) * memoTableLen(varNew, varLenNew)); + + if(allMT[paramTarget.strategy] == NULL) { + ret = 58; + goto _cleanUp; + } + + memoTableInit(allMT[paramTarget.strategy], paramTarget, target, varNew, varLenNew, maxBlockSize); + } else { + allMT = memoTableInitAll(paramTarget, target, varArray, varLen, maxBlockSize); + } + + if(!allMT) { + DISPLAY("MemoTable Init Error\n"); ret = 2; goto _cleanUp; } if(cLevel) { - BMK_result_t candidate; - const size_t blockSize = g_blockSize ? 
g_blockSize : benchedSize; - ZSTD_CCtx* const ctx = ZSTD_createCCtx(); - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - ZSTD_compressionParameters const CParams = ZSTD_getCParams(cLevel, blockSize, dictBufferSize); - if(BMK_benchParam(&candidate, origBuff, benchedSize, fileSizes, (U32)nbFiles, ctx, dctx, CParams)) { - ZSTD_freeCCtx(ctx); - ZSTD_freeDCtx(dctx); + winner.params = ZSTD_getCParams(cLevel, maxBlockSize, ctx.dictSize); + if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) { ret = 3; goto _cleanUp; } - target.cSpeed = (U32)candidate.cSpeed; //Maybe have a small bit of slack here, like x.99? - BMK_printWinner(stdout, cLevel, candidate, CParams, benchedSize); - - ZSTD_freeCCtx(ctx); - ZSTD_freeDCtx(dctx); + target.cSpeed = (U32)winner.result.cSpeed; //Maybe have a small bit of slack here, like x.99? + g_targetConstraints = target; + BMK_printWinner(stdout, cLevel, winner.result, winner.params, buf.srcSize); } g_targetConstraints = target; @@ -2340,106 +1918,40 @@ static int optimizeForSize(const char* const * const fileNamesTable, const size_ } else { DISPLAY("optimizing for %lu Files", (unsigned long)nbFiles); } - if(target.cSpeed != 0) { DISPLAY(" - limit compression speed %u MB/s", target.cSpeed / 1000000); } - if(target.dSpeed != 0) { DISPLAY(" - limit decompression speed %u MB/s", target.dSpeed / 1000000); } - if(target.cMem != (U32)-1) { DISPLAY(" - limit memory %u MB", target.cMem / 1000000); } + if(target.cSpeed != 0) { DISPLAY(" - limit compression speed %u MB/s", target.cSpeed >> 20); } + if(target.dSpeed != 0) { DISPLAY(" - limit decompression speed %u MB/s", target.dSpeed >> 20); } + if(target.cMem != (U32)-1) { DISPLAY(" - limit memory %u MB", target.cMem >> 20); } + DISPLAY("\n"); findClockGranularity(); - { ZSTD_CCtx* const ctx = ZSTD_createCCtx(); - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - winnerInfo_t winner; - U32 varNew[NUM_PARAMS]; - const size_t blockSize = g_blockSize ? g_blockSize : benchedSize; - U32 const maxNbBlocks = (U32) ((benchedSize + (blockSize-1)) / blockSize) + 1; - const size_t maxCompressedSize = ZSTD_compressBound(benchedSize) + (maxNbBlocks * 1024); - void* compressedBuffer = malloc(maxCompressedSize); - - /* init */ - if (ctx==NULL) { DISPLAY("\n ZSTD_createCCtx error \n"); free(origBuff); return 14;} - if(compressedBuffer==NULL) { DISPLAY("\n Allocation Error \n"); free(origBuff); free(ctx); return 15; } - memset(&winner, 0, sizeof(winner)); - winner.result.cSize = (size_t)(-1); - + { + varInds_t varNew[NUM_PARAMS]; /* find best solution from default params */ { /* strategy selection */ const int maxSeeds = g_noSeed ? 
1 : ZSTD_maxCLevel(); DEBUGOUTPUT("Strategy Selection\n"); - if(varLen == NUM_PARAMS && paramTarget.strategy == 0) { /* no variable based constraints */ + if(paramTarget.strategy == 0) { /* no variable based constraints */ BMK_result_t candidate; - int feas = 0, i; + int i; for (i=1; i<=maxSeeds; i++) { - ZSTD_compressionParameters const CParams = ZSTD_getCParams(i, blockSize, dictBufferSize); - int ec = BMK_benchParam(&candidate, origBuff, benchedSize, fileSizes, (U32)nbFiles, ctx, dctx, CParams); - BMK_printWinner(stdout, i, candidate, CParams, benchedSize); - - if(!ec) { - if(feas) { - if(feasible(candidate, relaxTarget(target)) && objective_lt(winner.result, candidate)) { - winner.result = candidate; - winner.params = CParams; - } - } else { - if(feasible(candidate, relaxTarget(target))) { - feas = 1; - winner.result = candidate; - winner.params = CParams; - - } else { - if(resultScore(candidate, benchedSize, target) > resultScore(winner.result, benchedSize, target)) { - winner.result = candidate; - winner.params = CParams; - } - } - } - } - } //best, -1, +1, ..., - - } else if (paramTarget.strategy == 0) { //constrained - int feas = 0, i, j; - for(j = 1; j < 10; j++) { - for(i = 1; i <= maxSeeds; i++) { - int varLenNew = sanitizeVarArray(varLen, varArray, varNew, i); - ZSTD_compressionParameters candidateParams = paramTarget; - BMK_result_t candidate; - int ec; - randomConstrainedParams(&candidateParams, varNew, varLenNew, allMT[i]); - cParamZeroMin(&candidateParams); - candidateParams = sanitizeParams(candidateParams); - ec = BMK_benchParam(&candidate, origBuff, benchedSize, fileSizes, (U32)nbFiles, ctx, dctx, candidateParams); - - if(!ec) { - if(feas) { - if(feasible(candidate, relaxTarget(target)) && objective_lt(winner.result, candidate)) { - winner.result = candidate; - winner.params = candidateParams; - BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize); - } - } else { - if(feasible(candidate, relaxTarget(target))) { - feas = 1; - winner.result = candidate; - winner.params = candidateParams; - BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize); - - } else { - if(resultScore(candidate, benchedSize, target) > resultScore(winner.result, benchedSize, target)) { - winner.result = candidate; - winner.params = candidateParams; - BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize); - } - } - } - } + int ec; + ZSTD_compressionParameters CParams = ZSTD_getCParams(i, maxBlockSize, ctx.dictSize); + CParams = maskParams(CParams, paramTarget); + ec = BMK_benchParam(&candidate, buf, ctx, CParams); + BMK_printWinner(stdout, i, candidate, CParams, buf.srcSize); + if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) { + winner.result = candidate; + winner.params = CParams; } } } } - BMK_printWinner(stdout, CUSTOM_LEVEL, winner.result, winner.params, benchedSize); + BMK_printWinner(stdout, CUSTOM_LEVEL, winner.result, winner.params, buf.srcSize); BMK_translateAdvancedParams(winner.params); DEBUGOUTPUT("Real Opt\n"); /* start 'real' tests */ @@ -2447,55 +1959,51 @@ static int optimizeForSize(const char* const * const fileNamesTable, const size_ int bestStrategy = (int)winner.params.strategy; if(paramTarget.strategy == 0) { int st = (int)winner.params.strategy; + int tries = MAX_TRIES; { - int varLenNew = sanitizeVarArray(varLen, varArray, varNew, st); + int varLenNew = sanitizeVarArray(varNew, varLen, varArray, st); winnerInfo_t w1 = climbOnce(target, varNew, varLenNew, allMT[st], - origBuff, benchedSize, 
compressedBuffer, maxCompressedSize, dictBuffer, dictBufferSize, - fileSizes, nbFiles, ctx, dctx, winner.params); - if(objective_lt(winner.result, w1.result)) { + buf, ctx, winner.params); + if(compareResultLT(winner.result, w1.result, target, buf.srcSize)) { winner = w1; } } - while(st) { - winnerInfo_t wc = optimizeFixedStrategy(origBuff, benchedSize, compressedBuffer, maxCompressedSize, dictBuffer, dictBufferSize, fileSizes, nbFiles, - target, paramTarget, st, varArray, varLen, allMT[st]); + while(st && tries) { + winnerInfo_t wc = optimizeFixedStrategy(buf, ctx, target, paramTarget, + st, varArray, varLen, allMT[st], tries); DEBUGOUTPUT("StratNum %d\n", st); - if(objective_lt(winner.result, wc.result)) { + if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) { winner = wc; } //We could double back to increase search of 'better' strategies st = nextStrategy(st, bestStrategy); + tries--; } } else { - winner = optimizeFixedStrategy(origBuff, benchedSize, compressedBuffer, maxCompressedSize, dictBuffer, dictBufferSize, fileSizes, nbFiles, - target, paramTarget, paramTarget.strategy, varArray, varLen, allMT[paramTarget.strategy]); + winner = optimizeFixedStrategy(buf, ctx, target, paramTarget, paramTarget.strategy, + varArray, varLen, allMT[paramTarget.strategy], 10); } } /* no solution found */ if(winner.result.cSize == (size_t)-1) { + ret = 1; DISPLAY("No feasible solution found\n"); - return 1; + goto _cleanUp; } /* end summary */ - BMK_printWinner(stdout, CUSTOM_LEVEL, winner.result, winner.params, benchedSize); + BMK_printWinner(stdout, CUSTOM_LEVEL, winner.result, winner.params, buf.srcSize); BMK_translateAdvancedParams(winner.params); DISPLAY("grillParams size - optimizer completed \n"); - - /* clean up*/ - ZSTD_freeCCtx(ctx); - ZSTD_freeDCtx(dctx); - } _cleanUp: - free(fileSizes); - free(dictBuffer); + freeContexts(ctx); + freeBuffers(buf); memoTableFreeAll(allMT); - free(origBuff); return ret; } @@ -2546,16 +2054,16 @@ static int usage(const char* exename) static int usage_advanced(void) { DISPLAY( "\nAdvanced options :\n"); - DISPLAY( " -T# : set level 1 speed objective \n"); - DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); - DISPLAY( " -i# : iteration loops [1-9](default : %i) \n", NBLOOPS); - DISPLAY( " -O# : find Optimized parameters for # MB/s compression speed (default : 0) \n"); - DISPLAY( " -S : Single run \n"); - DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n"); - DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100); - DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n", (U32)g_grillDuration_s, g_grillDuration_s / 3600); - DISPLAY( " -v : Prints Benchmarking output\n"); - DISPLAY( " -D : Next argument dictionary file\n"); + DISPLAY( " -T# : set level 1 speed objective \n"); + DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); + DISPLAY( " -i# : iteration loops (default : %i) \n", NBLOOPS); + DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n"); + DISPLAY( " -S : Single run \n"); + DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n"); + DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100); + DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n", (U32)g_grillDuration_s, g_grillDuration_s / 3600); + DISPLAY( " -v : Prints Benchmarking output\n"); + 
DISPLAY( " -D : Next argument dictionary file\n"); return 0; } @@ -2572,8 +2080,8 @@ int main(int argc, const char** argv) filenamesStart=0, result; const char* exename=argv[0]; - const char* input_filename = 0; - const char* dictFileName = 0; + const char* input_filename = NULL; + const char* dictFileName = NULL; U32 optimizer = 0; U32 main_pause = 0; int optimizerCLevel = 0; @@ -2591,6 +2099,9 @@ int main(int argc, const char** argv) for(i=1; i