From cc6539f4b9019f2bb025d5828c741c9c4993e57b Mon Sep 17 00:00:00 2001 From: George Lu Date: Mon, 11 Jun 2018 10:59:05 -0400 Subject: [PATCH 01/10] Requested changes Remove g_displaylevel/setNotificationLevel function Add extern "C" Remove averaging Reorder arguments --- tests/paramgrill.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index db45220c3..2d7e52a43 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -170,7 +170,6 @@ BMK_benchParam(BMK_result_t* resultPtr, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, const ZSTD_compressionParameters cParams) { - BMK_return_t res = BMK_benchMem(srcBuffer,srcSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File"); *resultPtr = res.result; return res.errorCode; From 20f4f3237911288d7e1af7e61dc7d8a5ab008cda Mon Sep 17 00:00:00 2001 From: George Lu Date: Tue, 12 Jun 2018 15:54:43 -0400 Subject: [PATCH 02/10] Add to bench -Remove global variables -Remove gv setting functions -Add advancedParams struct -Add defaultAdvancedParams(); -Change return type of bench Files -Change cli to use new interface -Changed error returns to own struct value -Change default compression benchmark to use decompress_generic -Add CustomBench function -Add Documentation for new functions --- programs/bench.c | 916 ++++++++++++++++++++++++++++----------------- programs/bench.h | 137 +++++-- programs/zstdcli.c | 27 +- tests/paramgrill.c | 4 +- 4 files changed, 692 insertions(+), 392 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 09697d1fe..7b9ea8218 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -64,9 +64,10 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); +//TODO: remove this gv as well +//Only used in Synthetic test. Separate? static U32 g_compressibilityDefault = 50; - /* ************************************* * console display ***************************************/ @@ -90,88 +91,51 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; # define DEBUG 0 #endif #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } -#define EXM_THROW(error, ...) { \ + +#define EXM_THROW_INT(errorNum, ...) { \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ - DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, "Error %i : ", errorNum); \ DISPLAYLEVEL(1, __VA_ARGS__); \ DISPLAYLEVEL(1, " \n"); \ - exit(error); \ + return errorNum; \ } +#define EXM_THROW(errorNum, retType, ...) 
{ \ + retType r; \ + memset(&r, 0, sizeof(retType)); \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", errorNum); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + r.error = errorNum; \ + return r; \ +} /* ************************************* * Benchmark Parameters ***************************************/ -static int g_additionalParam = 0; -static U32 g_decodeOnly = 0; - -void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } - - -//TODO : Deal with DISPLAYLEVEL for all these set functions - -static U32 g_nbSeconds = BMK_TIMETEST_DEFAULT_S; - -void BMK_setNbSeconds(unsigned nbSeconds) -{ - g_nbSeconds = nbSeconds; - DISPLAY("- test >= %u seconds per compression / decompression - \n", g_nbSeconds); -} - -static size_t g_blockSize = 0; - -void BMK_setBlockSize(size_t blockSize) -{ - g_blockSize = blockSize; - if (g_blockSize) DISPLAY("using blocks of size %u KB \n", (U32)(blockSize>>10)); -} - -void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } - -static U32 g_nbWorkers = 0; - -void BMK_setNbWorkers(unsigned nbWorkers) { -#ifndef ZSTD_MULTITHREAD - if (nbWorkers > 0) DISPLAY("Note : multi-threading is disabled \n"); -#endif - g_nbWorkers = nbWorkers; -} - -static U32 g_realTime = 0; -void BMK_setRealTime(unsigned priority) { - g_realTime = (priority>0); -} - -static U32 g_separateFiles = 0; -void BMK_setSeparateFiles(unsigned separate) { - g_separateFiles = (separate>0); -} - -static U32 g_ldmFlag = 0; -void BMK_setLdmFlag(unsigned ldmFlag) { - g_ldmFlag = ldmFlag; -} - -static U32 g_ldmMinMatch = 0; -void BMK_setLdmMinMatch(unsigned ldmMinMatch) { - g_ldmMinMatch = ldmMinMatch; -} - -static U32 g_ldmHashLog = 0; -void BMK_setLdmHashLog(unsigned ldmHashLog) { - g_ldmHashLog = ldmHashLog; -} #define BMK_LDM_PARAM_NOTSET 9999 -static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET; -void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { - g_ldmBucketSizeLog = ldmBucketSizeLog; + +BMK_advancedParams_t BMK_defaultAdvancedParams(void) { + BMK_advancedParams_t res = { + 0, /* mode */ + 0, /* nbCycles */ + BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ + 0, /* blockSize */ + 0, /* nbWorkers */ + 0, /* realTime */ + 1, /* separateFiles */ + 0, /* additionalParam */ + 0, /* ldmFlag */ + 0, /* ldmMinMatch */ + 0, /* ldmHashLog */ + BMK_LDM_PARAM_NOTSET, /* ldmBuckSizeLog */ + BMK_LDM_PARAM_NOTSET /* ldmHashEveryLog */ + }; + return res; } -static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET; -void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) { - g_ldmHashEveryLog = ldmHashEveryLog; -} /* ******************************************************** * Bench functions @@ -191,20 +155,264 @@ typedef struct { #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) -BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, +static void BMK_initCCtx(ZSTD_CCtx* ctx, + const void* dictBuffer, size_t dictBufferSize, int cLevel, + const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) { + if (adv->nbWorkers==1) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0); + } else { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, adv->nbWorkers); + } + ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, adv->ldmFlag); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, adv->ldmMinMatch); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, adv->ldmHashLog); + if (adv->ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, adv->ldmBucketSizeLog); + } + if (adv->ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, adv->ldmHashEveryLog); + } + ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy); + ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); +} + + +static void BMK_initDCtx(ZSTD_DCtx* dctx, + const void* dictBuffer, size_t dictBufferSize) { + ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); +} + +typedef struct { + ZSTD_CCtx* ctx; + const void* dictBuffer; + size_t dictBufferSize; + int cLevel; + const ZSTD_compressionParameters* comprParams; + const BMK_advancedParams_t* adv; +} BMK_initCCtxArgs; + +static size_t local_initCCtx(void* payload) { + BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; + BMK_initCCtx(ag->ctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); + return 0; +} + +typedef struct { + ZSTD_DCtx* dctx; + const void* dictBuffer; + size_t dictBufferSize; +} BMK_initDCtxArgs; + +static size_t local_initDCtx(void* payload) { + BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload; + BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize); + return 0; +} + +/* additional argument is just the context */ +static size_t local_defaultCompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) { + size_t moreToFlush = 1; + ZSTD_CCtx* ctx = (ZSTD_CCtx*)addArgs; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + in.src = srcBuffer; + in.size = srcSize; + in.pos = 0; + out.dst = dstBuffer; + out.size = dstSize; + out.pos = 0; + while (moreToFlush) { + moreToFlush = ZSTD_compress_generic(ctx, &out, &in, ZSTD_e_end); + if (ZSTD_isError(moreToFlush)) { + return moreToFlush; + } + } + return out.pos; +} + +/* addiional argument is just the context */ +static size_t local_defaultDecompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) { + size_t moreToFlush = 1; + ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + in.src = srcBuffer; + in.size = srcSize; + in.pos = 0; + out.dst = dstBuffer; + out.size = dstSize; + out.pos = 0; + while (moreToFlush) { + moreToFlush = ZSTD_decompress_generic(dctx, + &out, &in); + if (ZSTD_isError(moreToFlush)) { + 
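+            /* decompression error : hand the zstd error code straight back ; BMK_benchCustom checks it with ZSTD_isError() */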
return moreToFlush; + } + } + return out.pos; + +} + +//ignore above for error stuff, return type still undecided + +/* mode 0 : iter = # seconds, else iter = # cycles */ +/* initFn will be measured once, bench fn will be measured x times */ +/* benchFn should return error value or out Size */ +//problem : how to get cSize this way for ratio? +//also possible fastest rounds down to 0 if 0 < loopDuration < nbLoops (that would mean <1ns / op though) +/* takes # of blocks and list of size & stuff for each. */ +BMK_customReturn_t BMK_benchCustom( + const char* functionName, size_t blockCount, + const void* const * const srcBuffers, size_t* srcSizes, + void* const * const dstBuffers, size_t* dstSizes, + size_t (*initFn)(void*), size_t (*benchFn)(const void*, size_t, void*, size_t, void*), + void* initPayload, void* benchPayload, + unsigned mode, unsigned iter, + int displayLevel) { + size_t srcSize = 0, dstSize = 0, ind = 0; + unsigned toAdd = 1; + + BMK_customReturn_t retval; + U64 totalTime = 0, fastest = (U64)(-1LL); + UTIL_time_t clockStart; + + { + unsigned i; + for(i = 0; i < blockCount; i++) { + memset(dstBuffers[i], 0xE5, dstSizes[i]); /* warm up and erase result buffer */ + } + + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); + } + + /* display last 17 char's of functionName*/ + if (strlen(functionName)>17) functionName += strlen(functionName)-17; + if(!iter) { + if(mode) { + EXM_THROW(1, BMK_customReturn_t, "nbSeconds must be nonzero \n"); + } else { + EXM_THROW(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); + } + + } + + for(ind = 0; ind < blockCount; ind++) { + srcSize += srcSizes[ind]; + } + + //change to switch if more modes? + if(!mode) { + int completed = 0; + U64 const maxTime = (iter * TIMELOOP_NANOSEC) + 1; + unsigned nbLoops = 1; + UTIL_time_t coolTime = UTIL_getTime(); + while(!completed) { + unsigned i, j; + /* Overheat protection */ + if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { + DISPLAYLEVEL(2, "\rcooling down ... 
\r"); + UTIL_sleep(COOLPERIOD_SEC); + coolTime = UTIL_getTime(); + } + + for(i = 0; i < blockCount; i++) { + memset(dstBuffers[i], 0xD6, dstSizes[i]); /* warm up and erase result buffer */ + } + + clockStart = UTIL_getTime(); + (*initFn)(initPayload); + + for(i = 0; i < nbLoops; i++) { + for(j = 0; j < blockCount; j++) { + size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); + if(ZSTD_isError(res)) { + EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", + functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); + } else if (toAdd) { + dstSize += res; + } + } + toAdd = 0; + } + { U64 const loopDuration = UTIL_clockSpanNano(clockStart); + if (loopDuration > 0) { + fastest = MIN(fastest, loopDuration / nbLoops); + nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + } else { + assert(nbLoops < 40000000); /* avoid overflow */ + nbLoops *= 100; + } + totalTime += loopDuration; + completed = (totalTime >= maxTime); + } + } + } else { + unsigned i, j; + clockStart = UTIL_getTime(); + for(i = 0; i < iter; i++) { + for(j = 0; j < blockCount; j++) { + size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); + if(ZSTD_isError(res)) { + EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", + functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); + } else if(toAdd) { + dstSize += res; + } + } + toAdd = 0; + } + totalTime = UTIL_clockSpanNano(clockStart); + if(!totalTime) { + EXM_THROW(3, BMK_customReturn_t, "Cycle count (%u) too short to measure \n", iter); + } else { + fastest = totalTime / iter; + } + } + retval.error = 0; + retval.result.time = fastest; + retval.result.size = dstSize; + return retval; +} + +BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const ZSTD_compressionParameters* comprParams, const void* dictBuffer, size_t dictBufferSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - int displayLevel, const char* displayName) + int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) { - size_t const blockSize = ((g_blockSize>=32 && !g_decodeOnly) ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_DECODE_ONLY)) ? 
adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; - blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ - void* const compressedBuffer = malloc(maxCompressedSize); + + /* these are the blockTable parameters, just split up */ + const void ** const srcPtrs = malloc(maxNbBlocks * sizeof(void*)); + size_t* const srcSizes = malloc(maxNbBlocks * sizeof(size_t)); + + void ** const cPtrs = malloc(maxNbBlocks * sizeof(void*)); + size_t* const cSizes = malloc(maxNbBlocks * sizeof(size_t)); + + void ** const resPtrs = malloc(maxNbBlocks * sizeof(void*)); + size_t* const resSizes = malloc(maxNbBlocks * sizeof(size_t)); + + const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + void* compressedBuffer = malloc(maxCompressedSize); void* resultBuffer = malloc(srcSize); + BMK_return_t results; size_t const loadedCompressedSize = srcSize; @@ -213,317 +421,242 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, U32 nbBlocks; /* checks */ - if (!compressedBuffer || !resultBuffer || !blockTable) - EXM_THROW(31, "allocation error : not enough memory"); + if (!compressedBuffer || !resultBuffer || + !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes) + EXM_THROW(31, BMK_return_t, "allocation error : not enough memory"); if(!ctx || !dctx) - EXM_THROW(31, "error: passed in null context"); + EXM_THROW(31, BMK_return_t, "error: passed in null context"); /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ - if (g_nbWorkers==1) g_nbWorkers=0; /* prefer synchronous mode */ - - if (g_decodeOnly) { /* benchmark only decompression : source must be already compressed */ + if (adv->mode == BMK_DECODE_ONLY) { /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; U64 totalDSize64 = 0; U32 fileNb; for (fileNb=0; fileNb decodedSize) EXM_THROW(32, "original size is too large"); /* size_t overflow */ + if (totalDSize64 > decodedSize) EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ free(resultBuffer); resultBuffer = malloc(decodedSize); - if (!resultBuffer) EXM_THROW(33, "not enough memory"); + if (!resultBuffer) EXM_THROW(33, BMK_return_t, "not enough memory"); cSize = srcSize; srcSize = decodedSize; ratio = (double)srcSize / (double)cSize; - } } + } + } - /* Init blockTable data */ + /* Init data blocks */ { const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; char* resPtr = (char*)resultBuffer; U32 fileNb; for (nbBlocks=0, fileNb=0; fileNbmode == BMK_DECODE_ONLY) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize); U32 const blockEnd = nbBlocks + nbBlocksforThisFile; for ( ; nbBlocksmode == BMK_DECODE_ONLY) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); + //blockTable[nbBlocks].cSize = blockTable[nbBlocks].cRoom; + resPtrs[nbBlocks] = (void*)resPtr; + resSizes[nbBlocks] = (adv->mode == BMK_DECODE_ONLY) ? 
(size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize; - cPtr += blockTable[nbBlocks].cRoom; + cPtr += cSizes[nbBlocks]; //blockTable[nbBlocks].cRoom; resPtr += thisBlockSize; remaining -= thisBlockSize; - } } } + } + } + } /* warmimg up memory */ - if (g_decodeOnly) { + if (adv->mode == BMK_DECODE_ONLY) { memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } else { RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); } /* Bench */ - { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); - U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0); - UTIL_time_t coolTime; - U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 1; - U32 nbDecodeLoops = (U32)((100 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */ - U32 nbCompressionLoops = (U32)((2 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */ - U64 totalCTime=0, totalDTime=0; - U32 cCompleted=g_decodeOnly, dCompleted=0; + + //TODO: Make sure w/o new loop decode_only code isn't run + //TODO: Support nbLoops and nbSeconds + { + U64 const crcOrig = (adv->mode == BMK_DECODE_ONLY) ? 0 : XXH64(srcBuffer, srcSize, 0); # define NB_MARKS 4 const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; - - coolTime = UTIL_getTime(); DISPLAYLEVEL(2, "\r%79s\r", ""); - while (!cCompleted || !dCompleted) { - /* overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DISPLAYLEVEL(2, "\rcooling down ... \r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); + if (adv->mode != BMK_DECODE_ONLY) { + BMK_initCCtxArgs cctxprep = { ctx, dictBuffer, dictBufferSize, cLevel, comprParams, adv }; + BMK_customReturn_t compressionResults; + /* Compression */ + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); + compressionResults = BMK_benchCustom("ZSTD_compress_generic", nbBlocks, + srcPtrs, srcSizes, cPtrs, cSizes, + &local_initCCtx, &local_defaultCompress, + (void*)&cctxprep, (void*)(ctx), + adv->loopMode, adv->nbSeconds, displayLevel); + + if(compressionResults.error) { + results.error = compressionResults.error; + return results; } - if (!g_decodeOnly) { - /* Compression */ - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ + results.result.cSize = compressionResults.result.size; + ratio = (double)srcSize / (double)results.result.cSize; + markNb = (markNb+1) % NB_MARKS; + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = ((double)srcSize / compressionResults.result.time) * 1000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; + results.result.cSpeed = compressionSpeed * 1000000; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); + } + } /* if (adv->mode != BMK_DECODE_ONLY) */ + { + BMK_initDCtxArgs dctxprep = { dctx, dictBuffer, dictBufferSize }; + BMK_customReturn_t decompressionResults; - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); + decompressionResults = BMK_benchCustom("ZSTD_decompress_generic", nbBlocks, + (const void * const *)cPtrs, cSizes, resPtrs, resSizes, + &local_initDCtx, &local_defaultDecompress, + (void*)&dctxprep, (void*)(dctx), + adv->loopMode, adv->nbSeconds, displayLevel); - if (!cCompleted) { /* still some time to do compression tests */ - U32 nbLoops = 0; - UTIL_time_t const clockStart = UTIL_getTime(); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, g_nbWorkers); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog); - if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) { - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog); - } - if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) { - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog); - } - ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy); - ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); - - if (!g_nbSeconds) nbCompressionLoops=1; - for (nbLoops=0; nbLoops 0) { - if (loopDuration < fastestC * nbCompressionLoops) - fastestC = loopDuration / nbCompressionLoops; - nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1; - } else { - assert(nbCompressionLoops < 40000000); /* avoid overflow */ - nbCompressionLoops *= 100; - } - totalCTime += loopDuration; - cCompleted = (totalCTime >= maxTime); /* end compression tests */ - } } - - cSize = 0; - { U32 blockNb; for (blockNb=0; blockNb%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed ); - } - } /* if (!g_decodeOnly) */ - -#if 0 /* disable decompression test */ - dCompleted=1; - (void)totalDTime; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#else - /* Decompression */ - if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ - - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); - - if (!dCompleted) { - U32 nbLoops = 0; - ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); - UTIL_time_t const clockStart = UTIL_getTime(); - if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); - if (!g_nbSeconds) nbDecodeLoops = 1; - for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) { - U32 blockNb; - for (blockNb=0; blockNb 0) { - if (loopDuration < fastestD * nbDecodeLoops) - 
fastestD = loopDuration / nbDecodeLoops; - nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1; - } else { - assert(nbDecodeLoops < 40000000); /* avoid overflow */ - nbDecodeLoops *= 100; - } - totalDTime += loopDuration; - dCompleted = (totalDTime >= maxTime); - } } + if(decompressionResults.error) { + results.error = decompressionResults.error; + return results; + } markNb = (markNb+1) % NB_MARKS; { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = ((double)srcSize / fastestC) * 1000; + double const compressionSpeed = results.result.cSpeed / 1000000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / fastestD) * 1000; - results.result.cSpeed = compressionSpeed * 1000000; + double const decompressionSpeed = ((double)srcSize / decompressionResults.result.time) * 1000; results.result.dSpeed = decompressionSpeed * 1000000; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)cSize, + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, ratioAccuracy, ratio, cSpeedAccuracy, compressionSpeed, decompressionSpeed); } - - /* CRC Checking */ - { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); - if (!g_decodeOnly && (crcOrig!=crcCheck)) { - size_t u; - DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); - for (u=0; u u) break; - bacc += blockTable[segNb].srcSize; - } - pos = (U32)(u - bacc); - bNb = pos / (128 KB); - DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); - if (u>5) { - int n; - DISPLAY("origin: "); - for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); - DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); - for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); - DISPLAY(" \n"); - DISPLAY("decode: "); - for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); - DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); - for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); - DISPLAY(" \n"); - } - break; - } - if (u==srcSize-1) { /* should never happen */ - DISPLAY("no difference detected\n"); - } } - break; - } } /* CRC Checking */ -#endif - } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */ - - if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ - double const cSpeed = ((double)srcSize / fastestC) * 1000; - double const dSpeed = ((double)srcSize / fastestD) * 1000; - if (g_additionalParam) - DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); - else - DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); } - DISPLAYLEVEL(2, "%2i#\n", cLevel); - } /* Bench */ + + /* CRC Checking */ + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if ((adv->mode != BMK_DECODE_ONLY) && (crcOrig!=crcCheck)) { + size_t u; + DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u u) break; + bacc += srcSizes[segNb]; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); + if (u>5) { + int n; + DISPLAY("origin: "); + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" \n"); + DISPLAY("decode: "); + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" \n"); + } + break; + } + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } + } + } + } /* CRC Checking */ + + if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ + double const cSpeed = results.result.cSpeed / 1000000; + double const dSpeed = results.result.dSpeed / 1000000; + if (adv->additionalParam) + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam); + else + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } + DISPLAYLEVEL(2, "%2i#\n", cLevel); +} /* Bench */ /* clean up */ - free(blockTable); free(compressedBuffer); free(resultBuffer); - results.errorCode = 0; + + free(srcPtrs); + free(srcSizes); + free(cPtrs); + free(cSizes); + free(resPtrs); + free(resSizes); + + results.error = 0; return results; } -static void BMK_benchMemCtxless(const void* srcBuffer, size_t srcSize, +BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName) { + + const BMK_advancedParams_t adv = BMK_defaultAdvancedParams(); + return BMK_benchMemAdvanced(srcBuffer, srcSize, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + ctx, dctx, + displayLevel, displayName, &adv); +} + +static BMK_return_t BMK_benchMemCtxless(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, int cLevel, const ZSTD_compressionParameters* const comprParams, const void* dictBuffer, size_t dictBufferSize, - int displayLevel, const char* displayName) + int displayLevel, const char* displayName, + const BMK_advancedParams_t * const adv) { + BMK_return_t res; ZSTD_CCtx* ctx = ZSTD_createCCtx(); ZSTD_DCtx* dctx = ZSTD_createDCtx(); if(ctx == NULL || dctx == NULL) { - EXM_THROW(12, "not enough memory for contexts"); + EXM_THROW(12, BMK_return_t, "not enough memory for contexts"); } - BMK_benchMem(srcBuffer, srcSize, + res = BMK_benchMemAdvanced(srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, ctx, dctx, - displayLevel, displayName); + displayLevel, displayName, adv); ZSTD_freeCCtx(ctx); ZSTD_freeDCtx(dctx); + return res; } static size_t BMK_findMaxMem(U64 requiredMem) @@ -544,44 +677,59 @@ static size_t BMK_findMaxMem(U64 requiredMem) return (size_t)(requiredMem); } +ERROR_STRUCT(BMK_result_t*, BMK_returnPtr_t); + /* returns average stats over all range [cLevel, cLevelLast] */ -static void BMK_benchCLevel(const void* srcBuffer, size_t 
benchedSize, +static BMK_returnPtr_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const int cLevelLast, const ZSTD_compressionParameters* comprParams, const void* dictBuffer, size_t dictBufferSize, - int displayLevel, const char* displayName) + int displayLevel, const char* displayName, + BMK_advancedParams_t const * const adv) { int l; + BMK_result_t* res = (BMK_result_t*)malloc(sizeof(BMK_result_t) * (cLevelLast - cLevel + 1)); + BMK_returnPtr_t ret = { 0, res }; const char* pch = strrchr(displayName, '\\'); /* Windows */ if (!pch) pch = strrchr(displayName, '/'); /* Linux */ if (pch) displayName = pch+1; - if (g_realTime) { + if(res == NULL) { + EXM_THROW(12, BMK_returnPtr_t, "not enough memory\n"); + } + if (adv->realTime) { DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); SET_REALTIME_PRIORITY; } - if (displayLevel == 1 && !g_additionalParam) - DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); + if (displayLevel == 1 && !adv->additionalParam) + DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, adv->nbSeconds, (U32)(adv->blockSize>>10)); for (l=cLevel; l <= cLevelLast; l++) { + BMK_return_t rettmp; if (l==0) continue; /* skip level 0 */ - BMK_benchMemCtxless(srcBuffer, benchedSize, - fileSizes, nbFiles, - l, comprParams, - dictBuffer, dictBufferSize, - displayLevel, displayName); + rettmp = BMK_benchMemCtxless(srcBuffer, benchedSize, + fileSizes, nbFiles, + l, comprParams, + dictBuffer, dictBufferSize, + displayLevel, displayName, + adv); + if(rettmp.error) { + ret.error = rettmp.error; + return ret; + } + res[l-cLevel] = rettmp.result; } - return; + return ret; } /*! BMK_loadFiles() : * Loads `buffer` with content of files listed within `fileNamesTable`. * At most, fills `buffer` entirely. */ -static void BMK_loadFiles(void* buffer, size_t bufferSize, +static int BMK_loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes, const char* const * const fileNamesTable, unsigned nbFiles, int displayLevel) { @@ -601,44 +749,55 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, continue; } f = fopen(fileNamesTable[n], "rb"); - if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); + if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]); DISPLAYUPDATE(2, "Loading %s... 
\r", fileNamesTable[n]); if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); - if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]); pos += readSize; } fileSizes[n] = (size_t)fileSize; totalSize += (size_t)fileSize; fclose(f); } - if (totalSize == 0) EXM_THROW(12, "no data to bench"); + if (totalSize == 0) EXM_THROW_INT(12, "no data to bench"); + return 0; } -static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles, +static BMK_returnSet_t BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles, const char* const dictFileName, int const cLevel, int const cLevelLast, - const ZSTD_compressionParameters* const compressionParams, int displayLevel) + const ZSTD_compressionParameters* const compressionParams, int displayLevel, + const BMK_advancedParams_t * const adv) { void* srcBuffer; size_t benchedSize; void* dictBuffer = NULL; size_t dictBufferSize = 0; size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); + BMK_returnSet_t res; U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); - if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); + res.result.cLevel = cLevel; + res.result.cLevelLast = cLevelLast; + if (!fileSizes) EXM_THROW(12, BMK_returnSet_t, "not enough memory for fileSizes"); /* Load dictionary */ if (dictFileName != NULL) { U64 const dictFileSize = UTIL_getFileSize(dictFileName); if (dictFileSize > 64 MB) - EXM_THROW(10, "dictionary file %s too large", dictFileName); + EXM_THROW(10, BMK_returnSet_t, "dictionary file %s too large", dictFileName); dictBufferSize = (size_t)dictFileSize; dictBuffer = malloc(dictBufferSize); if (dictBuffer==NULL) - EXM_THROW(11, "not enough memory for dictionary (%u bytes)", + EXM_THROW(11, BMK_returnSet_t, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); - BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1, displayLevel); + { + int errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1, displayLevel); + if(errorCode) { + res.error = errorCode; + return res; + } + } } /* Memory allocation & restrictions */ @@ -647,76 +806,112 @@ static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigne if (benchedSize < totalSizeToLoad) DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); srcBuffer = malloc(benchedSize); - if (!srcBuffer) EXM_THROW(12, "not enough memory"); + if (!srcBuffer) EXM_THROW(12, BMK_returnSet_t, "not enough memory"); /* Load input buffer */ - BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles, displayLevel); - + { + int errorCode = BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles, displayLevel); + if(errorCode) { + res.error = errorCode; + return res; + } + } /* Bench */ - if (g_separateFiles) { + if (adv->separateFiles) { const BYTE* srcPtr = (const BYTE*)srcBuffer; U32 fileNb; - BMK_result_t* resultarray = (BMK_result_t*)malloc(sizeof(BMK_result_t) * nbFiles); - if(resultarray == NULL) EXM_THROW(12, "not enough memory"); + res.result.results = (BMK_result_t**)malloc(sizeof(BMK_result_t*) * nbFiles); + res.result.nbFiles = nbFiles; + if(res.result.results == NULL) EXM_THROW(12, BMK_returnSet_t, "not 
enough memory"); for (fileNb=0; fileNb 1) ? mfName : fileNamesTable[0]; - BMK_benchCLevel(srcBuffer, benchedSize, + { + const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; + res.result.results = (BMK_result_t**)malloc(sizeof(BMK_result_t*)); + BMK_returnPtr_t errorOrPtr = BMK_benchCLevel(srcBuffer, benchedSize, fileSizes, nbFiles, cLevel, cLevelLast, compressionParams, dictBuffer, dictBufferSize, - displayLevel, displayName); + displayLevel, displayName, + adv); + if(res.result.results == NULL) EXM_THROW(12, BMK_returnSet_t, "not enough memory"); + if(errorOrPtr.error) { + res.error = errorOrPtr.error; + return res; + } + res.result.results[0] = errorOrPtr.result; } } /* clean up */ free(srcBuffer); free(dictBuffer); free(fileSizes); + res.error = 0; + return res; } -static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, +static BMK_returnSet_t BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, const ZSTD_compressionParameters* compressionParams, - int displayLevel) + int displayLevel, const BMK_advancedParams_t * const adv) { char name[20] = {0}; size_t benchedSize = 10000000; void* const srcBuffer = malloc(benchedSize); - + BMK_returnSet_t res; + res.result.results = malloc(sizeof(BMK_result_t*)); + res.result.nbFiles = 1; + res.result.cLevel = cLevel; + res.result.cLevelLast = cLevelLast; /* Memory allocation */ - if (!srcBuffer) EXM_THROW(21, "not enough memory"); + if (!srcBuffer || !res.result.results) EXM_THROW(21, BMK_returnSet_t, "not enough memory"); /* Fill input buffer */ RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, + BMK_returnPtr_t errPtr = BMK_benchCLevel(srcBuffer, benchedSize, &benchedSize, 1, cLevel, cLevelLast, compressionParams, NULL, 0, - displayLevel, name); + displayLevel, name, adv); + if(errPtr.error) { + res.error = errPtr.error; + return res; + } + res.result.results[0] = errPtr.result; /* clean up */ free(srcBuffer); + res.error = 0; + return res; } -static void BMK_benchFilesFull(const char** fileNamesTable, unsigned nbFiles, +BMK_returnSet_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, int cLevel, int cLevelLast, - const ZSTD_compressionParameters* compressionParams, int displayLevel) + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t * const adv) { double const compressibility = (double)g_compressibilityDefault / 100; @@ -726,10 +921,12 @@ static void BMK_benchFilesFull(const char** fileNamesTable, unsigned nbFiles, if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); - if (nbFiles == 0) - BMK_syntheticTest(cLevel, cLevelLast, compressibility, compressionParams, displayLevel); - else - BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel); + if (nbFiles == 0) { + return BMK_syntheticTest(cLevel, cLevelLast, compressibility, compressionParams, displayLevel, adv); + } + else { + return BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel, adv); + } } int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, @@ -737,6 +934,21 @@ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, int cLevel, int cLevelLast, const ZSTD_compressionParameters* compressionParams, 
int displayLevel) { - BMK_benchFilesFull(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel); - return 0; + const BMK_advancedParams_t adv = BMK_defaultAdvancedParams(); + return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel, &adv).error; +} + +/* errorable or just return? */ +BMK_result_t BMK_getResult(BMK_resultSet_t resultSet, unsigned fileIdx, int cLevel) { + assert(resultSet.nbFiles > fileIdx); + assert(resultSet.cLevel <= cLevel && cLevel <= resultSet.cLevelLast); + return resultSet.results[fileIdx][cLevel - resultSet.cLevel]; +} + +void BMK_freeResultSet(BMK_resultSet_t src) { + unsigned i; + for(i = 0; i <= src.nbFiles; i++) { + free(src.results[i]); + } + free(src.results); } diff --git a/programs/bench.h b/programs/bench.h index 0ba6f8985..ad2682e9a 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -19,25 +19,97 @@ extern "C" { #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ #include "zstd.h" /* ZSTD_compressionParameters */ +#define BMK_COMPRESS_ONLY 2 +#define BMK_DECODE_ONLY 1 + +#define TIME_MODE = 0 +#define ITER_MODE = 1 + +#define ERROR_STRUCT(baseType, typeName) typedef struct { \ + int error; \ + baseType result; \ +} typeName + typedef struct { size_t cSize; double cSpeed; /* bytes / sec */ double dSpeed; } BMK_result_t; -/* 0 = no Error */ typedef struct { - int errorCode; - BMK_result_t result; -} BMK_return_t; + int cLevel; + int cLevelLast; + unsigned nbFiles; + BMK_result_t** results; +} BMK_resultSet_t; -/* called in cli */ -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, - int cLevel, int cLevelLast, const ZSTD_compressionParameters* compressionParams, - int displayLevel); +typedef struct { + size_t size; + U64 time; +} BMK_customResult_t; -/* basic benchmarking function, called in paramgrill - * ctx, dctx must be valid */ + +ERROR_STRUCT(BMK_result_t, BMK_return_t); +ERROR_STRUCT(BMK_resultSet_t, BMK_returnSet_t); +ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); + +/* want all 0 to be default, but wb ldmBucketSizeLog/ldmHashEveryLog */ +typedef struct { + unsigned mode; /* 0: all, 1: compress only 2: decode only */ + int loopMode; /* if loopmode, then nbSeconds = nbLoops */ + unsigned nbSeconds; /* default timing is in nbSeconds. 
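+                              * (illustrative note, not part of the original patch : callers are expected to start
+                              *  from BMK_defaultAdvancedParams() and override individual fields, as zstdcli.c does
+                              *  below, e.g.
+                              *      BMK_advancedParams_t adv = BMK_defaultAdvancedParams();
+                              *      adv.nbSeconds = 6;
+                              *      adv.mode = BMK_DECODE_ONLY;
+                              *  before passing &adv to BMK_benchMemAdvanced or BMK_benchFilesAdvanced.)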
If nbCycles != 0 then use that */ + size_t blockSize; /* Maximum allowable size of a block*/ + unsigned nbWorkers; /* multithreading */ + unsigned realTime; + unsigned separateFiles; + int additionalParam; + unsigned ldmFlag; + unsigned ldmMinMatch; + unsigned ldmHashLog; + unsigned ldmBucketSizeLog; + unsigned ldmHashEveryLog; +} BMK_advancedParams_t; + +/* returns default parameters used by nonAdvanced functions */ +BMK_advancedParams_t BMK_defaultAdvancedParams(void); + +/* functionName - name of function + * blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstSizes) + * initFn - (*initFn)(initPayload) is run once per benchmark + * benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstSizes[i], benchPayload) + * is run a variable number of times, specified by mode and iter args + * mode - if 0, iter will be interpreted as the minimum number of seconds to run + * iter - see mode + * displayLevel - what gets printed + * 0 : no display; + * 1 : errors; + * 2 : + result + interaction + warnings; + * 3 : + progression; + * 4 : + information + * return + * .error will give a nonzero value if any error has occured + * .result will contain the speed (B/s) and time per loop (ns) + */ +BMK_customReturn_t BMK_benchCustom(const char* functionName, size_t blockCount, + const void* const * const srcBuffers, size_t* srcSizes, + void* const * const dstBuffers, size_t* dstSizes, + size_t (*initFn)(void*), size_t (*benchFn)(const void*, size_t, void*, size_t, void*), + void* initPayload, void* benchPayload, + unsigned mode, unsigned iter, + int displayLevel); + +/* basic benchmarking function, called in paramgrill ctx, dctx must be provided */ +/* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode + * srcSize - size of data in srcBuffer + * cLevel - compression level + * comprParams - basic compression parameters + * dictBuffer - a dictionary if used, null otherwise + * dictBufferSize - size of dictBuffer, 0 otherwise + * ctx - Compression Context + * dctx - Decompression Context + * diplayLevel - see BMK_benchCustom + * displayName - name used in display + */ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const ZSTD_compressionParameters* comprParams, @@ -45,20 +117,37 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, int displayLevel, const char* displayName); -/* Set Parameters */ -void BMK_setNbSeconds(unsigned nbLoops); -void BMK_setBlockSize(size_t blockSize); -void BMK_setNbWorkers(unsigned nbWorkers); -void BMK_setRealTime(unsigned priority); -void BMK_setNotificationLevel(unsigned level); -void BMK_setSeparateFiles(unsigned separate); -void BMK_setAdditionalParam(int additionalParam); -void BMK_setDecodeOnlyMode(unsigned decodeFlag); -void BMK_setLdmFlag(unsigned ldmFlag); -void BMK_setLdmMinMatch(unsigned ldmMinMatch); -void BMK_setLdmHashLog(unsigned ldmHashLog); -void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); -void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog); +BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv); + +/* called in cli */ +/* fileNamesTable - name of files to benchmark + * 
nbFiles - number of files (size of fileNamesTable) + * dictFileName - name of dictionary file to load + * cLevel - lowest compression level to benchmark + * cLevellast - highest compression level to benchmark (everything in the range [cLevel, cLevellast]) will be benchmarked + * compressionParams - basic compression Parameters + * displayLevel - see BMK_benchCustom + */ +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, + int cLevel, int cLevelLast, const ZSTD_compressionParameters* compressionParams, + int displayLevel); + +BMK_returnSet_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, + const char* dictFileName, + int cLevel, int cLevelLast, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv); + +/* get data from resultSet */ +/* when aggregated (separateFiles = 0), just be getResult(r,0,cl) */ +BMK_result_t BMK_getResult(BMK_resultSet_t results, unsigned fileIdx, int cLevel); +void BMK_freeResultSet(BMK_resultSet_t src); #endif /* BENCH_H_121279284357 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 6b6a93528..73fb52169 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -398,6 +398,7 @@ int main(int argCount, const char* argv[]) setRealTimePrio = 0, singleThread = 0, ultra=0; + BMK_advancedParams_t adv = BMK_defaultAdvancedParams(); unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ size_t blockSize = 0; zstd_operation_mode operation = zom_compress; @@ -607,7 +608,7 @@ int main(int argCount, const char* argv[]) /* Decoding */ case 'd': #ifndef ZSTD_NOBENCH - BMK_setDecodeOnlyMode(1); + adv.mode = BMK_DECODE_ONLY; if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */ #endif operation=zom_decompress; argument++; break; @@ -700,7 +701,7 @@ int main(int argCount, const char* argv[]) case 'p': argument++; #ifndef ZSTD_NOBENCH if ((*argument>='0') && (*argument<='9')) { - BMK_setAdditionalParam(readU32FromChar(&argument)); + adv.additionalParam = (int)readU32FromChar(&argument); } else #endif main_pause=1; @@ -801,21 +802,21 @@ int main(int argCount, const char* argv[]) /* Check if benchmark is selected */ if (operation==zom_bench) { #ifndef ZSTD_NOBENCH - BMK_setSeparateFiles(separateFiles); - BMK_setBlockSize(blockSize); - BMK_setNbWorkers(nbWorkers); - BMK_setRealTime(setRealTimePrio); - BMK_setNbSeconds(bench_nbSeconds); - BMK_setLdmFlag(ldmFlag); - BMK_setLdmMinMatch(g_ldmMinMatch); - BMK_setLdmHashLog(g_ldmHashLog); + adv.separateFiles = separateFiles; + adv.blockSize = blockSize; + adv.nbWorkers = nbWorkers; + adv.realTime = setRealTimePrio; + adv.nbSeconds = bench_nbSeconds; + adv.ldmFlag = ldmFlag; + adv.ldmMinMatch = g_ldmMinMatch; + adv.ldmHashLog = g_ldmHashLog; if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { - BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog); + adv.ldmBucketSizeLog = g_ldmBucketSizeLog; } if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { - BMK_setLdmHashEveryLog(g_ldmHashEveryLog); + adv.ldmHashEveryLog = g_ldmHashEveryLog; } - BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, g_displayLevel); + BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, g_displayLevel, &adv); #else (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; #endif diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 2d7e52a43..025bc6aad 100644 --- 
a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -162,8 +162,6 @@ const char* g_stratName[ZSTD_btultra+1] = { "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra "}; /* TODO: support additional parameters (more files, fileSizes) */ - -//TODO: benchMem dctx can't = NULL in new system static size_t BMK_benchParam(BMK_result_t* resultPtr, const void* srcBuffer, size_t srcSize, @@ -172,7 +170,7 @@ BMK_benchParam(BMK_result_t* resultPtr, BMK_return_t res = BMK_benchMem(srcBuffer,srcSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File"); *resultPtr = res.result; - return res.errorCode; + return res.error; } static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize) From 8522346322d3209a8d9e078f927028c2e3d35341 Mon Sep 17 00:00:00 2001 From: George Lu Date: Thu, 14 Jun 2018 14:46:17 -0400 Subject: [PATCH 03/10] Make Fullbench use new function Rearrange Args Add nothing function Use new function, change locals to match New Display Comment cleanup Change builds --- .../fullbench-dll/fullbench-dll.vcxproj | 2 + build/VS2010/fullbench/fullbench.vcxproj | 2 + programs/bench.c | 214 ++++++++++-------- programs/bench.h | 21 +- programs/zstdcli.c | 4 +- tests/Makefile | 2 +- tests/fullbench.c | 82 +++---- 7 files changed, 167 insertions(+), 160 deletions(-) diff --git a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj index e697318e0..6939d4406 100644 --- a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +++ b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj @@ -167,11 +167,13 @@ + + diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj index d0cbcae98..25ae07d4b 100644 --- a/build/VS2010/fullbench/fullbench.vcxproj +++ b/build/VS2010/fullbench/fullbench.vcxproj @@ -174,6 +174,7 @@ + @@ -195,6 +196,7 @@ + diff --git a/programs/bench.c b/programs/bench.c index 7b9ea8218..ff0403ab1 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -64,8 +64,7 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); -//TODO: remove this gv as well -//Only used in Synthetic test. Separate? +/* remove this in the future? */ static U32 g_compressibilityDefault = 50; /* ************************************* @@ -119,8 +118,8 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; BMK_advancedParams_t BMK_defaultAdvancedParams(void) { BMK_advancedParams_t res = { - 0, /* mode */ - 0, /* nbCycles */ + BMK_both, /* mode */ + BMK_timeMode, /* loopMode */ BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ 0, /* blockSize */ 0, /* nbWorkers */ @@ -266,18 +265,14 @@ static size_t local_defaultDecompress( } -//ignore above for error stuff, return type still undecided - /* mode 0 : iter = # seconds, else iter = # cycles */ /* initFn will be measured once, bench fn will be measured x times */ /* benchFn should return error value or out Size */ -//problem : how to get cSize this way for ratio? -//also possible fastest rounds down to 0 if 0 < loopDuration < nbLoops (that would mean <1ns / op though) /* takes # of blocks and list of size & stuff for each. 
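 * As a rough usage sketch (illustrative only ; noopInit, copyFn and the single src/dst block
 * variables below are hypothetical, not part of this patch) : a caller could time a plain
 * memcpy over one block with
 *     size_t noopInit(void* p) { (void)p; return 0; }
 *     size_t copyFn(const void* src, size_t srcSize, void* dst, size_t dstSize, void* pl)
 *     { (void)dstSize; (void)pl; memcpy(dst, src, srcSize); return srcSize; }
 *     BMK_customReturn_t r = BMK_benchCustom("memcpy", 1, &srcPtr, &srcSize, &dstPtr, &dstSize,
 *                                            noopInit, copyFn, NULL, NULL, 0, 3, 2);
 * where mode 0 with iter 3 requests a timed run of at least ~3 seconds ; on success r.error is 0
 * and r.result holds the total output size and the fastest measured time per loop in nanoseconds.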
*/ BMK_customReturn_t BMK_benchCustom( const char* functionName, size_t blockCount, - const void* const * const srcBuffers, size_t* srcSizes, - void* const * const dstBuffers, size_t* dstSizes, + const void* const * const srcBuffers, const size_t* srcSizes, + void* const * const dstBuffers, const size_t* dstSizes, size_t (*initFn)(void*), size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* initPayload, void* benchPayload, unsigned mode, unsigned iter, @@ -302,9 +297,7 @@ BMK_customReturn_t BMK_benchCustom( /* display last 17 char's of functionName*/ if (strlen(functionName)>17) functionName += strlen(functionName)-17; if(!iter) { - if(mode) { - EXM_THROW(1, BMK_customReturn_t, "nbSeconds must be nonzero \n"); - } else { + if(mode == BMK_iterMode) { EXM_THROW(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); } @@ -314,73 +307,81 @@ BMK_customReturn_t BMK_benchCustom( srcSize += srcSizes[ind]; } - //change to switch if more modes? - if(!mode) { - int completed = 0; - U64 const maxTime = (iter * TIMELOOP_NANOSEC) + 1; - unsigned nbLoops = 1; - UTIL_time_t coolTime = UTIL_getTime(); - while(!completed) { + switch(mode) { + case BMK_timeMode: + { + int completed = 0; + U64 const maxTime = (iter * TIMELOOP_NANOSEC) + 1; + unsigned nbLoops = 1; + UTIL_time_t coolTime = UTIL_getTime(); + while(!completed) { + unsigned i, j; + /* Overheat protection */ + if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { + DISPLAYLEVEL(2, "\rcooling down ... \r"); + UTIL_sleep(COOLPERIOD_SEC); + coolTime = UTIL_getTime(); + } + + for(i = 0; i < blockCount; i++) { + memset(dstBuffers[i], 0xD6, dstSizes[i]); /* warm up and erase result buffer */ + } + + clockStart = UTIL_getTime(); + (*initFn)(initPayload); + + for(i = 0; i < nbLoops; i++) { + for(j = 0; j < blockCount; j++) { + size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); + if(ZSTD_isError(res)) { + EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", + functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); + } else if (toAdd) { + dstSize += res; + } + } + toAdd = 0; + } + { U64 const loopDuration = UTIL_clockSpanNano(clockStart); + if (loopDuration > 0) { + fastest = MIN(fastest, loopDuration / nbLoops); + nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + } else { + assert(nbLoops < 40000000); /* avoid overflow */ + nbLoops *= 100; + } + totalTime += loopDuration; + completed = (totalTime >= maxTime); + } + } + break; + } + case BMK_iterMode: + { unsigned i, j; - /* Overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DISPLAYLEVEL(2, "\rcooling down ... 
\r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); - } - - for(i = 0; i < blockCount; i++) { - memset(dstBuffers[i], 0xD6, dstSizes[i]); /* warm up and erase result buffer */ - } - clockStart = UTIL_getTime(); - (*initFn)(initPayload); - - for(i = 0; i < nbLoops; i++) { + for(i = 0; i < iter; i++) { for(j = 0; j < blockCount; j++) { size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); if(ZSTD_isError(res)) { EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); - } else if (toAdd) { + } else if(toAdd) { dstSize += res; } } toAdd = 0; } - { U64 const loopDuration = UTIL_clockSpanNano(clockStart); - if (loopDuration > 0) { - fastest = MIN(fastest, loopDuration / nbLoops); - nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; - } else { - assert(nbLoops < 40000000); /* avoid overflow */ - nbLoops *= 100; - } - totalTime += loopDuration; - completed = (totalTime >= maxTime); - } - } - } else { - unsigned i, j; - clockStart = UTIL_getTime(); - for(i = 0; i < iter; i++) { - for(j = 0; j < blockCount; j++) { - size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); - if(ZSTD_isError(res)) { - EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", - functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); - } else if(toAdd) { - dstSize += res; - } + totalTime = UTIL_clockSpanNano(clockStart); + if(!totalTime) { + EXM_THROW(3, BMK_customReturn_t, "Cycle count (%u) too short to measure \n", iter); + } else { + fastest = totalTime / iter; } - toAdd = 0; - } - totalTime = UTIL_clockSpanNano(clockStart); - if(!totalTime) { - EXM_THROW(3, BMK_customReturn_t, "Cycle count (%u) too short to measure \n", iter); - } else { - fastest = totalTime / iter; + break; } + default: + EXM_THROW(4, BMK_customReturn_t, "Unknown Mode \n"); } retval.error = 0; retval.result.time = fastest; @@ -396,18 +397,18 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) { - size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_DECODE_ONLY)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? 
adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; /* these are the blockTable parameters, just split up */ - const void ** const srcPtrs = malloc(maxNbBlocks * sizeof(void*)); - size_t* const srcSizes = malloc(maxNbBlocks * sizeof(size_t)); + const void ** const srcPtrs = (const void ** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - void ** const cPtrs = malloc(maxNbBlocks * sizeof(void*)); - size_t* const cSizes = malloc(maxNbBlocks * sizeof(size_t)); + void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - void ** const resPtrs = malloc(maxNbBlocks * sizeof(void*)); - size_t* const resSizes = malloc(maxNbBlocks * sizeof(size_t)); + void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* compressedBuffer = malloc(maxCompressedSize); @@ -430,7 +431,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ - if (adv->mode == BMK_DECODE_ONLY) { /* benchmark only decompression : source must be already compressed */ + if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; U64 totalDSize64 = 0; U32 fileNb; @@ -458,19 +459,18 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, U32 fileNb; for (nbBlocks=0, fileNb=0; fileNbmode == BMK_DECODE_ONLY) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize); + U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize); U32 const blockEnd = nbBlocks + nbBlocksforThisFile; for ( ; nbBlocksmode == BMK_DECODE_ONLY) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); - //blockTable[nbBlocks].cSize = blockTable[nbBlocks].cRoom; + cSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); resPtrs[nbBlocks] = (void*)resPtr; - resSizes[nbBlocks] = (adv->mode == BMK_DECODE_ONLY) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; + resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize; - cPtr += cSizes[nbBlocks]; //blockTable[nbBlocks].cRoom; + cPtr += cSizes[nbBlocks]; resPtr += thisBlockSize; remaining -= thisBlockSize; } @@ -478,26 +478,29 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, } /* warmimg up memory */ - if (adv->mode == BMK_DECODE_ONLY) { + if (adv->mode == BMK_decodeOnly) { memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } else { RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); } /* Bench */ - - //TODO: Make sure w/o new loop decode_only code isn't run - //TODO: Support nbLoops and nbSeconds { - U64 const crcOrig = (adv->mode == BMK_DECODE_ONLY) ? 0 : XXH64(srcBuffer, srcSize, 0); + U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 
0 : XXH64(srcBuffer, srcSize, 0); # define NB_MARKS 4 const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; DISPLAYLEVEL(2, "\r%79s\r", ""); - if (adv->mode != BMK_DECODE_ONLY) { - BMK_initCCtxArgs cctxprep = { ctx, dictBuffer, dictBufferSize, cLevel, comprParams, adv }; + if (adv->mode != BMK_decodeOnly) { + BMK_initCCtxArgs cctxprep; BMK_customReturn_t compressionResults; + cctxprep.ctx = ctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + cctxprep.adv = adv; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); compressionResults = BMK_benchCustom("ZSTD_compress_generic", nbBlocks, @@ -524,11 +527,15 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, ratioAccuracy, ratio, cSpeedAccuracy, compressionSpeed); } - } /* if (adv->mode != BMK_DECODE_ONLY) */ - { - BMK_initDCtxArgs dctxprep = { dctx, dictBuffer, dictBufferSize }; + } /* if (adv->mode != BMK_decodeOnly) */ + + if(adv->mode != BMK_compressOnly) { + BMK_initDCtxArgs dctxprep; BMK_customReturn_t decompressionResults; + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; decompressionResults = BMK_benchCustom("ZSTD_decompress_generic", nbBlocks, (const void * const *)cPtrs, cSizes, resPtrs, resSizes, &local_initDCtx, &local_defaultDecompress, @@ -556,7 +563,8 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, /* CRC Checking */ { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); - if ((adv->mode != BMK_DECODE_ONLY) && (crcOrig!=crcCheck)) { + /* adv->mode == 0 -> compress + decompress */ + if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { size_t u; DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u 1) ? 
mfName : fileNamesTable[0]; res.result.results = (BMK_result_t**)malloc(sizeof(BMK_result_t*)); - BMK_returnPtr_t errorOrPtr = BMK_benchCLevel(srcBuffer, benchedSize, + errorOrPtr = BMK_benchCLevel(srcBuffer, benchedSize, fileSizes, nbFiles, cLevel, cLevelLast, compressionParams, dictBuffer, dictBufferSize, @@ -877,7 +891,8 @@ static BMK_returnSet_t BMK_syntheticTest(int cLevel, int cLevelLast, double comp size_t benchedSize = 10000000; void* const srcBuffer = malloc(benchedSize); BMK_returnSet_t res; - res.result.results = malloc(sizeof(BMK_result_t*)); + BMK_returnPtr_t errPtr; + res.result.results = (BMK_result_t**)calloc(1,sizeof(BMK_result_t*)); res.result.nbFiles = 1; res.result.cLevel = cLevel; res.result.cLevelLast = cLevelLast; @@ -889,7 +904,7 @@ static BMK_returnSet_t BMK_syntheticTest(int cLevel, int cLevelLast, double comp /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_returnPtr_t errPtr = BMK_benchCLevel(srcBuffer, benchedSize, + errPtr = BMK_benchCLevel(srcBuffer, benchedSize, &benchedSize, 1, cLevel, cLevelLast, compressionParams, NULL, 0, @@ -901,7 +916,7 @@ static BMK_returnSet_t BMK_syntheticTest(int cLevel, int cLevelLast, double comp res.result.results[0] = errPtr.result; /* clean up */ - free(srcBuffer); + free((void*)srcBuffer); res.error = 0; return res; } @@ -947,7 +962,8 @@ BMK_result_t BMK_getResult(BMK_resultSet_t resultSet, unsigned fileIdx, int cLev void BMK_freeResultSet(BMK_resultSet_t src) { unsigned i; - for(i = 0; i <= src.nbFiles; i++) { + if(src.results == NULL) { return; } + for(i = 0; i < src.nbFiles; i++) { free(src.results[i]); } free(src.results); diff --git a/programs/bench.h b/programs/bench.h index ad2682e9a..67430f33d 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -19,11 +19,16 @@ extern "C" { #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ #include "zstd.h" /* ZSTD_compressionParameters */ -#define BMK_COMPRESS_ONLY 2 -#define BMK_DECODE_ONLY 1 +typedef enum { + BMK_timeMode = 0, + BMK_iterMode = 1 +} BMK_loopMode_t; -#define TIME_MODE = 0 -#define ITER_MODE = 1 +typedef enum { + BMK_both = 0, + BMK_decodeOnly = 1, + BMK_compressOnly = 2 +} BMK_mode_t; #define ERROR_STRUCT(baseType, typeName) typedef struct { \ int error; \ @@ -55,8 +60,8 @@ ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); /* want all 0 to be default, but wb ldmBucketSizeLog/ldmHashEveryLog */ typedef struct { - unsigned mode; /* 0: all, 1: compress only 2: decode only */ - int loopMode; /* if loopmode, then nbSeconds = nbLoops */ + BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */ + BMK_loopMode_t loopMode; /* if loopmode, then nbSeconds = nbLoops */ unsigned nbSeconds; /* default timing is in nbSeconds. 
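                                 (Reading this together with the loopMode field above: with
                                 loopMode == BMK_timeMode a value of 5 here requests roughly
                                 5 seconds of measurement, while with loopMode == BMK_iterMode
                                 the same 5 is read as exactly 5 iterations.)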
If nbCycles != 0 then use that */ size_t blockSize; /* Maximum allowable size of a block*/ unsigned nbWorkers; /* multithreading */ @@ -91,8 +96,8 @@ BMK_advancedParams_t BMK_defaultAdvancedParams(void); * .result will contain the speed (B/s) and time per loop (ns) */ BMK_customReturn_t BMK_benchCustom(const char* functionName, size_t blockCount, - const void* const * const srcBuffers, size_t* srcSizes, - void* const * const dstBuffers, size_t* dstSizes, + const void* const * const srcBuffers, const size_t* srcSizes, + void* const * const dstBuffers, const size_t* dstSizes, size_t (*initFn)(void*), size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* initPayload, void* benchPayload, unsigned mode, unsigned iter, diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 73fb52169..8b31a98b1 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -608,7 +608,7 @@ int main(int argCount, const char* argv[]) /* Decoding */ case 'd': #ifndef ZSTD_NOBENCH - adv.mode = BMK_DECODE_ONLY; + adv.mode = BMK_decodeOnly; if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */ #endif operation=zom_decompress; argument++; break; @@ -816,7 +816,7 @@ int main(int argCount, const char* argv[]) if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { adv.ldmHashEveryLog = g_ldmHashEveryLog; } - BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, g_displayLevel, &adv); + BMK_freeResultSet(BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, g_displayLevel, &adv).result); #else (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; #endif diff --git a/tests/Makefile b/tests/Makefile index c1482bc9c..0db60f1b1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -133,7 +133,7 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) fullbench fullbench32 : DEBUGFLAGS = # turn off assert() for speed measurements fullbench fullbench32 : $(ZSTD_FILES) -fullbench fullbench32 : $(PRGDIR)/datagen.c fullbench.c +fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/bench.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) fullbench-lib : zstd-staticLib diff --git a/tests/fullbench.c b/tests/fullbench.c index 6abdd4da0..2dee3db94 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -30,6 +30,7 @@ #include "zstd.h" /* ZSTD_versionString */ #include "util.h" /* time functions */ #include "datagen.h" +#include "bench.h" /* CustomBench*/ /*_************************************ @@ -93,14 +94,19 @@ static size_t BMK_findMaxMem(U64 requiredMem) /*_******************************************************* * Benchmark wrappers *********************************************************/ -size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_nothing(void* x) { + (void)x; + return 0; +} + +size_t local_ZSTD_compress(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)buff2; return ZSTD_compress(dst, dstSize, src, srcSize, 1); } static size_t g_cSize = 0; -size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decompress(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)src; (void)srcSize; return ZSTD_decompress(dst, dstSize, buff2, g_cSize); @@ -110,14 +116,14 @@ static ZSTD_DCtx* g_zdc = NULL; #ifndef ZSTD_DLL_IMPORT extern size_t 
ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); -size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)src; (void)srcSize; (void)dst; (void)dstSize; return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize); } extern size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeq, const void* src, size_t srcSize); -size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { int nbSeq; (void)src; (void)srcSize; (void)dst; (void)dstSize; @@ -126,7 +132,7 @@ size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const #endif static ZSTD_CStream* g_cstream= NULL; -size_t local_ZSTD_compressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_compressStream(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -143,7 +149,7 @@ size_t local_ZSTD_compressStream(void* dst, size_t dstCapacity, void* buff2, con return buffOut.pos; } -static size_t local_ZSTD_compress_generic_end(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_end(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -159,7 +165,7 @@ static size_t local_ZSTD_compress_generic_end(void* dst, size_t dstCapacity, voi return buffOut.pos; } -static size_t local_ZSTD_compress_generic_continue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_continue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -176,7 +182,7 @@ static size_t local_ZSTD_compress_generic_continue(void* dst, size_t dstCapacity return buffOut.pos; } -static size_t local_ZSTD_compress_generic_T2_end(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -193,7 +199,7 @@ static size_t local_ZSTD_compress_generic_T2_end(void* dst, size_t dstCapacity, return buffOut.pos; } -static size_t local_ZSTD_compress_generic_T2_continue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -212,7 +218,7 @@ static size_t local_ZSTD_compress_generic_T2_continue(void* dst, size_t dstCapac } static ZSTD_DStream* g_dstream= NULL; -static size_t local_ZSTD_decompressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_decompressStream(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -231,7 +237,7 @@ static size_t local_ZSTD_decompressStream(void* dst, size_t dstCapacity, void* b static ZSTD_CCtx* g_zcc = NULL; #ifndef ZSTD_DLL_IMPORT -size_t 
local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_compressContinue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { (void)buff2; ZSTD_compressBegin(g_zcc, 1 /* compressionLevel */); @@ -239,7 +245,7 @@ size_t local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, c } #define FIRST_BLOCK_SIZE 8 -size_t local_ZSTD_compressContinue_extDict(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { BYTE firstBlockBuf[FIRST_BLOCK_SIZE]; @@ -255,7 +261,7 @@ size_t local_ZSTD_compressContinue_extDict(void* dst, size_t dstCapacity, void* return ZSTD_compressEnd(g_zcc, dst, dstCapacity, (const BYTE*)src + FIRST_BLOCK_SIZE, srcSize - FIRST_BLOCK_SIZE); } -size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decompressContinue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { size_t regeneratedSize = 0; const BYTE* ip = (const BYTE*)buff2; @@ -288,8 +294,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) size_t const dstBuffSize = ZSTD_compressBound(srcSize); void* buff2; const char* benchName; - size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize); - double bestTime = 100000000.; + size_t (*benchFunction)(const void* src, size_t srcSize, void* dst, size_t dstSize, void* verifBuff); /* Selection */ switch(benchNb) @@ -419,46 +424,23 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) default : ; } + /* warming up memory */ { size_t i; for (i=0; i %s !! \n", benchName, ZSTD_getErrorName(benchResult)); - exit(1); - } } - { U64 const clockSpanNano = UTIL_clockSpanNano(clockStart); - double const averageTime = (double)clockSpanNano / TIME_SEC_NANOSEC / nbRounds; - if (clockSpanNano > 0) { - if (averageTime < bestTime) bestTime = averageTime; - assert(bestTime > (1./2000000000)); - nbRounds = (U32)(1. / bestTime); /* aim for 1 sec */ - DISPLAY("%2i- %-30.30s : %7.1f MB/s (%9u)\r", - loopNb, benchName, - (double)srcSize / (1 MB) / bestTime, - (U32)benchResult); - } else { - assert(nbRounds < 40000000); /* avoid overflow */ - nbRounds *= 100; - } - } } } - DISPLAY("%2u\n", benchNb); + { + BMK_customReturn_t r = BMK_benchCustom(benchName, 1, &src, &srcSize, (void * const * const)&dstBuff, &dstBuffSize, &local_nothing, benchFunction, + NULL, buff2, BMK_timeMode, 1, 2); + if(r.error) { + DISPLAY("ERROR %d ! ! 
\n", r.error); + exit(1); + } + DISPLAY("%2u#Speed: %f MB/s - Size: %f MB\n", benchNb, (double)srcSize / r.result.time * 1000, (double)r.result.size / 1000000); + } + _cleanOut: free(dstBuff); free(buff2); From 0d1ee22990a4f989d8991cb3aced85bbaca62d11 Mon Sep 17 00:00:00 2001 From: George Lu Date: Fri, 15 Jun 2018 16:21:08 -0400 Subject: [PATCH 04/10] Requested Changes Add Comment Simplify Interface (Remove resultSet) Reorder Arguments Remove customBench displayLevel Reorder bench.h Change benchFiles return type to match advanced Rename stuff --- programs/bench.c | 231 ++++++++++++++++++--------------------------- programs/bench.h | 150 +++++++++++++++-------------- programs/zstdcli.c | 16 +++- tests/fullbench.c | 8 +- 4 files changed, 188 insertions(+), 217 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index ff0403ab1..6d0e1c1ff 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -110,13 +110,25 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; return r; \ } +/* error without displaying */ +#define EXM_THROW_ND(errorNum, retType, ...) { \ + retType r; \ + memset(&r, 0, sizeof(retType)); \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error %i : ", errorNum); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + r.error = errorNum; \ + return r; \ +} + /* ************************************* * Benchmark Parameters ***************************************/ #define BMK_LDM_PARAM_NOTSET 9999 -BMK_advancedParams_t BMK_defaultAdvancedParams(void) { +BMK_advancedParams_t BMK_initAdvancedParams(void) { BMK_advancedParams_t res = { BMK_both, /* mode */ BMK_timeMode, /* loopMode */ @@ -124,7 +136,6 @@ BMK_advancedParams_t BMK_defaultAdvancedParams(void) { 0, /* blockSize */ 0, /* nbWorkers */ 0, /* realTime */ - 1, /* separateFiles */ 0, /* additionalParam */ 0, /* ldmFlag */ 0, /* ldmMinMatch */ @@ -239,7 +250,7 @@ static size_t local_defaultCompress( return out.pos; } -/* addiional argument is just the context */ +/* additional argument is just the context */ static size_t local_defaultDecompress( const void* srcBuffer, size_t srcSize, void* dstBuffer, size_t dstSize, @@ -269,14 +280,13 @@ static size_t local_defaultDecompress( /* initFn will be measured once, bench fn will be measured x times */ /* benchFn should return error value or out Size */ /* takes # of blocks and list of size & stuff for each. 
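   Note, as implemented below: on any failure this function returns a zeroed
   BMK_customReturn_t with .error set (via EXM_THROW_ND above) instead of exiting,
   so callers are expected to check .error before reading .result.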
*/ -BMK_customReturn_t BMK_benchCustom( - const char* functionName, size_t blockCount, - const void* const * const srcBuffers, const size_t* srcSizes, - void* const * const dstBuffers, const size_t* dstSizes, - size_t (*initFn)(void*), size_t (*benchFn)(const void*, size_t, void*, size_t, void*), - void* initPayload, void* benchPayload, - unsigned mode, unsigned iter, - int displayLevel) { +BMK_customReturn_t BMK_benchFunction( + size_t blockCount, + const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, + size_t (*initFn)(void*), void* initPayload, + size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, + unsigned mode, unsigned iter) { size_t srcSize = 0, dstSize = 0, ind = 0; unsigned toAdd = 1; @@ -287,24 +297,22 @@ BMK_customReturn_t BMK_benchCustom( { unsigned i; for(i = 0; i < blockCount; i++) { - memset(dstBuffers[i], 0xE5, dstSizes[i]); /* warm up and erase result buffer */ + memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ } UTIL_sleepMilli(5); /* give processor time to other processes */ UTIL_waitForNextTick(); } - /* display last 17 char's of functionName*/ - if (strlen(functionName)>17) functionName += strlen(functionName)-17; if(!iter) { if(mode == BMK_iterMode) { - EXM_THROW(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); + EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); } } for(ind = 0; ind < blockCount; ind++) { - srcSize += srcSizes[ind]; + srcSize += srcBlockSizes[ind]; } switch(mode) { @@ -318,13 +326,13 @@ BMK_customReturn_t BMK_benchCustom( unsigned i, j; /* Overheat protection */ if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DISPLAYLEVEL(2, "\rcooling down ... \r"); + DEBUGOUTPUT("\rcooling down ... 
\r"); UTIL_sleep(COOLPERIOD_SEC); coolTime = UTIL_getTime(); } for(i = 0; i < blockCount; i++) { - memset(dstBuffers[i], 0xD6, dstSizes[i]); /* warm up and erase result buffer */ + memset(dstBlockBuffers[i], 0xD6, dstBlockCapacities[i]); /* warm up and erase result buffer */ } clockStart = UTIL_getTime(); @@ -332,10 +340,10 @@ BMK_customReturn_t BMK_benchCustom( for(i = 0; i < nbLoops; i++) { for(j = 0; j < blockCount; j++) { - size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); + size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); if(ZSTD_isError(res)) { - EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", - functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); + EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", + j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); } else if (toAdd) { dstSize += res; } @@ -362,10 +370,10 @@ BMK_customReturn_t BMK_benchCustom( clockStart = UTIL_getTime(); for(i = 0; i < iter; i++) { for(j = 0; j < blockCount; j++) { - size_t res = (*benchFn)(srcBuffers[j], srcSizes[j], dstBuffers[j], dstSizes[j], benchPayload); + size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); if(ZSTD_isError(res)) { - EXM_THROW(2, BMK_customReturn_t, "%s() failed on block %u of size %u : %s \n", - functionName, j, (U32)dstSizes[j], ZSTD_getErrorName(res)); + EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", + j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); } else if(toAdd) { dstSize += res; } @@ -374,18 +382,18 @@ BMK_customReturn_t BMK_benchCustom( } totalTime = UTIL_clockSpanNano(clockStart); if(!totalTime) { - EXM_THROW(3, BMK_customReturn_t, "Cycle count (%u) too short to measure \n", iter); + EXM_THROW_ND(3, BMK_customReturn_t, "Cycle count (%u) too short to measure \n", iter); } else { fastest = totalTime / iter; } break; } default: - EXM_THROW(4, BMK_customReturn_t, "Unknown Mode \n"); + EXM_THROW_ND(4, BMK_customReturn_t, "Unknown Mode \n"); } retval.error = 0; - retval.result.time = fastest; - retval.result.size = dstSize; + retval.result.nanoSecPerRun = fastest; + retval.result.sumOfReturn = dstSize; return retval; } @@ -503,23 +511,23 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, cctxprep.adv = adv; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - compressionResults = BMK_benchCustom("ZSTD_compress_generic", nbBlocks, + compressionResults = BMK_benchFunction(nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, - &local_initCCtx, &local_defaultCompress, - (void*)&cctxprep, (void*)(ctx), - adv->loopMode, adv->nbSeconds, displayLevel); + &local_initCCtx, (void*)&cctxprep, + &local_defaultCompress, (void*)(ctx), + adv->loopMode, adv->nbSeconds); if(compressionResults.error) { results.error = compressionResults.error; return results; } - results.result.cSize = compressionResults.result.size; + results.result.cSize = compressionResults.result.sumOfReturn; ratio = (double)srcSize / (double)results.result.cSize; markNb = (markNb+1) % NB_MARKS; { int const ratioAccuracy = (ratio < 10.) ? 
3 : 2; - double const compressionSpeed = ((double)srcSize / compressionResults.result.time) * 1000; + double const compressionSpeed = ((double)srcSize / compressionResults.result.nanoSecPerRun) * 1000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; results.result.cSpeed = compressionSpeed * 1000000; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", @@ -536,11 +544,11 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, dctxprep.dctx = dctx; dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; - decompressionResults = BMK_benchCustom("ZSTD_decompress_generic", nbBlocks, + decompressionResults = BMK_benchFunction(nbBlocks, (const void * const *)cPtrs, cSizes, resPtrs, resSizes, - &local_initDCtx, &local_defaultDecompress, - (void*)&dctxprep, (void*)(dctx), - adv->loopMode, adv->nbSeconds, displayLevel); + &local_initDCtx, (void*)&dctxprep, + &local_defaultDecompress, (void*)(dctx), + adv->loopMode, adv->nbSeconds); if(decompressionResults.error) { results.error = decompressionResults.error; @@ -551,7 +559,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; double const compressionSpeed = results.result.cSpeed / 1000000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / decompressionResults.result.time) * 1000; + double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; results.result.dSpeed = decompressionSpeed * 1000000; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, @@ -634,7 +642,7 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, int displayLevel, const char* displayName) { - const BMK_advancedParams_t adv = BMK_defaultAdvancedParams(); + const BMK_advancedParams_t adv = BMK_initAdvancedParams(); return BMK_benchMemAdvanced(srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, @@ -685,31 +693,20 @@ static size_t BMK_findMaxMem(U64 requiredMem) return (size_t)(requiredMem); } -ERROR_STRUCT(BMK_result_t*, BMK_returnPtr_t); - -/* returns average stats over all range [cLevel, cLevelLast] */ -static BMK_returnPtr_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, +static BMK_return_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, const size_t* fileSizes, unsigned nbFiles, - const int cLevel, const int cLevelLast, const ZSTD_compressionParameters* comprParams, + const int cLevel, const ZSTD_compressionParameters* comprParams, const void* dictBuffer, size_t dictBufferSize, int displayLevel, const char* displayName, BMK_advancedParams_t const * const adv) { - int l; - BMK_result_t* res = (BMK_result_t*)malloc(sizeof(BMK_result_t) * (cLevelLast - cLevel + 1)); - BMK_returnPtr_t ret; + BMK_return_t res; const char* pch = strrchr(displayName, '\\'); /* Windows */ - ret.error = 0; - ret.result = res; - if (!pch) pch = strrchr(displayName, '/'); /* Linux */ if (pch) displayName = pch+1; - if(res == NULL) { - EXM_THROW(12, BMK_returnPtr_t, "not enough memory\n"); - } if (adv->realTime) { DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); SET_REALTIME_PRIORITY; @@ -718,23 +715,14 @@ static BMK_returnPtr_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize if (displayLevel == 1 && !adv->additionalParam) DISPLAY("bench %s %s: input %u bytes, %u 
seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, adv->nbSeconds, (U32)(adv->blockSize>>10)); - for (l=cLevel; l <= cLevelLast; l++) { - BMK_return_t rettmp; - if (l==0) continue; /* skip level 0 */ - rettmp = BMK_benchMemCtxless(srcBuffer, benchedSize, - fileSizes, nbFiles, - l, comprParams, - dictBuffer, dictBufferSize, - displayLevel, displayName, - adv); - if(rettmp.error) { - ret.error = rettmp.error; - return ret; - } - res[l-cLevel] = rettmp.result; - } + res = BMK_benchMemCtxless(srcBuffer, benchedSize, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + displayLevel, displayName, + adv); - return ret; + return res; } @@ -776,8 +764,8 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize, return 0; } -static BMK_returnSet_t BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles, - const char* const dictFileName, int const cLevel, int const cLevelLast, +static BMK_return_t BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel, const BMK_advancedParams_t * const adv) { @@ -786,23 +774,20 @@ static BMK_returnSet_t BMK_benchFileTable(const char* const * const fileNamesTab void* dictBuffer = NULL; size_t dictBufferSize = 0; size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); - BMK_returnSet_t res; + BMK_return_t res; U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); - res.result.cLevel = cLevel; - res.result.cLevelLast = cLevelLast; - res.result.results = NULL; - if (!fileSizes) EXM_THROW(12, BMK_returnSet_t, "not enough memory for fileSizes"); + if (!fileSizes) EXM_THROW(12, BMK_return_t, "not enough memory for fileSizes"); /* Load dictionary */ if (dictFileName != NULL) { U64 const dictFileSize = UTIL_getFileSize(dictFileName); if (dictFileSize > 64 MB) - EXM_THROW(10, BMK_returnSet_t, "dictionary file %s too large", dictFileName); + EXM_THROW(10, BMK_return_t, "dictionary file %s too large", dictFileName); dictBufferSize = (size_t)dictFileSize; dictBuffer = malloc(dictBufferSize); if (dictBuffer==NULL) - EXM_THROW(11, BMK_returnSet_t, "not enough memory for dictionary (%u bytes)", + EXM_THROW(11, BMK_return_t, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); { int errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1, displayLevel); @@ -819,7 +804,7 @@ static BMK_returnSet_t BMK_benchFileTable(const char* const * const fileNamesTab if (benchedSize < totalSizeToLoad) DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); srcBuffer = malloc(benchedSize); - if (!srcBuffer) EXM_THROW(12, BMK_returnSet_t, "not enough memory"); + if (!srcBuffer) EXM_THROW(12, BMK_return_t, "not enough memory"); /* Load input buffer */ { @@ -830,17 +815,18 @@ static BMK_returnSet_t BMK_benchFileTable(const char* const * const fileNamesTab } } /* Bench */ + /* if (adv->separateFiles) { const BYTE* srcPtr = (const BYTE*)srcBuffer; U32 fileNb; res.result.results = (BMK_result_t**)malloc(sizeof(BMK_result_t*) * nbFiles); res.result.nbFiles = nbFiles; - if(res.result.results == NULL) EXM_THROW(12, BMK_returnSet_t, "not enough memory"); + if(res.result.results == NULL) EXM_THROW(12, BMK_return_t, "not enough memory"); for (fileNb=0; fileNb 1) ? 
mfName : fileNamesTable[0]; - res.result.results = (BMK_result_t**)malloc(sizeof(BMK_result_t*)); - errorOrPtr = BMK_benchCLevel(srcBuffer, benchedSize, + res = BMK_benchCLevel(srcBuffer, benchedSize, fileSizes, nbFiles, - cLevel, cLevelLast, compressionParams, + cLevel, compressionParams, dictBuffer, dictBufferSize, displayLevel, displayName, adv); - if(res.result.results == NULL) EXM_THROW(12, BMK_returnSet_t, "not enough memory"); - if(errorOrPtr.error) { - res.error = errorOrPtr.error; - return res; - } - res.result.results[0] = errorOrPtr.result; } } /* clean up */ free(srcBuffer); free(dictBuffer); free(fileSizes); - res.error = 0; return res; } -static BMK_returnSet_t BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, +static BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, const ZSTD_compressionParameters* compressionParams, int displayLevel, const BMK_advancedParams_t * const adv) { char name[20] = {0}; size_t benchedSize = 10000000; void* const srcBuffer = malloc(benchedSize); - BMK_returnSet_t res; - BMK_returnPtr_t errPtr; - res.result.results = (BMK_result_t**)calloc(1,sizeof(BMK_result_t*)); - res.result.nbFiles = 1; - res.result.cLevel = cLevel; - res.result.cLevelLast = cLevelLast; + BMK_return_t res; /* Memory allocation */ - if (!srcBuffer || !res.result.results) EXM_THROW(21, BMK_returnSet_t, "not enough memory"); + if (!srcBuffer) EXM_THROW(21, BMK_return_t, "not enough memory"); /* Fill input buffer */ RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - errPtr = BMK_benchCLevel(srcBuffer, benchedSize, + res = BMK_benchCLevel(srcBuffer, benchedSize, &benchedSize, 1, - cLevel, cLevelLast, compressionParams, + cLevel, compressionParams, NULL, 0, displayLevel, name, adv); - if(errPtr.error) { - res.error = errPtr.error; - return res; - } - res.result.results[0] = errPtr.result; /* clean up */ free((void*)srcBuffer); - res.error = 0; + return res; } -BMK_returnSet_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, +BMK_return_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, - int cLevel, int cLevelLast, - const ZSTD_compressionParameters* compressionParams, + int cLevel, const ZSTD_compressionParameters* compressionParams, int displayLevel, const BMK_advancedParams_t * const adv) { double const compressibility = (double)g_compressibilityDefault / 100; if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); - if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); + /* if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); if (cLevelLast < cLevel) cLevelLast = cLevel; - if (cLevelLast > cLevel) - DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + if (cLevelLast > cLevel) + DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); */ if (nbFiles == 0) { - return BMK_syntheticTest(cLevel, cLevelLast, compressibility, compressionParams, displayLevel, adv); + return BMK_syntheticTest(cLevel, compressibility, compressionParams, displayLevel, adv); } else { - return BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel, adv); + return BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, adv); } } -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, +BMK_return_t BMK_benchFiles(const 
char** fileNamesTable, unsigned nbFiles, const char* dictFileName, - int cLevel, int cLevelLast, - const ZSTD_compressionParameters* compressionParams, + int cLevel, const ZSTD_compressionParameters* compressionParams, int displayLevel) { - const BMK_advancedParams_t adv = BMK_defaultAdvancedParams(); - return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel, &adv).error; -} - -/* errorable or just return? */ -BMK_result_t BMK_getResult(BMK_resultSet_t resultSet, unsigned fileIdx, int cLevel) { - assert(resultSet.nbFiles > fileIdx); - assert(resultSet.cLevel <= cLevel && cLevel <= resultSet.cLevelLast); - return resultSet.results[fileIdx][cLevel - resultSet.cLevel]; -} - -void BMK_freeResultSet(BMK_resultSet_t src) { - unsigned i; - if(src.results == NULL) { return; } - for(i = 0; i < src.nbFiles; i++) { - free(src.results[i]); - } - free(src.results); + const BMK_advancedParams_t adv = BMK_initAdvancedParams(); + return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv); } diff --git a/programs/bench.h b/programs/bench.h index 67430f33d..352166881 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -19,17 +19,12 @@ extern "C" { #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ #include "zstd.h" /* ZSTD_compressionParameters */ -typedef enum { - BMK_timeMode = 0, - BMK_iterMode = 1 -} BMK_loopMode_t; - -typedef enum { - BMK_both = 0, - BMK_decodeOnly = 1, - BMK_compressOnly = 2 -} BMK_mode_t; - +/* Creates a struct of type typeName with an int type .error field + * and a .result field of some baseType. Functions with return + * typeName pass a successful result with .error = 0 and .result + * with the intended result, while returning an error will result + * in .error != 0. + */ #define ERROR_STRUCT(baseType, typeName) typedef struct { \ int error; \ baseType result; \ @@ -42,49 +37,50 @@ typedef struct { } BMK_result_t; typedef struct { - int cLevel; - int cLevelLast; - unsigned nbFiles; - BMK_result_t** results; -} BMK_resultSet_t; - -typedef struct { - size_t size; - U64 time; + size_t sumOfReturn; /* sum of return values */ + U64 nanoSecPerRun; /* time per iteration */ } BMK_customResult_t; ERROR_STRUCT(BMK_result_t, BMK_return_t); -ERROR_STRUCT(BMK_resultSet_t, BMK_returnSet_t); ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); -/* want all 0 to be default, but wb ldmBucketSizeLog/ldmHashEveryLog */ +typedef enum { + BMK_timeMode = 0, + BMK_iterMode = 1 +} BMK_loopMode_t; + +typedef enum { + BMK_both = 0, + BMK_decodeOnly = 1, + BMK_compressOnly = 2 +} BMK_mode_t; + typedef struct { BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */ BMK_loopMode_t loopMode; /* if loopmode, then nbSeconds = nbLoops */ - unsigned nbSeconds; /* default timing is in nbSeconds. 
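                                 (A quick unit check for the BMK_customResult_t fields declared
                                 above, with illustrative numbers: a 10 000 000-byte input measured
                                 at nanoSecPerRun = 5 000 000 ns corresponds to
                                 (10000000 / 5000000) * 1000 = 2000 MB/s, the same formula bench.c
                                 uses for its speed display; for a compression benchFn, sumOfReturn
                                 is the total compressed size.)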
If nbCycles != 0 then use that */ + unsigned nbSeconds; /* default timing is in nbSeconds */ size_t blockSize; /* Maximum allowable size of a block*/ unsigned nbWorkers; /* multithreading */ - unsigned realTime; - unsigned separateFiles; - int additionalParam; - unsigned ldmFlag; - unsigned ldmMinMatch; - unsigned ldmHashLog; + unsigned realTime; /* real time priority */ + int additionalParam; /* used by python speed benchmark */ + unsigned ldmFlag; /* enables long distance matching */ + unsigned ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md for meaning */ + unsigned ldmHashLog; unsigned ldmBucketSizeLog; unsigned ldmHashEveryLog; } BMK_advancedParams_t; /* returns default parameters used by nonAdvanced functions */ -BMK_advancedParams_t BMK_defaultAdvancedParams(void); +BMK_advancedParams_t BMK_initAdvancedParams(void); -/* functionName - name of function - * blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstSizes) - * initFn - (*initFn)(initPayload) is run once per benchmark - * benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstSizes[i], benchPayload) - * is run a variable number of times, specified by mode and iter args - * mode - if 0, iter will be interpreted as the minimum number of seconds to run - * iter - see mode +/* called in cli */ +/* fileNamesTable - name of files to benchmark + * nbFiles - number of files (size of fileNamesTable) + * dictFileName - name of dictionary file to load + * cLevel - lowest compression level to benchmark + * cLevellast - highest compression level to benchmark (everything in the range [cLevel, cLevellast]) will be benchmarked + * compressionParams - basic compression Parameters * displayLevel - what gets printed * 0 : no display; * 1 : errors; @@ -92,16 +88,20 @@ BMK_advancedParams_t BMK_defaultAdvancedParams(void); * 3 : + progression; * 4 : + information * return - * .error will give a nonzero value if any error has occured - * .result will contain the speed (B/s) and time per loop (ns) + * .error will give a nonzero error value if an error has occured + * .result - if .error = 0, .result will return the time taken to compression speed + * (.cSpeed), decompression speed (.dSpeed), and copmressed size (.cSize) of the original + * file */ -BMK_customReturn_t BMK_benchCustom(const char* functionName, size_t blockCount, - const void* const * const srcBuffers, const size_t* srcSizes, - void* const * const dstBuffers, const size_t* dstSizes, - size_t (*initFn)(void*), size_t (*benchFn)(const void*, size_t, void*, size_t, void*), - void* initPayload, void* benchPayload, - unsigned mode, unsigned iter, - int displayLevel); +BMK_return_t BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, + int cLevel, const ZSTD_compressionParameters* compressionParams, + int displayLevel); + +/* See benchFiles for normal parameter uses and return, see advancedParams_t for adv */ +BMK_return_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, + const char* dictFileName, + int cLevel, const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* const adv); /* basic benchmarking function, called in paramgrill ctx, dctx must be provided */ /* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode @@ -112,8 +112,12 @@ BMK_customReturn_t BMK_benchCustom(const char* functionName, size_t blockCount, * dictBufferSize - size of dictBuffer, 0 otherwise * ctx - Compression Context * dctx 
- Decompression Context - * diplayLevel - see BMK_benchCustom - * displayName - name used in display + * diplayLevel - see BMK_benchFiles + * displayName - name used by display + * return + * .error will give a nonzero value if an error has occured + * .result - if .error = 0, will give the same results as benchFiles + * but for the data stored in srcBuffer */ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, @@ -122,6 +126,7 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, int displayLevel, const char* displayName); +/* See benchMem for normal parameter uses and return, see advancedParams_t for adv */ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const ZSTD_compressionParameters* comprParams, @@ -130,29 +135,34 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, int displayLevel, const char* displayName, const BMK_advancedParams_t* adv); -/* called in cli */ -/* fileNamesTable - name of files to benchmark - * nbFiles - number of files (size of fileNamesTable) - * dictFileName - name of dictionary file to load - * cLevel - lowest compression level to benchmark - * cLevellast - highest compression level to benchmark (everything in the range [cLevel, cLevellast]) will be benchmarked - * compressionParams - basic compression Parameters - * displayLevel - see BMK_benchCustom +/* This function benchmarks the running time two functions (function specifics described */ + +/* blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities) + * srcBuffers - an array of buffers to be operated on by benchFn + * srcSizes - an array of the sizes of above buffers + * dstBuffers - an array of buffers to be written into by benchFn + * dstCapacities - an array of the capacities of above buffers. + * initFn - (*initFn)(initPayload) is run once per benchmark + * benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) + * is run a variable number of times, specified by mode and iter args + * mode - if 0, iter will be interpreted as the minimum number of seconds to run + * iter - see mode + * return + * .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return + * of the calls to initFn and benchFn, or if benchFunction errors internally + * .result - if .error = 0, then .result will contain the sum of all return values of + * benchFn on the first iteration through all of the blocks (.sumOfReturn) and also + * the time per run of benchFn (.nanoSecPerRun). For the former, this + * is generally intended to be used on functions which return the # of bytes written + * into dstBuffer, hence this value will be the total amount of bytes written to + * dstBuffer. 
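 *
 * A minimal single-block sketch of the intended call pattern (helper and
 * variable names below are illustrative only, not part of this header):
 *
 *     static size_t noopInit(void* p) { (void)p; return 0; }
 *     static size_t copyFn(const void* src, size_t srcSize,
 *                          void* dst, size_t dstCap, void* payload)
 *     {   (void)payload; (void)dstCap;
 *         memcpy(dst, src, srcSize);
 *         return srcSize;
 *     }
 *
 *     BMK_customReturn_t r = BMK_benchFunction(1, &srcPtr, &srcSize,
 *                                     &dstPtr, &dstCap,
 *                                     noopInit, NULL, copyFn, NULL,
 *                                     BMK_timeMode, 2);   (about 2 seconds in time mode)
 *     if (!r.error) {
 *         double const MBps = ((double)srcSize / r.result.nanoSecPerRun) * 1000;
 *         ... report MBps and r.result.sumOfReturn ...
 *     }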
*/ -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, - int cLevel, int cLevelLast, const ZSTD_compressionParameters* compressionParams, - int displayLevel); - -BMK_returnSet_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, - int cLevel, int cLevelLast, - const ZSTD_compressionParameters* compressionParams, - int displayLevel, const BMK_advancedParams_t* adv); - -/* get data from resultSet */ -/* when aggregated (separateFiles = 0), just be getResult(r,0,cl) */ -BMK_result_t BMK_getResult(BMK_resultSet_t results, unsigned fileIdx, int cLevel); -void BMK_freeResultSet(BMK_resultSet_t src); +BMK_customReturn_t BMK_benchFunction(size_t blockCount, + const void* const * const srcBuffers, const size_t* srcSizes, + void* const * const dstBuffers, const size_t* dstCapacities, + size_t (*initFn)(void*), void* initPayload, + size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, + unsigned mode, unsigned iter); #endif /* BENCH_H_121279284357 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 8b31a98b1..61a43dc30 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -398,7 +398,7 @@ int main(int argCount, const char* argv[]) setRealTimePrio = 0, singleThread = 0, ultra=0; - BMK_advancedParams_t adv = BMK_defaultAdvancedParams(); + BMK_advancedParams_t adv = BMK_initAdvancedParams(); unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ size_t blockSize = 0; zstd_operation_mode operation = zom_compress; @@ -802,7 +802,6 @@ int main(int argCount, const char* argv[]) /* Check if benchmark is selected */ if (operation==zom_bench) { #ifndef ZSTD_NOBENCH - adv.separateFiles = separateFiles; adv.blockSize = blockSize; adv.nbWorkers = nbWorkers; adv.realTime = setRealTimePrio; @@ -816,7 +815,18 @@ int main(int argCount, const char* argv[]) if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { adv.ldmHashEveryLog = g_ldmHashEveryLog; } - BMK_freeResultSet(BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, g_displayLevel, &adv).result); + + for(; cLevel <= cLevelLast; cLevel++) { + if(separateFiles) { + unsigned i; + for(i = 0; i < filenameIdx; i++) { + BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); + } + } else { + BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); + } + } + #else (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; #endif diff --git a/tests/fullbench.c b/tests/fullbench.c index 2dee3db94..b83eb1c77 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -431,14 +431,16 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) /* benchmark loop */ { - BMK_customReturn_t r = BMK_benchCustom(benchName, 1, &src, &srcSize, (void * const * const)&dstBuff, &dstBuffSize, &local_nothing, benchFunction, - NULL, buff2, BMK_timeMode, 1, 2); + BMK_customReturn_t r = BMK_benchFunction(1, &src, &srcSize, + (void * const * const)&dstBuff, &dstBuffSize, + &local_nothing, NULL, + benchFunction, buff2, BMK_timeMode, 1); if(r.error) { DISPLAY("ERROR %d ! ! 
\n", r.error); exit(1); } - DISPLAY("%2u#Speed: %f MB/s - Size: %f MB\n", benchNb, (double)srcSize / r.result.time * 1000, (double)r.result.size / 1000000); + DISPLAY("%2u#Speed: %f MB/s - Size: %f MB - %s\n", benchNb, (double)srcSize / r.result.nanoSecPerRun * 1000, (double)r.result.sumOfReturn / 1000000, benchName); } _cleanOut: From e482e328cddc9a654185d414bb1e1925ec47046d Mon Sep 17 00:00:00 2001 From: George Lu Date: Mon, 18 Jun 2018 12:08:51 -0700 Subject: [PATCH 05/10] Reorder Arguments make initFn nullable --- programs/bench.c | 21 ++++++++++++--------- programs/bench.h | 16 +++++++++------- tests/fullbench.c | 12 +++++------- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 6d0e1c1ff..fae4ea0fa 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -281,11 +281,11 @@ static size_t local_defaultDecompress( /* benchFn should return error value or out Size */ /* takes # of blocks and list of size & stuff for each. */ BMK_customReturn_t BMK_benchFunction( + size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, + size_t (*initFn)(void*), void* initPayload, size_t blockCount, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, - size_t (*initFn)(void*), void* initPayload, - size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, unsigned mode, unsigned iter) { size_t srcSize = 0, dstSize = 0, ind = 0; unsigned toAdd = 1; @@ -336,7 +336,7 @@ BMK_customReturn_t BMK_benchFunction( } clockStart = UTIL_getTime(); - (*initFn)(initPayload); + if(initFn != NULL) { (*initFn)(initPayload); } for(i = 0; i < nbLoops; i++) { for(j = 0; j < blockCount; j++) { @@ -368,6 +368,7 @@ BMK_customReturn_t BMK_benchFunction( { unsigned i, j; clockStart = UTIL_getTime(); + if(initFn != NULL) { (*initFn)(initPayload); } for(i = 0; i < iter; i++) { for(j = 0; j < blockCount; j++) { size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); @@ -511,10 +512,11 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, cctxprep.adv = adv; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - compressionResults = BMK_benchFunction(nbBlocks, - srcPtrs, srcSizes, cPtrs, cSizes, - &local_initCCtx, (void*)&cctxprep, + compressionResults = BMK_benchFunction( &local_defaultCompress, (void*)(ctx), + &local_initCCtx, (void*)&cctxprep, + nbBlocks, + srcPtrs, srcSizes, cPtrs, cSizes, adv->loopMode, adv->nbSeconds); if(compressionResults.error) { @@ -544,10 +546,11 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, dctxprep.dctx = dctx; dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; - decompressionResults = BMK_benchFunction(nbBlocks, - (const void * const *)cPtrs, cSizes, resPtrs, resSizes, - &local_initDCtx, (void*)&dctxprep, + decompressionResults = BMK_benchFunction( &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, + nbBlocks, + (const void * const *)cPtrs, cSizes, resPtrs, resSizes, adv->loopMode, adv->nbSeconds); if(decompressionResults.error) { diff --git a/programs/bench.h b/programs/bench.h index 352166881..2030f0a2f 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -137,14 +137,15 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, /* This function benchmarks the running time two 
functions (function specifics described */ -/* blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities) +/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) + * is run a variable number of times, specified by mode and iter args + * initFn - (*initFn)(initPayload) is run once per benchmark at the beginning. This argument can + * be NULL, in which case nothing is run. + * blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities) * srcBuffers - an array of buffers to be operated on by benchFn * srcSizes - an array of the sizes of above buffers * dstBuffers - an array of buffers to be written into by benchFn * dstCapacities - an array of the capacities of above buffers. - * initFn - (*initFn)(initPayload) is run once per benchmark - * benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) - * is run a variable number of times, specified by mode and iter args * mode - if 0, iter will be interpreted as the minimum number of seconds to run * iter - see mode * return @@ -157,11 +158,12 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, * into dstBuffer, hence this value will be the total amount of bytes written to * dstBuffer. */ -BMK_customReturn_t BMK_benchFunction(size_t blockCount, +BMK_customReturn_t BMK_benchFunction( + size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, + size_t (*initFn)(void*), void* initPayload, + size_t blockCount, const void* const * const srcBuffers, const size_t* srcSizes, void* const * const dstBuffers, const size_t* dstCapacities, - size_t (*initFn)(void*), void* initPayload, - size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, unsigned mode, unsigned iter); #endif /* BENCH_H_121279284357 */ diff --git a/tests/fullbench.c b/tests/fullbench.c index b83eb1c77..91a1370df 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -94,10 +94,6 @@ static size_t BMK_findMaxMem(U64 requiredMem) /*_******************************************************* * Benchmark wrappers *********************************************************/ -size_t local_nothing(void* x) { - (void)x; - return 0; -} size_t local_ZSTD_compress(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { @@ -431,10 +427,12 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) /* benchmark loop */ { - BMK_customReturn_t r = BMK_benchFunction(1, &src, &srcSize, + BMK_customReturn_t r = BMK_benchFunction( + benchFunction, buff2, + NULL, NULL, + 1, &src, &srcSize, (void * const * const)&dstBuff, &dstBuffSize, - &local_nothing, NULL, - benchFunction, buff2, BMK_timeMode, 1); + BMK_timeMode, 1); if(r.error) { DISPLAY("ERROR %d ! ! 
\n", r.error); exit(1); From a3c8b599901cd3e4a517c066f27c9196ea8c6176 Mon Sep 17 00:00:00 2001 From: George Lu Date: Mon, 18 Jun 2018 15:06:31 -0700 Subject: [PATCH 06/10] Fix cli no print Change looping behavior to match old --- programs/bench.c | 33 +++------------------------------ programs/bench.h | 16 ++++++++++------ programs/zstdcli.c | 26 ++++++++++++++++++-------- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index fae4ea0fa..a9a8086b2 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -818,30 +818,6 @@ static BMK_return_t BMK_benchFileTable(const char* const * const fileNamesTable, } } /* Bench */ - /* - if (adv->separateFiles) { - const BYTE* srcPtr = (const BYTE*)srcBuffer; - U32 fileNb; - res.result.results = (BMK_result_t**)malloc(sizeof(BMK_result_t*) * nbFiles); - res.result.nbFiles = nbFiles; - if(res.result.results == NULL) EXM_THROW(12, BMK_return_t, "not enough memory"); - for (fileNb=0; fileNb ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); - /* if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); - if (cLevelLast < cLevel) cLevelLast = cLevel; - if (cLevelLast > cLevel) - DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); */ - + if (cLevel > ZSTD_maxCLevel()) { + EXM_THROW(15, BMK_return_t, "Invalid Compression Level"); + } if (nbFiles == 0) { return BMK_syntheticTest(cLevel, compressibility, compressionParams, displayLevel, adv); } diff --git a/programs/bench.h b/programs/bench.h index 2030f0a2f..d553e7b4f 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -75,11 +75,12 @@ typedef struct { BMK_advancedParams_t BMK_initAdvancedParams(void); /* called in cli */ +/* Loads files in fileNamesTable into memory, as well as a dictionary + * from dictFileName, and then uses benchMem */ /* fileNamesTable - name of files to benchmark * nbFiles - number of files (size of fileNamesTable) * dictFileName - name of dictionary file to load - * cLevel - lowest compression level to benchmark - * cLevellast - highest compression level to benchmark (everything in the range [cLevel, cLevellast]) will be benchmarked + * cLevel - compression level to benchmark, errors if invalid * compressionParams - basic compression Parameters * displayLevel - what gets printed * 0 : no display; @@ -103,15 +104,18 @@ BMK_return_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFile int cLevel, const ZSTD_compressionParameters* compressionParams, int displayLevel, const BMK_advancedParams_t* const adv); -/* basic benchmarking function, called in paramgrill ctx, dctx must be provided */ +/* basic benchmarking function, called in paramgrill + * applies ZSTD_compress_generic() and ZSTD_decompress_generic() on data in srcBuffer + * with specific compression parameters specified by other arguments using benchFunction + * (cLevel, comprParams + adv in advanced Mode) */ /* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode * srcSize - size of data in srcBuffer * cLevel - compression level * comprParams - basic compression parameters * dictBuffer - a dictionary if used, null otherwise * dictBufferSize - size of dictBuffer, 0 otherwise - * ctx - Compression Context - * dctx - Decompression Context + * ctx - Compression Context (must be provided) + * dctx - Decompression Context (must be provided) * diplayLevel - see BMK_benchFiles * displayName - name used by display * return @@ -135,7 +139,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, 
int displayLevel, const char* displayName, const BMK_advancedParams_t* adv); -/* This function benchmarks the running time two functions (function specifics described */ +/* This function times the execution of 2 argument functions, benchFn and initFn */ /* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) * is run a variable number of times, specified by mode and iter args diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 61a43dc30..63be9ef61 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -32,7 +32,7 @@ #include /* errno */ #include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ #ifndef ZSTD_NOBENCH -# include "bench.h" /* BMK_benchFiles, BMK_SetNbSeconds */ +# include "bench.h" /* BMK_benchFiles */ #endif #ifndef ZSTD_NODICT # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ @@ -816,15 +816,25 @@ int main(int argCount, const char* argv[]) adv.ldmHashEveryLog = g_ldmHashEveryLog; } - for(; cLevel <= cLevelLast; cLevel++) { - if(separateFiles) { - unsigned i; - for(i = 0; i < filenameIdx; i++) { - BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); + if (cLevelLast < cLevel) cLevelLast = cLevel; + if (cLevelLast > cLevel) + DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + + if(separateFiles) { + unsigned i; + for(i = 0; i < filenameIdx; i++) { + DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]); + int c; + for(c = cLevel; c <= cLevelLast; c++) { + BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &adv); } - } else { - BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); } + } else { + for(; cLevel <= cLevelLast; cLevel++) { + BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); + } } #else From a8eea99ebe00eb5bfcf7d3689faf55128931ad00 Mon Sep 17 00:00:00 2001 From: George Lu Date: Tue, 19 Jun 2018 10:58:22 -0700 Subject: [PATCH 07/10] Incremental Display + Fn Separations Seperate syntheticTest and fileTableTest (now renamed as benchFiles) Add incremental display to benchMem Change to only iterMode for benchFunction Make Synthetic test's compressibility configurable from cli (using -P#) --- programs/bench.c | 450 ++++++++++++++++++++++++++------------------- programs/bench.h | 40 ++-- programs/zstdcli.c | 39 +++- tests/fullbench.c | 16 +- 4 files changed, 326 insertions(+), 219 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index a9a8086b2..447f9feb9 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -64,9 +64,6 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); -/* remove this in the future? */ -static U32 g_compressibilityDefault = 50; - /* ************************************* * console display ***************************************/ @@ -276,22 +273,23 @@ static size_t local_defaultDecompress( } -/* mode 0 : iter = # seconds, else iter = # cycles */ /* initFn will be measured once, bench fn will be measured x times */ /* benchFn should return error value or out Size */ /* takes # of blocks and list of size & stuff for each. 
*/ +/* only does iterations*/ +/* note time/iter could be zero if interval too short */ BMK_customReturn_t BMK_benchFunction( size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, size_t (*initFn)(void*), void* initPayload, size_t blockCount, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, - unsigned mode, unsigned iter) { + unsigned iter) { size_t srcSize = 0, dstSize = 0, ind = 0; unsigned toAdd = 1; + U64 totalTime; BMK_customReturn_t retval; - U64 totalTime = 0, fastest = (U64)(-1LL); UTIL_time_t clockStart; { @@ -305,136 +303,60 @@ BMK_customReturn_t BMK_benchFunction( } if(!iter) { - if(mode == BMK_iterMode) { - EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); - } - + EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); } for(ind = 0; ind < blockCount; ind++) { srcSize += srcBlockSizes[ind]; } - switch(mode) { - case BMK_timeMode: - { - int completed = 0; - U64 const maxTime = (iter * TIMELOOP_NANOSEC) + 1; - unsigned nbLoops = 1; - UTIL_time_t coolTime = UTIL_getTime(); - while(!completed) { - unsigned i, j; - /* Overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DEBUGOUTPUT("\rcooling down ... \r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); + { + unsigned i, j; + clockStart = UTIL_getTime(); + if(initFn != NULL) { (*initFn)(initPayload); } + for(i = 0; i < iter; i++) { + for(j = 0; j < blockCount; j++) { + size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); + if(ZSTD_isError(res)) { + EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", + j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); + } else if(toAdd) { + dstSize += res; } - - for(i = 0; i < blockCount; i++) { - memset(dstBlockBuffers[i], 0xD6, dstBlockCapacities[i]); /* warm up and erase result buffer */ - } - - clockStart = UTIL_getTime(); - if(initFn != NULL) { (*initFn)(initPayload); } - - for(i = 0; i < nbLoops; i++) { - for(j = 0; j < blockCount; j++) { - size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); - if(ZSTD_isError(res)) { - EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", - j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); - } else if (toAdd) { - dstSize += res; - } - } - toAdd = 0; - } - { U64 const loopDuration = UTIL_clockSpanNano(clockStart); - if (loopDuration > 0) { - fastest = MIN(fastest, loopDuration / nbLoops); - nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; - } else { - assert(nbLoops < 40000000); /* avoid overflow */ - nbLoops *= 100; - } - totalTime += loopDuration; - completed = (totalTime >= maxTime); - } } - break; + toAdd = 0; } - case BMK_iterMode: - { - unsigned i, j; - clockStart = UTIL_getTime(); - if(initFn != NULL) { (*initFn)(initPayload); } - for(i = 0; i < iter; i++) { - for(j = 0; j < blockCount; j++) { - size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); - if(ZSTD_isError(res)) { - EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", - j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); - } else if(toAdd) { - dstSize += res; - } - } - toAdd = 0; - } - totalTime = UTIL_clockSpanNano(clockStart); - if(!totalTime) { - 
EXM_THROW_ND(3, BMK_customReturn_t, "Cycle count (%u) too short to measure \n", iter); - } else { - fastest = totalTime / iter; - } - break; - } - default: - EXM_THROW_ND(4, BMK_customReturn_t, "Unknown Mode \n"); + totalTime = UTIL_clockSpanNano(clockStart); } + retval.error = 0; - retval.result.nanoSecPerRun = fastest; + retval.result.nanoSecPerRun = totalTime / iter; retval.result.sumOfReturn = dstSize; return retval; } -BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, - const size_t* fileSizes, unsigned nbFiles, - const int cLevel, const ZSTD_compressionParameters* comprParams, - const void* dictBuffer, size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) +/* benchMem with no allocation */ +static BMK_return_t BMK_benchMemAdvancedNoAlloc( + const void ** const srcPtrs, size_t* const srcSizes, + void** const cPtrs, size_t* const cSizes, + void** const resPtrs, size_t* const resSizes, + void* resultBuffer, void* compressedBuffer, + const size_t maxCompressedSize, + const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) { size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; - U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; - - /* these are the blockTable parameters, just split up */ - const void ** const srcPtrs = (const void ** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - - void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - - void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); - size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); - - const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ - void* compressedBuffer = malloc(maxCompressedSize); - void* resultBuffer = malloc(srcSize); - BMK_return_t results; - size_t const loadedCompressedSize = srcSize; size_t cSize = 0; double ratio = 0.; U32 nbBlocks; - /* checks */ - if (!compressedBuffer || !resultBuffer || - !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes) - EXM_THROW(31, BMK_return_t, "allocation error : not enough memory"); - if(!ctx || !dctx) EXM_THROW(31, BMK_return_t, "error: passed in null context"); @@ -451,10 +373,15 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, srcPtr += fileSizes[fileNb]; } { size_t const decodedSize = (size_t)totalDSize64; - if (totalDSize64 > decodedSize) EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ free(resultBuffer); resultBuffer = malloc(decodedSize); - if (!resultBuffer) EXM_THROW(33, BMK_return_t, "not enough memory"); + if (!resultBuffer) { + EXM_THROW(33, BMK_return_t, "not enough memory"); + } + if (totalDSize64 > decodedSize) { + free(resultBuffer); + EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ + } cSize = srcSize; srcSize = decodedSize; ratio = (double)srcSize / (double)cSize; @@ -504,6 +431,9 @@ BMK_return_t 
BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, if (adv->mode != BMK_decodeOnly) { BMK_initCCtxArgs cctxprep; BMK_customReturn_t compressionResults; + int completed = 0; + U64 totalLoops = 0, totalTime = 0, fastest = (U64)(-1LL); + UTIL_time_t coolTime = UTIL_getTime(); cctxprep.ctx = ctx; cctxprep.dictBuffer = dictBuffer; cctxprep.dictBufferSize = dictBufferSize; @@ -512,63 +442,161 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, cctxprep.adv = adv; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - compressionResults = BMK_benchFunction( - &local_defaultCompress, (void*)(ctx), - &local_initCCtx, (void*)&cctxprep, - nbBlocks, - srcPtrs, srcSizes, cPtrs, cSizes, - adv->loopMode, adv->nbSeconds); + if(adv->loopMode == BMK_timeMode) { + U64 maxTime = adv->nbSeconds * TIMELOOP_NANOSEC; + unsigned nbLoops = 1; + while(!completed) { + /* Overheat protection */ + if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { + DEBUGOUTPUT("\rcooling down ... \r"); + UTIL_sleep(COOLPERIOD_SEC); + coolTime = UTIL_getTime(); + } - if(compressionResults.error) { - results.error = compressionResults.error; - return results; + compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, nbLoops); + if(compressionResults.error) { + results.error = compressionResults.error; + return results; + } + + { U64 loopDuration = compressionResults.result.nanoSecPerRun * nbLoops; + totalLoops += nbLoops; + totalTime += loopDuration; + if (loopDuration > 0) { // nanoSec / run + fastest = MIN(fastest, compressionResults.result.nanoSecPerRun); + nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + } else { + assert(nbLoops < 40000000); /* avoid overflow */ + nbLoops *= 2; + } + completed = (totalTime >= maxTime); + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = (((double)srcSize * totalLoops) / totalTime) * 1000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + results.result.cSpeed = compressionSpeed * 1000000; + results.result.cSize = compressionResults.result.sumOfReturn; + ratio = (double)srcSize / results.result.cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); + } + } + } + } else { + compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); + if(compressionResults.error) { + results.error = compressionResults.error; + return results; + } + if(compressionResults.result.nanoSecPerRun == 0) { + results.result.cSpeed = 0; + } else { + results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + results.result.cSize = compressionResults.result.sumOfReturn; + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; + ratio = (double)srcSize / results.result.cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); + } } - results.result.cSize = compressionResults.result.sumOfReturn; - ratio = (double)srcSize / (double)results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = ((double)srcSize / compressionResults.result.nanoSecPerRun) * 1000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - results.result.cSpeed = compressionSpeed * 1000000; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed); - } } /* if (adv->mode != BMK_decodeOnly) */ if(adv->mode != BMK_compressOnly) { BMK_initDCtxArgs dctxprep; BMK_customReturn_t decompressionResults; - + U64 totalLoops = 0, totalTime = 0, fastest = (U64)(-1LL); + int completed = 0; + UTIL_time_t coolTime = UTIL_getTime(); dctxprep.dctx = dctx; dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; - decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, - nbBlocks, - (const void * const *)cPtrs, cSizes, resPtrs, resSizes, - adv->loopMode, adv->nbSeconds); + if(adv->loopMode == BMK_timeMode) { + U64 maxTime = adv->nbSeconds * TIMELOOP_NANOSEC; + unsigned nbLoops = 1; + while(!completed) { + /* Overheat protection */ + if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { + DEBUGOUTPUT("\rcooling down ... \r"); + UTIL_sleep(COOLPERIOD_SEC); + coolTime = UTIL_getTime(); + } + + decompressionResults = BMK_benchFunction( + &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, nbBlocks, + (const void * const *)cPtrs, cSizes, resPtrs, resSizes, + nbLoops); - if(decompressionResults.error) { - results.error = decompressionResults.error; - return results; - } + if(decompressionResults.error) { + results.error = decompressionResults.error; + return results; + } - markNb = (markNb+1) % NB_MARKS; - { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; - results.result.dSpeed = decompressionSpeed * 1000000; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed, - decompressionSpeed); + { U64 loopDuration = decompressionResults.result.nanoSecPerRun * nbLoops; + totalLoops += nbLoops; + totalTime += loopDuration; + if (loopDuration > 0) { + fastest = MIN(fastest, loopDuration / nbLoops); + nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + } else { + assert(nbLoops < 40000000); /* avoid overflow */ + nbLoops *= 2; + } + completed = (totalTime >= maxTime); + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; + double const decompressionSpeed = ((double)srcSize * totalLoops / totalTime) * 1000; + results.result.dSpeed = decompressionSpeed * 1000000; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed, + decompressionSpeed); + } + } + } + } else { + decompressionResults = BMK_benchFunction( + &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, nbBlocks, + (const void * const *)cPtrs, cSizes, resPtrs, resSizes, + adv->nbSeconds); + if(decompressionResults.error) { + results.error = decompressionResults.error; + return results; + } + if(decompressionResults.result.nanoSecPerRun == 0) { + results.result.dSpeed = 0; + } else { + results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; + results.result.dSpeed = decompressionSpeed * 1000000; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed, + decompressionSpeed); + } } } @@ -622,7 +650,43 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, } DISPLAYLEVEL(2, "%2i#\n", cLevel); } /* Bench */ + return results; +} +BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) + +{ + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? 
adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; + + /* these are the blockTable parameters, just split up */ + const void ** const srcPtrs = (const void ** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + + void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + + void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + + const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + void* compressedBuffer = malloc(maxCompressedSize); + void* resultBuffer = malloc(srcSize); + + BMK_return_t results; + int allocationincomplete = !compressedBuffer || !resultBuffer || + !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; + if (!allocationincomplete) { + results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, + resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, + srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, + dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); + } /* clean up */ free(compressedBuffer); free(resultBuffer); @@ -634,6 +698,9 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, free(resPtrs); free(resSizes); + if(allocationincomplete) { + EXM_THROW(31, BMK_return_t, "allocation error : not enough memory"); + } results.error = 0; return results; } @@ -767,31 +834,43 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize, return 0; } -static BMK_return_t BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles, +BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, unsigned const nbFiles, const char* const dictFileName, int const cLevel, - const ZSTD_compressionParameters* const compressionParams, int displayLevel, - const BMK_advancedParams_t * const adv) + const ZSTD_compressionParameters* const compressionParams, + int displayLevel, const BMK_advancedParams_t * const adv) { void* srcBuffer; size_t benchedSize; void* dictBuffer = NULL; size_t dictBufferSize = 0; - size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); + size_t* fileSizes; BMK_return_t res; U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); - if (!fileSizes) EXM_THROW(12, BMK_return_t, "not enough memory for fileSizes"); + if(!nbFiles) { + EXM_THROW(14, BMK_return_t, "No Files to Benchmark"); + } + if (cLevel > ZSTD_maxCLevel()) { + EXM_THROW(15, BMK_return_t, "Invalid Compression Level"); + } + + fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); + if (!fileSizes) EXM_THROW(12, BMK_return_t, "not enough memory for fileSizes"); /* Load dictionary */ if (dictFileName != NULL) { U64 const dictFileSize = UTIL_getFileSize(dictFileName); - if (dictFileSize > 64 MB) + if (dictFileSize > 64 MB) { + free(fileSizes); EXM_THROW(10, BMK_return_t, "dictionary file %s too large", dictFileName); + } dictBufferSize = (size_t)dictFileSize; dictBuffer = malloc(dictBufferSize); - if (dictBuffer==NULL) + if (dictBuffer==NULL) { + free(fileSizes); EXM_THROW(11, BMK_return_t, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); + } { int errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, 
fileSizes, &dictFileName, 1, displayLevel); if(errorCode) { @@ -807,7 +886,11 @@ static BMK_return_t BMK_benchFileTable(const char* const * const fileNamesTable, if (benchedSize < totalSizeToLoad) DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); srcBuffer = malloc(benchedSize); - if (!srcBuffer) EXM_THROW(12, BMK_return_t, "not enough memory"); + if (!srcBuffer) { + free(dictBuffer); + free(fileSizes); + EXM_THROW(12, BMK_return_t, "not enough memory"); + } /* Load input buffer */ { @@ -839,15 +922,21 @@ static BMK_return_t BMK_benchFileTable(const char* const * const fileNamesTable, } -static BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, +BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, const ZSTD_compressionParameters* compressionParams, int displayLevel, const BMK_advancedParams_t * const adv) { char name[20] = {0}; size_t benchedSize = 10000000; - void* const srcBuffer = malloc(benchedSize); + void* srcBuffer; BMK_return_t res; + + if (cLevel > ZSTD_maxCLevel()) { + EXM_THROW(15, BMK_return_t, "Invalid Compression Level"); + } + /* Memory allocation */ + srcBuffer = malloc(benchedSize); if (!srcBuffer) EXM_THROW(21, BMK_return_t, "not enough memory"); /* Fill input buffer */ @@ -867,28 +956,9 @@ static BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, return res; } - -BMK_return_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, - int cLevel, const ZSTD_compressionParameters* compressionParams, - int displayLevel, const BMK_advancedParams_t * const adv) -{ - double const compressibility = (double)g_compressibilityDefault / 100; - - if (cLevel > ZSTD_maxCLevel()) { - EXM_THROW(15, BMK_return_t, "Invalid Compression Level"); - } - if (nbFiles == 0) { - return BMK_syntheticTest(cLevel, compressibility, compressionParams, displayLevel, adv); - } - else { - return BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, adv); - } -} - -BMK_return_t BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, - int cLevel, const ZSTD_compressionParameters* compressionParams, +BMK_return_t BMK_benchFiles(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, + int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel) { const BMK_advancedParams_t adv = BMK_initAdvancedParams(); return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv); diff --git a/programs/bench.h b/programs/bench.h index d553e7b4f..5f3a55285 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -40,7 +40,8 @@ typedef struct { size_t sumOfReturn; /* sum of return values */ U64 nanoSecPerRun; /* time per iteration */ } BMK_customResult_t; - +//we might need a nbRuns or nbSecs if we're keeping timeMode / iterMode respectively. +//give benchMem responsibility to incrementally update display. 
ERROR_STRUCT(BMK_result_t, BMK_return_t); ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); @@ -78,7 +79,7 @@ BMK_advancedParams_t BMK_initAdvancedParams(void); /* Loads files in fileNamesTable into memory, as well as a dictionary * from dictFileName, and then uses benchMem */ /* fileNamesTable - name of files to benchmark - * nbFiles - number of files (size of fileNamesTable) + * nbFiles - number of files (size of fileNamesTable), must be > 0 * dictFileName - name of dictionary file to load * cLevel - compression level to benchmark, errors if invalid * compressionParams - basic compression Parameters @@ -91,19 +92,37 @@ BMK_advancedParams_t BMK_initAdvancedParams(void); * return * .error will give a nonzero error value if an error has occured * .result - if .error = 0, .result will return the time taken to compression speed - * (.cSpeed), decompression speed (.dSpeed), and copmressed size (.cSize) of the original + * (.cSpeed), decompression speed (.dSpeed), and compressed size (.cSize) of the original * file */ -BMK_return_t BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, - int cLevel, const ZSTD_compressionParameters* compressionParams, +BMK_return_t BMK_benchFiles(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, + int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel); /* See benchFiles for normal parameter uses and return, see advancedParams_t for adv */ -BMK_return_t BMK_benchFilesAdvanced(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, - int cLevel, const ZSTD_compressionParameters* compressionParams, +BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, + int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel, const BMK_advancedParams_t* const adv); +/* called in cli */ +/* Generates a sample with datagen with the compressibility argument*/ +/* cLevel - compression level to benchmark, errors if invalid + * compressibility - determines compressibility of sample + * compressionParams - basic compression Parameters + * displayLevel - see benchFiles + * adv - see advanced_Params_t + * return + * .error will give a nonzero error value if an error has occured + * .result - if .error = 0, .result will return the time taken to compression speed + * (.cSpeed), decompression speed (.dSpeed), and compressed size (.cSize) of the original + * file + */ +BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t * const adv); + /* basic benchmarking function, called in paramgrill * applies ZSTD_compress_generic() and ZSTD_decompress_generic() on data in srcBuffer * with specific compression parameters specified by other arguments using benchFunction @@ -150,8 +169,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, * srcSizes - an array of the sizes of above buffers * dstBuffers - an array of buffers to be written into by benchFn * dstCapacities - an array of the capacities of above buffers. - * mode - if 0, iter will be interpreted as the minimum number of seconds to run - * iter - see mode + * iter - defines number of times benchFn is run. 
* return * .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return * of the calls to initFn and benchFn, or if benchFunction errors internally @@ -168,7 +186,7 @@ BMK_customReturn_t BMK_benchFunction( size_t blockCount, const void* const * const srcBuffers, const size_t* srcSizes, void* const * const dstBuffers, const size_t* dstCapacities, - unsigned mode, unsigned iter); + unsigned sec); #endif /* BENCH_H_121279284357 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 63be9ef61..a450ecac2 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -398,6 +398,7 @@ int main(int argCount, const char* argv[]) setRealTimePrio = 0, singleThread = 0, ultra=0; + double compressibility = 0.5; BMK_advancedParams_t adv = BMK_initAdvancedParams(); unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ size_t blockSize = 0; @@ -706,6 +707,19 @@ int main(int argCount, const char* argv[]) #endif main_pause=1; break; + + /* Select compressibility of synthetic sample */ + case 'P': + { U32 proba32 = 0; + while ((argument[1]>= '0') && (argument[1]<= '9')) { + proba32 *= 10; + proba32 += argument[1] - '0'; + argument++; + } + compressibility = (double)proba32 / 100; + } + break; + /* unknown command */ default : CLEAN_RETURN(badusage(programName)); } @@ -821,20 +835,25 @@ int main(int argCount, const char* argv[]) if (cLevelLast < cLevel) cLevelLast = cLevel; if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); - - if(separateFiles) { - unsigned i; - for(i = 0; i < filenameIdx; i++) { - DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]); - int c; - for(c = cLevel; c <= cLevelLast; c++) { - BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &adv); + if(filenameIdx) { + if(separateFiles) { + unsigned i; + for(i = 0; i < filenameIdx; i++) { + int c; + DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]); + for(c = cLevel; c <= cLevelLast; c++) { + BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &adv); + } } + } else { + for(; cLevel <= cLevelLast; cLevel++) { + BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); + } } } else { for(; cLevel <= cLevelLast; cLevel++) { - BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); - } + BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &adv); + } } #else diff --git a/tests/fullbench.c b/tests/fullbench.c index 91a1370df..0a18eb2d9 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -291,6 +291,8 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) void* buff2; const char* benchName; size_t (*benchFunction)(const void* src, size_t srcSize, void* dst, size_t dstSize, void* verifBuff); + BMK_customReturn_t r; + int errorcode = 0; /* Selection */ switch(benchNb) @@ -427,15 +429,13 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) /* benchmark loop */ { - BMK_customReturn_t r = BMK_benchFunction( - benchFunction, buff2, - NULL, NULL, - 1, &src, &srcSize, - (void * const * const)&dstBuff, &dstBuffSize, - BMK_timeMode, 1); + r = BMK_benchFunction(benchFunction, buff2, + NULL, NULL, 1, &src, &srcSize, + (void * const * const)&dstBuff, &dstBuffSize, g_nbIterations); if(r.error) { DISPLAY("ERROR %d ! ! 
\n", r.error); - exit(1); + errorcode = r.error; + goto _cleanOut; } DISPLAY("%2u#Speed: %f MB/s - Size: %f MB - %s\n", benchNb, (double)srcSize / r.result.nanoSecPerRun * 1000, (double)r.result.sumOfReturn / 1000000, benchName); @@ -448,7 +448,7 @@ _cleanOut: ZSTD_freeDCtx(g_zdc); g_zdc=NULL; ZSTD_freeCStream(g_cstream); g_cstream=NULL; ZSTD_freeDStream(g_dstream); g_dstream=NULL; - return 0; + return errorcode; } From ab26f24c9c8ec8e6435c09d8c69b1a8faa19b852 Mon Sep 17 00:00:00 2001 From: George Lu Date: Thu, 21 Jun 2018 11:16:53 -0700 Subject: [PATCH 08/10] benchFunction Timed Wrappers Add BMK_benchFunctionTimed Add BMK_init_customResultCont.. Change benchMem to use benchFunctionTimed Minor Fixes/Adjustments --- programs/bench.c | 185 +++++++++++++++++++++++++---------------------- programs/bench.h | 34 ++++++++- 2 files changed, 133 insertions(+), 86 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 447f9feb9..9aa486f8d 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -333,6 +333,70 @@ BMK_customReturn_t BMK_benchFunction( retval.result.nanoSecPerRun = totalTime / iter; retval.result.sumOfReturn = dstSize; return retval; +} + +BMK_customResultContinuation_t BMK_init_customResultContinuation(unsigned iter) { + BMK_customResultContinuation_t c; + c.completed = 0; + c.state.nbLoops = 1; + c.state.coolTime = UTIL_getTime(); + c.state.timeRemaining = (U64)iter * TIMELOOP_NANOSEC; + c.intermediateResult.error = 0; + c.intermediateResult.result.nanoSecPerRun = (U64)(-1LL); + c.intermediateResult.result.sumOfReturn = 0; + return c; +} + +#define MINUSABLETIME 500000000ULL + +//how to use minusabletime? +//only report times which are > minUsable +void BMK_benchFunctionTimed( + size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, + size_t (*initFn)(void*), void* initPayload, + size_t blockCount, + const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, + BMK_customResultContinuation_t* cont) +{ + U64 fastest = cont->intermediateResult.result.nanoSecPerRun; + int completed = 0; + + while(!cont->completed && !completed) + { + /* Overheat protection */ + if (UTIL_clockSpanMicro(cont->state.coolTime) > ACTIVEPERIOD_MICROSEC) { + DEBUGOUTPUT("\rcooling down ... 
\r"); + UTIL_sleep(COOLPERIOD_SEC); + cont->state.coolTime = UTIL_getTime(); + } + + cont->intermediateResult = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, + blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, cont->state.nbLoops); + if(cont->intermediateResult.error) { /* completed w/ error */ + cont->completed = 1; + return; + } + + { U64 const loopDuration = cont->intermediateResult.result.nanoSecPerRun * cont->state.nbLoops; + cont->completed = (cont->state.timeRemaining <= loopDuration); + cont->state.timeRemaining -= loopDuration; + if (loopDuration > 0) { + fastest = MIN(fastest, cont->intermediateResult.result.nanoSecPerRun); + cont->intermediateResult.result.nanoSecPerRun = fastest; + cont->state.nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + } else { + const unsigned multiplier = 2; + assert(cont->state.nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->state.nbLoops *= multiplier; + } + if(loopDuration < MINUSABLETIME) { /* don't report results which have time too low */ + continue; + } + + } + completed = 1; + } } /* benchMem with no allocation */ @@ -350,7 +414,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) { - size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ BMK_return_t results; size_t const loadedCompressedSize = srcSize; size_t cSize = 0; @@ -428,12 +492,9 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( U32 markNb = 0; DISPLAYLEVEL(2, "\r%79s\r", ""); + if (adv->mode != BMK_decodeOnly) { BMK_initCCtxArgs cctxprep; - BMK_customReturn_t compressionResults; - int completed = 0; - U64 totalLoops = 0, totalTime = 0, fastest = (U64)(-1LL); - UTIL_time_t coolTime = UTIL_getTime(); cctxprep.ctx = ctx; cctxprep.dictBuffer = dictBuffer; cctxprep.dictBufferSize = dictBufferSize; @@ -443,51 +504,31 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); if(adv->loopMode == BMK_timeMode) { - U64 maxTime = adv->nbSeconds * TIMELOOP_NANOSEC; - unsigned nbLoops = 1; - while(!completed) { - /* Overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DEBUGOUTPUT("\rcooling down ... 
\r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); - } - - compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, nbLoops); - if(compressionResults.error) { - results.error = compressionResults.error; + BMK_customResultContinuation_t cont = BMK_init_customResultContinuation(adv->nbSeconds); + while(!cont.completed) { + BMK_benchFunctionTimed(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, &cont); + if(cont.intermediateResult.error) { + results.error = cont.intermediateResult.error; return results; } - - { U64 loopDuration = compressionResults.result.nanoSecPerRun * nbLoops; - totalLoops += nbLoops; - totalTime += loopDuration; - if (loopDuration > 0) { // nanoSec / run - fastest = MIN(fastest, compressionResults.result.nanoSecPerRun); - nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; - } else { - assert(nbLoops < 40000000); /* avoid overflow */ - nbLoops *= 2; - } - completed = (totalTime >= maxTime); - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = (((double)srcSize * totalLoops) / totalTime) * 1000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - results.result.cSpeed = compressionSpeed * 1000000; - results.result.cSize = compressionResults.result.sumOfReturn; - ratio = (double)srcSize / results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed); - } - } + ratio = (double)(srcSize / cont.intermediateResult.result.sumOfReturn); + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = ((double)srcSize / cont.intermediateResult.result.nanoSecPerRun) * 1000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + results.result.cSpeed = compressionSpeed * 1000000; + results.result.cSize = cont.intermediateResult.result.sumOfReturn; + ratio = (double)srcSize / results.result.cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); + } } } else { - compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); if(compressionResults.error) { results.error = compressionResults.error; @@ -517,49 +558,23 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( if(adv->mode != BMK_compressOnly) { BMK_initDCtxArgs dctxprep; BMK_customReturn_t decompressionResults; - U64 totalLoops = 0, totalTime = 0, fastest = (U64)(-1LL); - int completed = 0; - UTIL_time_t coolTime = UTIL_getTime(); dctxprep.dctx = dctx; dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; if(adv->loopMode == BMK_timeMode) { - U64 maxTime = adv->nbSeconds * TIMELOOP_NANOSEC; - unsigned nbLoops = 1; - while(!completed) { - /* Overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DEBUGOUTPUT("\rcooling down ... 
\r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); - } - - decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void * const *)cPtrs, cSizes, resPtrs, resSizes, - nbLoops); - - if(decompressionResults.error) { - results.error = decompressionResults.error; + BMK_customResultContinuation_t cont = BMK_init_customResultContinuation(adv->nbSeconds); + while(!cont.completed) { + BMK_benchFunctionTimed(&local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, + nbBlocks, (const void * const *)cPtrs, cSizes, resPtrs, resSizes, &cont); + if(cont.intermediateResult.error) { + results.error = cont.intermediateResult.error; return results; } - - { U64 loopDuration = decompressionResults.result.nanoSecPerRun * nbLoops; - totalLoops += nbLoops; - totalTime += loopDuration; - if (loopDuration > 0) { - fastest = MIN(fastest, loopDuration / nbLoops); - nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; - } else { - assert(nbLoops < 40000000); /* avoid overflow */ - nbLoops *= 2; - } - completed = (totalTime >= maxTime); - { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; double const compressionSpeed = results.result.cSpeed / 1000000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize * totalLoops / totalTime) * 1000; + double const decompressionSpeed = ((double)srcSize / cont.intermediateResult.result.nanoSecPerRun) * 1000; results.result.dSpeed = decompressionSpeed * 1000000; markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", @@ -567,8 +582,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( ratioAccuracy, ratio, cSpeedAccuracy, compressionSpeed, decompressionSpeed); - } - } + } } } else { decompressionResults = BMK_benchFunction( @@ -643,10 +657,11 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ double const cSpeed = results.result.cSpeed / 1000000; double const dSpeed = results.result.dSpeed / 1000000; - if (adv->additionalParam) + if (adv->additionalParam) { DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam); - else + } else { DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } } DISPLAYLEVEL(2, "%2i#\n", cLevel); } /* Bench */ diff --git a/programs/bench.h b/programs/bench.h index 5f3a55285..17cc0d0f5 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -161,7 +161,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, /* This function times the execution of 2 argument functions, benchFn and initFn */ /* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) - * is run a variable number of times, specified by mode and iter args + * is run iter times * initFn - (*initFn)(initPayload) is run once per benchmark at the beginning. This argument can * be NULL, in which case nothing is run. 
* blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities) @@ -188,6 +188,38 @@ BMK_customReturn_t BMK_benchFunction( void* const * const dstBuffers, const size_t* dstCapacities, unsigned sec); +typedef struct { + unsigned nbLoops; + U64 timeRemaining; + UTIL_time_t coolTime; +} BMK_timeState_t; + +typedef struct { + int completed; + BMK_customReturn_t intermediateResult; /* since the wrapper can't err, don't need ERROR_STRUCT(cRC, just check here) */ + BMK_timeState_t state; +} BMK_customResultContinuation_t; + +/* + * initializes the last argument of benchFunctionTimed, with iter being the number of seconds to bench (see below) + */ +BMK_customResultContinuation_t BMK_init_customResultContinuation(unsigned iter); + +/* + * Benchmarks custom functions like BMK_benchFunction(), but runs for iter seconds rather than a fixed number of iterations + * arguments mostly the same other than BMK_benchFunction() + * Usage - benchFunctionTimed will return in approximately one second, where the intermediate results can be found in + * the *cont passed in and be displayed/used as wanted. Keep calling BMK_benchFunctionTimed() until cont->completed = 1 + * to continue updating intermediate result. + */ +void BMK_benchFunctionTimed( + size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, + size_t (*initFn)(void*), void* initPayload, + size_t blockCount, + const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, + BMK_customResultContinuation_t* cont); + #endif /* BENCH_H_121279284357 */ #if defined (__cplusplus) From d6121ad0e1037de73817b9c8568f5dbbe0c22925 Mon Sep 17 00:00:00 2001 From: George Lu Date: Fri, 22 Jun 2018 17:25:16 -0700 Subject: [PATCH 09/10] Opaque State And minor fixups (comments/alignment/checks/fix memory leak) --- programs/bench.c | 207 +++++++++++++++++++++++++-------------------- programs/bench.h | 130 ++++++++++++++-------------- programs/zstdcli.c | 9 +- tests/fullbench.c | 2 +- 4 files changed, 182 insertions(+), 166 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 9aa486f8d..b2b5dc3b2 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -42,6 +42,7 @@ #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" #include "bench.h" +#include "zstd_errors.h" /* ************************************* @@ -108,13 +109,13 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; } /* error without displaying */ -#define EXM_THROW_ND(errorNum, retType, ...) { \ +#define EXM_THROW_ND(errorNum, retType, ...) 
{ \ retType r; \ memset(&r, 0, sizeof(retType)); \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ - DEBUGOUTPUT("Error %i : ", errorNum); \ - DEBUGOUTPUT(__VA_ARGS__); \ - DEBUGOUTPUT(" \n"); \ + DEBUGOUTPUT("Error %i : ", errorNum); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ r.error = errorNum; \ return r; \ } @@ -123,8 +124,6 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; * Benchmark Parameters ***************************************/ -#define BMK_LDM_PARAM_NOTSET 9999 - BMK_advancedParams_t BMK_initAdvancedParams(void) { BMK_advancedParams_t res = { BMK_both, /* mode */ @@ -137,8 +136,8 @@ BMK_advancedParams_t BMK_initAdvancedParams(void) { 0, /* ldmFlag */ 0, /* ldmMinMatch */ 0, /* ldmHashLog */ - BMK_LDM_PARAM_NOTSET, /* ldmBuckSizeLog */ - BMK_LDM_PARAM_NOTSET /* ldmHashEveryLog */ + 0, /* ldmBuckSizeLog */ + 0 /* ldmHashEveryLog */ }; return res; } @@ -157,6 +156,13 @@ typedef struct { size_t resSize; } blockParam_t; +struct BMK_timeState_t{ + unsigned nbLoops; + U64 timeRemaining; + UTIL_time_t coolTime; + U64 fastestTime; +}; + #undef MIN #undef MAX #define MIN(a,b) ((a) < (b) ? (a) : (b)) @@ -174,12 +180,8 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx, ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, adv->ldmFlag); ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, adv->ldmMinMatch); ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, adv->ldmHashLog); - if (adv->ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) { - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, adv->ldmBucketSizeLog); - } - if (adv->ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) { - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, adv->ldmHashEveryLog); - } + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, adv->ldmBucketSizeLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, adv->ldmHashEveryLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); @@ -239,6 +241,9 @@ static size_t local_defaultCompress( out.size = dstSize; out.pos = 0; while (moreToFlush) { + if(out.pos == out.size) { + return (size_t)-ZSTD_error_dstSize_tooSmall; + } moreToFlush = ZSTD_compress_generic(ctx, &out, &in, ZSTD_e_end); if (ZSTD_isError(moreToFlush)) { return moreToFlush; @@ -263,6 +268,9 @@ static size_t local_defaultDecompress( out.size = dstSize; out.pos = 0; while (moreToFlush) { + if(out.pos == out.size) { + return (size_t)-ZSTD_error_dstSize_tooSmall; + } moreToFlush = ZSTD_decompress_generic(dctx, &out, &in); if (ZSTD_isError(moreToFlush)) { @@ -276,17 +284,16 @@ static size_t local_defaultDecompress( /* initFn will be measured once, bench fn will be measured x times */ /* benchFn should return error value or out Size */ /* takes # of blocks and list of size & stuff for each. 
*/ -/* only does iterations*/ -/* note time/iter could be zero if interval too short */ +/* only does looping */ +/* note time per loop could be zero if interval too short */ BMK_customReturn_t BMK_benchFunction( - size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, - size_t (*initFn)(void*), void* initPayload, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, size_t blockCount, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, - unsigned iter) { + unsigned nbLoops) { size_t srcSize = 0, dstSize = 0, ind = 0; - unsigned toAdd = 1; U64 totalTime; BMK_customReturn_t retval; @@ -302,7 +309,7 @@ BMK_customReturn_t BMK_benchFunction( UTIL_waitForNextTick(); } - if(!iter) { + if(!nbLoops) { EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); } @@ -310,85 +317,93 @@ BMK_customReturn_t BMK_benchFunction( srcSize += srcBlockSizes[ind]; } - { - unsigned i, j; + { + unsigned i, j, firstIter = 1; clockStart = UTIL_getTime(); - if(initFn != NULL) { (*initFn)(initPayload); } - for(i = 0; i < iter; i++) { + if(initFn != NULL) { initFn(initPayload); } + for(i = 0; i < nbLoops; i++) { for(j = 0; j < blockCount; j++) { - size_t res = (*benchFn)(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); + size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); if(ZSTD_isError(res)) { EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); - } else if(toAdd) { + } else if(firstIter) { dstSize += res; } } - toAdd = 0; + firstIter = 0; } totalTime = UTIL_clockSpanNano(clockStart); } retval.error = 0; - retval.result.nanoSecPerRun = totalTime / iter; + retval.result.nanoSecPerRun = totalTime / nbLoops; retval.result.sumOfReturn = dstSize; return retval; } -BMK_customResultContinuation_t BMK_init_customResultContinuation(unsigned iter) { - BMK_customResultContinuation_t c; - c.completed = 0; - c.state.nbLoops = 1; - c.state.coolTime = UTIL_getTime(); - c.state.timeRemaining = (U64)iter * TIMELOOP_NANOSEC; - c.intermediateResult.error = 0; - c.intermediateResult.result.nanoSecPerRun = (U64)(-1LL); - c.intermediateResult.result.sumOfReturn = 0; - return c; +#define MINUSABLETIME 500000000ULL /* 0.5 seconds in ns */ + +void BMK_resetTimeState(BMK_timedFnState_t* r, unsigned nbSeconds) { + r->nbLoops = 1; + r->timeRemaining = (U64)nbSeconds * TIMELOOP_NANOSEC; + r->coolTime = UTIL_getTime(); + r->fastestTime = (U64)(-1LL); } -#define MINUSABLETIME 500000000ULL +BMK_timedFnState_t* BMK_createTimeState(unsigned nbSeconds) { + BMK_timedFnState_t* r = (BMK_timedFnState_t*)malloc(sizeof(struct BMK_timeState_t)); + BMK_resetTimeState(r, nbSeconds); + return r; +} -//how to use minusabletime? 
-//only report times which are > minUsable -void BMK_benchFunctionTimed( - size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, - size_t (*initFn)(void*), void* initPayload, +void BMK_freeTimeState(BMK_timedFnState_t* state) { + free(state); +} + +BMK_customTimedReturn_t BMK_benchFunctionTimed( + BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, size_t blockCount, - const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, - void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, - BMK_customResultContinuation_t* cont) + const void* const* const srcBlockBuffers, const size_t* srcBlockSizes, + void* const* const dstBlockBuffers, const size_t* dstBlockCapacities) { - U64 fastest = cont->intermediateResult.result.nanoSecPerRun; + U64 fastest = cont->fastestTime; int completed = 0; + BMK_customTimedReturn_t r; + r.completed = 0; - while(!cont->completed && !completed) + while(!r.completed && !completed) { /* Overheat protection */ - if (UTIL_clockSpanMicro(cont->state.coolTime) > ACTIVEPERIOD_MICROSEC) { + if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { DEBUGOUTPUT("\rcooling down ... \r"); UTIL_sleep(COOLPERIOD_SEC); - cont->state.coolTime = UTIL_getTime(); + cont->coolTime = UTIL_getTime(); } - cont->intermediateResult = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, - blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, cont->state.nbLoops); - if(cont->intermediateResult.error) { /* completed w/ error */ - cont->completed = 1; - return; + r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, + blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, cont->nbLoops); + if(r.result.error) { /* completed w/ error */ + r.completed = 1; + return r; } - { U64 const loopDuration = cont->intermediateResult.result.nanoSecPerRun * cont->state.nbLoops; - cont->completed = (cont->state.timeRemaining <= loopDuration); - cont->state.timeRemaining -= loopDuration; + { U64 const loopDuration = r.result.result.nanoSecPerRun * cont->nbLoops; + r.completed = (cont->timeRemaining <= loopDuration); + cont->timeRemaining -= loopDuration; if (loopDuration > 0) { - fastest = MIN(fastest, cont->intermediateResult.result.nanoSecPerRun); - cont->intermediateResult.result.nanoSecPerRun = fastest; - cont->state.nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + if(loopDuration >= MINUSABLETIME) { /* don't report results which have time too low */ + fastest = MIN(fastest, r.result.result.nanoSecPerRun); + } + r.result.result.nanoSecPerRun = fastest; + cont->fastestTime = fastest; + cont->nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; } else { const unsigned multiplier = 2; - assert(cont->state.nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ - cont->state.nbLoops *= multiplier; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; } if(loopDuration < MINUSABLETIME) { /* don't report results which have time too low */ continue; @@ -397,6 +412,7 @@ void BMK_benchFunctionTimed( } completed = 1; } + return r; } /* benchMem with no allocation */ @@ -406,6 +422,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( void** const resPtrs, size_t* const resSizes, void* resultBuffer, void* compressedBuffer, const size_t maxCompressedSize, + BMK_timedFnState_t* timeState, const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned 
nbFiles, @@ -504,21 +521,22 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); if(adv->loopMode == BMK_timeMode) { - BMK_customResultContinuation_t cont = BMK_init_customResultContinuation(adv->nbSeconds); - while(!cont.completed) { - BMK_benchFunctionTimed(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, &cont); - if(cont.intermediateResult.error) { - results.error = cont.intermediateResult.error; + BMK_customTimedReturn_t intermediateResult; + intermediateResult.completed = 0; + while(!intermediateResult.completed) { + intermediateResult = BMK_benchFunctionTimed(timeState, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes); + if(intermediateResult.result.error) { + results.error = intermediateResult.result.error; return results; } - ratio = (double)(srcSize / cont.intermediateResult.result.sumOfReturn); + ratio = (double)(srcSize / intermediateResult.result.result.sumOfReturn); { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = ((double)srcSize / cont.intermediateResult.result.nanoSecPerRun) * 1000; + double const compressionSpeed = ((double)srcSize / intermediateResult.result.result.nanoSecPerRun) * 1000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; results.result.cSpeed = compressionSpeed * 1000000; - results.result.cSize = cont.intermediateResult.result.sumOfReturn; + results.result.cSize = intermediateResult.result.result.sumOfReturn; ratio = (double)srcSize / results.result.cSize; markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", @@ -562,19 +580,21 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( dctxprep.dictBuffer = dictBuffer; dctxprep.dictBufferSize = dictBufferSize; if(adv->loopMode == BMK_timeMode) { - BMK_customResultContinuation_t cont = BMK_init_customResultContinuation(adv->nbSeconds); - while(!cont.completed) { - BMK_benchFunctionTimed(&local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void * const *)cPtrs, cSizes, resPtrs, resSizes, &cont); - if(cont.intermediateResult.error) { - results.error = cont.intermediateResult.error; + BMK_customTimedReturn_t intermediateResult; + intermediateResult.completed = 0; + BMK_resetTimeState(timeState, adv->nbSeconds); + while(!intermediateResult.completed) { + intermediateResult = BMK_benchFunctionTimed(timeState, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, + nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes); + if(intermediateResult.result.error) { + results.error = intermediateResult.result.error; return results; } { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; double const compressionSpeed = results.result.cSpeed / 1000000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; - double const decompressionSpeed = ((double)srcSize / cont.intermediateResult.result.nanoSecPerRun) * 1000; + double const decompressionSpeed = ((double)srcSize / intermediateResult.result.result.nanoSecPerRun) * 1000; results.result.dSpeed = decompressionSpeed * 1000000; markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", @@ -588,7 +608,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( decompressionResults = BMK_benchFunction( &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void * const *)cPtrs, cSizes, resPtrs, resSizes, + (const void* const*)cPtrs, cSizes, resPtrs, resSizes, adv->nbSeconds); if(decompressionResults.error) { results.error = decompressionResults.error; @@ -680,7 +700,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; /* these are the blockTable parameters, just split up */ - const void ** const srcPtrs = (const void ** const)malloc(maxNbBlocks * sizeof(void*)); + const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*)); size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); @@ -692,17 +712,20 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* compressedBuffer = malloc(maxCompressedSize); void* resultBuffer = malloc(srcSize); - + BMK_timedFnState_t* timeState = BMK_createTimeState(adv->nbSeconds); + + BMK_return_t results; int allocationincomplete = !compressedBuffer || !resultBuffer || !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; if (!allocationincomplete) { results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, - resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, + resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, timeState, srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); } /* clean up */ + BMK_freeTimeState(timeState); free(compressedBuffer); free(resultBuffer); @@ -741,7 +764,7 @@ static BMK_return_t BMK_benchMemCtxless(const void* srcBuffer, size_t srcSize, int cLevel, const ZSTD_compressionParameters* const comprParams, const void* dictBuffer, size_t dictBufferSize, int displayLevel, const char* displayName, - const BMK_advancedParams_t * const adv) + const BMK_advancedParams_t* const adv) { BMK_return_t res; ZSTD_CCtx* ctx = ZSTD_createCCtx(); @@ -854,11 +877,11 @@ BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, un const ZSTD_compressionParameters* const compressionParams, int displayLevel, const BMK_advancedParams_t * const adv) { - void* srcBuffer; + void* srcBuffer = NULL; size_t benchedSize; void* dictBuffer = NULL; size_t dictBufferSize = 0; - size_t* fileSizes; + size_t* fileSizes = NULL; BMK_return_t res; U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); @@ -890,7 +913,7 @@ BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, un int errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1, displayLevel); if(errorCode) { res.error = errorCode; - return res; + goto _cleanUp; } } } @@ -912,7 +935,7 @@ 
BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, un int errorCode = BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles, displayLevel); if(errorCode) { res.error = errorCode; - return res; + goto _cleanUp; } } /* Bench */ @@ -929,7 +952,7 @@ BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, un adv); } } - /* clean up */ +_cleanUp: free(srcBuffer); free(dictBuffer); free(fileSizes); @@ -966,7 +989,7 @@ BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, displayLevel, name, adv); /* clean up */ - free((void*)srcBuffer); + free(srcBuffer); return res; } diff --git a/programs/bench.h b/programs/bench.h index 17cc0d0f5..87cf56380 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -26,8 +26,8 @@ extern "C" { * in .error != 0. */ #define ERROR_STRUCT(baseType, typeName) typedef struct { \ - int error; \ baseType result; \ + int error; \ } typeName typedef struct { @@ -36,44 +36,7 @@ typedef struct { double dSpeed; } BMK_result_t; -typedef struct { - size_t sumOfReturn; /* sum of return values */ - U64 nanoSecPerRun; /* time per iteration */ -} BMK_customResult_t; -//we might need a nbRuns or nbSecs if we're keeping timeMode / iterMode respectively. -//give benchMem responsibility to incrementally update display. - ERROR_STRUCT(BMK_result_t, BMK_return_t); -ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); - -typedef enum { - BMK_timeMode = 0, - BMK_iterMode = 1 -} BMK_loopMode_t; - -typedef enum { - BMK_both = 0, - BMK_decodeOnly = 1, - BMK_compressOnly = 2 -} BMK_mode_t; - -typedef struct { - BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */ - BMK_loopMode_t loopMode; /* if loopmode, then nbSeconds = nbLoops */ - unsigned nbSeconds; /* default timing is in nbSeconds */ - size_t blockSize; /* Maximum allowable size of a block*/ - unsigned nbWorkers; /* multithreading */ - unsigned realTime; /* real time priority */ - int additionalParam; /* used by python speed benchmark */ - unsigned ldmFlag; /* enables long distance matching */ - unsigned ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md for meaning */ - unsigned ldmHashLog; - unsigned ldmBucketSizeLog; - unsigned ldmHashEveryLog; -} BMK_advancedParams_t; - -/* returns default parameters used by nonAdvanced functions */ -BMK_advancedParams_t BMK_initAdvancedParams(void); /* called in cli */ /* Loads files in fileNamesTable into memory, as well as a dictionary @@ -100,6 +63,35 @@ BMK_return_t BMK_benchFiles(const char* const * const fileNamesTable, unsigned c int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel); +typedef enum { + BMK_timeMode = 0, + BMK_iterMode = 1 +} BMK_loopMode_t; + +typedef enum { + BMK_both = 0, + BMK_decodeOnly = 1, + BMK_compressOnly = 2 +} BMK_mode_t; + +typedef struct { + BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */ + BMK_loopMode_t loopMode; /* if loopmode, then nbSeconds = nbLoops */ + unsigned nbSeconds; /* default timing is in nbSeconds */ + size_t blockSize; /* Maximum allowable size of a block*/ + unsigned nbWorkers; /* multithreading */ + unsigned realTime; /* real time priority */ + int additionalParam; /* used by python speed benchmark */ + unsigned ldmFlag; /* enables long distance matching */ + unsigned ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md for meaning */ + unsigned ldmHashLog; + unsigned ldmBucketSizeLog; + unsigned ldmHashEveryLog; +} BMK_advancedParams_t; + +/* returns 
default parameters used by nonAdvanced functions */ +BMK_advancedParams_t BMK_initAdvancedParams(void); + /* See benchFiles for normal parameter uses and return, see advancedParams_t for adv */ BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, unsigned const nbFiles, const char* const dictFileName, @@ -158,10 +150,20 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, int displayLevel, const char* displayName, const BMK_advancedParams_t* adv); +typedef struct { + size_t sumOfReturn; /* sum of return values */ + U64 nanoSecPerRun; /* time per iteration */ +} BMK_customResult_t; + +ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); + +typedef size_t (*BMK_benchFn_t)(const void*, size_t, void*, size_t, void*); +typedef size_t (*BMK_initFn_t)(void*); + /* This function times the execution of 2 argument functions, benchFn and initFn */ /* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) - * is run iter times + * is run nbLoops times * initFn - (*initFn)(initPayload) is run once per benchmark at the beginning. This argument can * be NULL, in which case nothing is run. * blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities) @@ -169,7 +171,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, * srcSizes - an array of the sizes of above buffers * dstBuffers - an array of buffers to be written into by benchFn * dstCapacities - an array of the capacities of above buffers. - * iter - defines number of times benchFn is run. + * nbLoops - defines number of times benchFn is run. * return * .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return * of the calls to initFn and benchFn, or if benchFunction errors internally @@ -181,44 +183,40 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, * dstBuffer. 
*/ BMK_customReturn_t BMK_benchFunction( - size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, - size_t (*initFn)(void*), void* initPayload, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, size_t blockCount, const void* const * const srcBuffers, const size_t* srcSizes, void* const * const dstBuffers, const size_t* dstCapacities, - unsigned sec); - -typedef struct { - unsigned nbLoops; - U64 timeRemaining; - UTIL_time_t coolTime; -} BMK_timeState_t; + unsigned nbLoops); + + +/* state information needed to advance computation for benchFunctionTimed */ +typedef struct BMK_timeState_t BMK_timedFnState_t; +/* initializes timeState object with desired number of seconds */ +BMK_timedFnState_t* BMK_createTimeState(unsigned nbSeconds); +/* resets existing timeState object */ +void BMK_resetTimeState(BMK_timedFnState_t*, unsigned nbSeconds); +/* deletes timeState object */ +void BMK_freeTimeState(BMK_timedFnState_t* state); typedef struct { + BMK_customReturn_t result; int completed; - BMK_customReturn_t intermediateResult; /* since the wrapper can't err, don't need ERROR_STRUCT(cRC, just check here) */ - BMK_timeState_t state; -} BMK_customResultContinuation_t; +} BMK_customTimedReturn_t; /* - * initializes the last argument of benchFunctionTimed, with iter being the number of seconds to bench (see below) - */ -BMK_customResultContinuation_t BMK_init_customResultContinuation(unsigned iter); - -/* - * Benchmarks custom functions like BMK_benchFunction(), but runs for iter seconds rather than a fixed number of iterations + * Benchmarks custom functions like BMK_benchFunction(), but runs for nbSeconds seconds rather than a fixed number of loops * arguments mostly the same other than BMK_benchFunction() - * Usage - benchFunctionTimed will return in approximately one second, where the intermediate results can be found in - * the *cont passed in and be displayed/used as wanted. Keep calling BMK_benchFunctionTimed() until cont->completed = 1 - * to continue updating intermediate result. + * Usage - benchFunctionTimed will return in approximately one second. Keep calling BMK_benchFunctionTimed() until the return's completed field = 1. + * to continue updating intermediate result. Intermediate return values are returned by the function. 
*/ -void BMK_benchFunctionTimed( - size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload, - size_t (*initFn)(void*), void* initPayload, +BMK_customTimedReturn_t BMK_benchFunctionTimed(BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, size_t blockCount, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, - void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, - BMK_customResultContinuation_t* cont); + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities); #endif /* BENCH_H_121279284357 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index a450ecac2..b3e0c0f63 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -710,13 +710,8 @@ int main(int argCount, const char* argv[]) /* Select compressibility of synthetic sample */ case 'P': - { U32 proba32 = 0; - while ((argument[1]>= '0') && (argument[1]<= '9')) { - proba32 *= 10; - proba32 += argument[1] - '0'; - argument++; - } - compressibility = (double)proba32 / 100; + { argument++; + compressibility = (double)readU32FromChar(&argument) / 100; } break; diff --git a/tests/fullbench.c b/tests/fullbench.c index 0a18eb2d9..b548a33f3 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -290,7 +290,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) size_t const dstBuffSize = ZSTD_compressBound(srcSize); void* buff2; const char* benchName; - size_t (*benchFunction)(const void* src, size_t srcSize, void* dst, size_t dstSize, void* verifBuff); + BMK_benchFn_t benchFunction; BMK_customReturn_t r; int errorcode = 0; From 50d612f4f0005c3ba793eb4f374e9f34a8c4601b Mon Sep 17 00:00:00 2001 From: George Lu Date: Mon, 25 Jun 2018 15:01:03 -0700 Subject: [PATCH 10/10] Interleave compression/decompression Fix Bugs --- programs/bench.c | 221 ++++++++++++++++++++++++----------------------- 1 file changed, 114 insertions(+), 107 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index b2b5dc3b2..a54168c42 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -394,12 +394,12 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed( r.completed = (cont->timeRemaining <= loopDuration); cont->timeRemaining -= loopDuration; if (loopDuration > 0) { - if(loopDuration >= MINUSABLETIME) { /* don't report results which have time too low */ - fastest = MIN(fastest, r.result.result.nanoSecPerRun); + fastest = MIN(fastest, r.result.result.nanoSecPerRun); + if(loopDuration >= MINUSABLETIME) { + r.result.result.nanoSecPerRun = fastest; + cont->fastestTime = fastest; } - r.result.result.nanoSecPerRun = fastest; - cont->fastestTime = fastest; - cont->nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1; + cont->nbLoops = (U32)(TIMELOOP_NANOSEC / r.result.result.nanoSecPerRun) + 1; } else { const unsigned multiplier = 2; assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ @@ -422,7 +422,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( void** const resPtrs, size_t* const resSizes, void* resultBuffer, void* compressedBuffer, const size_t maxCompressedSize, - BMK_timedFnState_t* timeState, + BMK_timedFnState_t* timeStateCompress, BMK_timedFnState_t* timeStateDecompress, const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, @@ -509,34 +509,98 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( U32 markNb = 0; DISPLAYLEVEL(2, "\r%79s\r", ""); - - if (adv->mode != BMK_decodeOnly) { + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, 
(U32)srcSize); + { BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; cctxprep.ctx = ctx; cctxprep.dictBuffer = dictBuffer; cctxprep.dictBufferSize = dictBufferSize; cctxprep.cLevel = cLevel; cctxprep.comprParams = comprParams; cctxprep.adv = adv; - /* Compression */ - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; if(adv->loopMode == BMK_timeMode) { - BMK_customTimedReturn_t intermediateResult; - intermediateResult.completed = 0; - while(!intermediateResult.completed) { - intermediateResult = BMK_benchFunctionTimed(timeState, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes); - if(intermediateResult.result.error) { - results.error = intermediateResult.result.error; + BMK_customTimedReturn_t intermediateResultCompress; + BMK_customTimedReturn_t intermediateResultDecompress; + if(adv->mode == BMK_compressOnly) { + intermediateResultCompress.completed = 0; + intermediateResultDecompress.completed = 1; + } else if (adv->mode == BMK_decodeOnly) { + intermediateResultCompress.completed = 1; + intermediateResultDecompress.completed = 0; + } else { /* both */ + intermediateResultCompress.completed = 0; + intermediateResultDecompress.completed = 0; + } + while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) { + if(!intermediateResultCompress.completed) { + intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes); + if(intermediateResultCompress.result.error) { + results.error = intermediateResultCompress.result.error; + return results; + } + ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn); + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * 1000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + results.result.cSpeed = compressionSpeed * 1000000; + results.result.cSize = intermediateResultCompress.result.result.sumOfReturn; + ratio = (double)srcSize / results.result.cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); + } + } + + if(!intermediateResultDecompress.completed) { + intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, + nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes); + if(intermediateResultDecompress.result.error) { + results.error = intermediateResultDecompress.result.error; + return results; + } + + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; + double const decompressionSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000; + results.result.dSpeed = decompressionSpeed * 1000000; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed, + decompressionSpeed); + } + } + } + + } else { + if(adv->mode != BMK_decodeOnly) { + BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); + if(compressionResults.error) { + results.error = compressionResults.error; return results; } - ratio = (double)(srcSize / intermediateResult.result.result.sumOfReturn); + if(compressionResults.result.nanoSecPerRun == 0) { + results.result.cSpeed = 0; + } else { + results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + results.result.cSize = compressionResults.result.sumOfReturn; { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = ((double)srcSize / intermediateResult.result.result.nanoSecPerRun) * 1000; + double const compressionSpeed = results.result.cSpeed / 1000000; int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - results.result.cSpeed = compressionSpeed * 1000000; - results.result.cSize = intermediateResult.result.result.sumOfReturn; ratio = (double)srcSize / results.result.cSize; markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", @@ -545,91 +609,33 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( cSpeedAccuracy, compressionSpeed); } } - } else { - BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, - nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); - if(compressionResults.error) { - results.error = compressionResults.error; - return results; - } - if(compressionResults.result.nanoSecPerRun == 0) { - results.result.cSpeed = 0; - } else { - results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; - } - results.result.cSize = compressionResults.result.sumOfReturn; - { - int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; - ratio = (double)srcSize / results.result.cSize; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed); - } - } - - } /* if (adv->mode != BMK_decodeOnly) */ - - if(adv->mode != BMK_compressOnly) { - BMK_initDCtxArgs dctxprep; - BMK_customReturn_t decompressionResults; - dctxprep.dctx = dctx; - dctxprep.dictBuffer = dictBuffer; - dctxprep.dictBufferSize = dictBufferSize; - if(adv->loopMode == BMK_timeMode) { - BMK_customTimedReturn_t intermediateResult; - intermediateResult.completed = 0; - BMK_resetTimeState(timeState, adv->nbSeconds); - while(!intermediateResult.completed) { - intermediateResult = BMK_benchFunctionTimed(timeState, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, - nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes); - if(intermediateResult.result.error) { - results.error = intermediateResult.result.error; + if(adv->mode != BMK_compressOnly) { + BMK_customReturn_t decompressionResults = BMK_benchFunction( + &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, nbBlocks, + (const void* const*)cPtrs, cSizes, resPtrs, resSizes, + adv->nbSeconds); + if(decompressionResults.error) { + results.error = decompressionResults.error; return results; } - - { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / intermediateResult.result.result.nanoSecPerRun) * 1000; - results.result.dSpeed = decompressionSpeed * 1000000; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed, - decompressionSpeed); + if(decompressionResults.result.nanoSecPerRun == 0) { + results.result.dSpeed = 0; + } else { + results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; + results.result.dSpeed = decompressionSpeed * 1000000; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed, + decompressionSpeed); } - } - } else { - decompressionResults = BMK_benchFunction( - &local_defaultDecompress, (void*)(dctx), - &local_initDCtx, (void*)&dctxprep, nbBlocks, - (const void* const*)cPtrs, cSizes, resPtrs, resSizes, - adv->nbSeconds); - if(decompressionResults.error) { - results.error = decompressionResults.error; - return results; - } - if(decompressionResults.result.nanoSecPerRun == 0) { - results.result.dSpeed = 0; - } else { - results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; - } - { int const ratioAccuracy = (ratio < 10.) ? 
3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; - results.result.dSpeed = decompressionSpeed * 1000000; - markNb = (markNb+1) % NB_MARKS; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed, - decompressionSpeed); } } } @@ -712,20 +718,21 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* compressedBuffer = malloc(maxCompressedSize); void* resultBuffer = malloc(srcSize); - BMK_timedFnState_t* timeState = BMK_createTimeState(adv->nbSeconds); - + BMK_timedFnState_t* timeStateCompress = BMK_createTimeState(adv->nbSeconds); + BMK_timedFnState_t* timeStateDecompress = BMK_createTimeState(adv->nbSeconds); BMK_return_t results; int allocationincomplete = !compressedBuffer || !resultBuffer || !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; if (!allocationincomplete) { results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, - resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, timeState, + resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress, srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); } /* clean up */ - BMK_freeTimeState(timeState); + BMK_freeTimeState(timeStateCompress); + BMK_freeTimeState(timeStateDecompress); free(compressedBuffer); free(resultBuffer);
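
For reference, the timed-benchmark interface introduced by this series is meant to be driven in a polling loop: BMK_benchFunctionTimed() returns after roughly one second of work with an intermediate result, and the caller keeps invoking it until the returned completed flag is set. Below is a minimal usage sketch, assuming only the declarations added to programs/bench.h in these patches; the wrapper compressBlock(), the driver exampleTimedBench(), the 2-second budget and the level-3 setting are illustrative choices, not part of the patch.

    #include <stdlib.h>     /* malloc, free */
    #include "zstd.h"       /* ZSTD_createCCtx, ZSTD_compressCCtx, ZSTD_compressBound */
    #include "bench.h"      /* BMK_benchFn_t, BMK_timedFnState_t, BMK_benchFunctionTimed */

    /* BMK_benchFn_t-compatible wrapper: compress one block at level 3,
     * using the ZSTD_CCtx passed through the opaque payload pointer. */
    static size_t compressBlock(const void* src, size_t srcSize,
                                void* dst, size_t dstCapacity, void* payload)
    {
        return ZSTD_compressCCtx((ZSTD_CCtx*)payload, dst, dstCapacity, src, srcSize, 3);
    }

    static void exampleTimedBench(const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t const dstCapacity = ZSTD_compressBound(srcSize);
        void* const dst = malloc(dstCapacity);
        const void* srcBuffers[1]   = { src };
        size_t srcSizes[1]          = { srcSize };
        void* dstBuffers[1]         = { dst };
        size_t dstCapacities[1]     = { dstCapacity };

        /* benchmark for ~2 seconds; poll until the state reports completion */
        BMK_timedFnState_t* const timeState = BMK_createTimeState(2);
        BMK_customTimedReturn_t r;
        r.completed = 0;
        while (!r.completed) {
            r = BMK_benchFunctionTimed(timeState, &compressBlock, cctx,
                                       NULL, NULL,           /* no init step */
                                       1, srcBuffers, srcSizes,
                                       dstBuffers, dstCapacities);
            if (r.result.error) break;      /* nonzero on any ZSTD error */
            /* intermediate result: r.result.result.nanoSecPerRun is the fastest
             * run observed so far, r.result.result.sumOfReturn the compressed size */
        }
        BMK_freeTimeState(timeState);
        free(dst);
        ZSTD_freeCCtx(cctx);
    }

Keeping the timing bookkeeping (remaining time, loop count, fastest run) inside an opaque BMK_timedFnState_t is what allows the final patch above to interleave compression and decompression: each direction owns its own state object and advances independently until both report completed.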