diff --git a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj index e697318e0..6939d4406 100644 --- a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +++ b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj @@ -167,11 +167,13 @@ + + diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj index 19faf92cb..d2276c33d 100644 --- a/build/VS2010/fullbench/fullbench.vcxproj +++ b/build/VS2010/fullbench/fullbench.vcxproj @@ -176,6 +176,7 @@ + @@ -197,6 +198,7 @@ + diff --git a/programs/bench.c b/programs/bench.c index 09697d1fe..a54168c42 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -42,6 +42,7 @@ #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" #include "bench.h" +#include "zstd_errors.h" /* ************************************* @@ -64,9 +65,6 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); -static U32 g_compressibilityDefault = 50; - - /* ************************************* * console display ***************************************/ @@ -90,88 +88,60 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; # define DEBUG 0 #endif #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } -#define EXM_THROW(error, ...) { \ + +#define EXM_THROW_INT(errorNum, ...) { \ DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ - DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, "Error %i : ", errorNum); \ DISPLAYLEVEL(1, __VA_ARGS__); \ DISPLAYLEVEL(1, " \n"); \ - exit(error); \ + return errorNum; \ } +#define EXM_THROW(errorNum, retType, ...) { \ + retType r; \ + memset(&r, 0, sizeof(retType)); \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", errorNum); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + r.error = errorNum; \ + return r; \ +} + +/* error without displaying */ +#define EXM_THROW_ND(errorNum, retType, ...) 
{ \ + retType r; \ + memset(&r, 0, sizeof(retType)); \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error %i : ", errorNum); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + r.error = errorNum; \ + return r; \ +} /* ************************************* * Benchmark Parameters ***************************************/ -static int g_additionalParam = 0; -static U32 g_decodeOnly = 0; -void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } - - -//TODO : Deal with DISPLAYLEVEL for all these set functions - -static U32 g_nbSeconds = BMK_TIMETEST_DEFAULT_S; - -void BMK_setNbSeconds(unsigned nbSeconds) -{ - g_nbSeconds = nbSeconds; - DISPLAY("- test >= %u seconds per compression / decompression - \n", g_nbSeconds); +BMK_advancedParams_t BMK_initAdvancedParams(void) { + BMK_advancedParams_t res = { + BMK_both, /* mode */ + BMK_timeMode, /* loopMode */ + BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ + 0, /* blockSize */ + 0, /* nbWorkers */ + 0, /* realTime */ + 0, /* additionalParam */ + 0, /* ldmFlag */ + 0, /* ldmMinMatch */ + 0, /* ldmHashLog */ + 0, /* ldmBuckSizeLog */ + 0 /* ldmHashEveryLog */ + }; + return res; } -static size_t g_blockSize = 0; - -void BMK_setBlockSize(size_t blockSize) -{ - g_blockSize = blockSize; - if (g_blockSize) DISPLAY("using blocks of size %u KB \n", (U32)(blockSize>>10)); -} - -void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } - -static U32 g_nbWorkers = 0; - -void BMK_setNbWorkers(unsigned nbWorkers) { -#ifndef ZSTD_MULTITHREAD - if (nbWorkers > 0) DISPLAY("Note : multi-threading is disabled \n"); -#endif - g_nbWorkers = nbWorkers; -} - -static U32 g_realTime = 0; -void BMK_setRealTime(unsigned priority) { - g_realTime = (priority>0); -} - -static U32 g_separateFiles = 0; -void BMK_setSeparateFiles(unsigned separate) { - g_separateFiles = (separate>0); -} - -static U32 g_ldmFlag = 0; -void BMK_setLdmFlag(unsigned ldmFlag) { - g_ldmFlag = ldmFlag; -} - 
-static U32 g_ldmMinMatch = 0; -void BMK_setLdmMinMatch(unsigned ldmMinMatch) { - g_ldmMinMatch = ldmMinMatch; -} - -static U32 g_ldmHashLog = 0; -void BMK_setLdmHashLog(unsigned ldmHashLog) { - g_ldmHashLog = ldmHashLog; -} - -#define BMK_LDM_PARAM_NOTSET 9999 -static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET; -void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { - g_ldmBucketSizeLog = ldmBucketSizeLog; -} - -static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET; -void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) { - g_ldmHashEveryLog = ldmHashEveryLog; -} /* ******************************************************** * Bench functions @@ -186,344 +156,638 @@ typedef struct { size_t resSize; } blockParam_t; +struct BMK_timeState_t{ + unsigned nbLoops; + U64 timeRemaining; + UTIL_time_t coolTime; + U64 fastestTime; +}; + #undef MIN #undef MAX #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) -BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, - const size_t* fileSizes, unsigned nbFiles, - const int cLevel, const ZSTD_compressionParameters* comprParams, - const void* dictBuffer, size_t dictBufferSize, - ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, - int displayLevel, const char* displayName) +static void BMK_initCCtx(ZSTD_CCtx* ctx, + const void* dictBuffer, size_t dictBufferSize, int cLevel, + const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) { + if (adv->nbWorkers==1) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0); + } else { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, adv->nbWorkers); + } + ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, adv->ldmFlag); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, adv->ldmMinMatch); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, adv->ldmHashLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, adv->ldmBucketSizeLog); + ZSTD_CCtx_setParameter(ctx, 
ZSTD_p_ldmHashEveryLog, adv->ldmHashEveryLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy); + ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); +} + +static void BMK_initDCtx(ZSTD_DCtx* dctx, + const void* dictBuffer, size_t dictBufferSize) { + ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); +} + +typedef struct { + ZSTD_CCtx* ctx; + const void* dictBuffer; + size_t dictBufferSize; + int cLevel; + const ZSTD_compressionParameters* comprParams; + const BMK_advancedParams_t* adv; +} BMK_initCCtxArgs; + +static size_t local_initCCtx(void* payload) { + BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; + BMK_initCCtx(ag->ctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); + return 0; +} + +typedef struct { + ZSTD_DCtx* dctx; + const void* dictBuffer; + size_t dictBufferSize; +} BMK_initDCtxArgs; + +static size_t local_initDCtx(void* payload) { + BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload; + BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize); + return 0; +} + +/* additional argument is just the context */ +static size_t local_defaultCompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) { + size_t moreToFlush = 1; + ZSTD_CCtx* ctx = (ZSTD_CCtx*)addArgs; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + in.src = srcBuffer; + in.size = srcSize; + in.pos = 0; + out.dst = dstBuffer; + out.size = dstSize; + out.pos = 0; + while (moreToFlush) { + if(out.pos == out.size) { + return 
(size_t)-ZSTD_error_dstSize_tooSmall; + } + moreToFlush = ZSTD_compress_generic(ctx, &out, &in, ZSTD_e_end); + if (ZSTD_isError(moreToFlush)) { + return moreToFlush; + } + } + return out.pos; +} + +/* additional argument is just the context */ +static size_t local_defaultDecompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) { + size_t moreToFlush = 1; + ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + in.src = srcBuffer; + in.size = srcSize; + in.pos = 0; + out.dst = dstBuffer; + out.size = dstSize; + out.pos = 0; + while (moreToFlush) { + if(out.pos == out.size) { + return (size_t)-ZSTD_error_dstSize_tooSmall; + } + moreToFlush = ZSTD_decompress_generic(dctx, + &out, &in); + if (ZSTD_isError(moreToFlush)) { + return moreToFlush; + } + } + return out.pos; + +} + +/* initFn will be measured once, bench fn will be measured x times */ +/* benchFn should return error value or out Size */ +/* takes # of blocks and list of size & stuff for each. 
*/ +/* only does looping */ +/* note time per loop could be zero if interval too short */ +BMK_customReturn_t BMK_benchFunction( + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, + unsigned nbLoops) { + size_t srcSize = 0, dstSize = 0, ind = 0; + U64 totalTime; + + BMK_customReturn_t retval; + UTIL_time_t clockStart; + + { + unsigned i; + for(i = 0; i < blockCount; i++) { + memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ + } + + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); + } + + if(!nbLoops) { + EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); + } + + for(ind = 0; ind < blockCount; ind++) { + srcSize += srcBlockSizes[ind]; + } + + { + unsigned i, j, firstIter = 1; + clockStart = UTIL_getTime(); + if(initFn != NULL) { initFn(initPayload); } + for(i = 0; i < nbLoops; i++) { + for(j = 0; j < blockCount; j++) { + size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload); + if(ZSTD_isError(res)) { + EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", + j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res)); + } else if(firstIter) { + dstSize += res; + } + } + firstIter = 0; + } + totalTime = UTIL_clockSpanNano(clockStart); + } + + retval.error = 0; + retval.result.nanoSecPerRun = totalTime / nbLoops; + retval.result.sumOfReturn = dstSize; + return retval; +} + +#define MINUSABLETIME 500000000ULL /* 0.5 seconds in ns */ + +void BMK_resetTimeState(BMK_timedFnState_t* r, unsigned nbSeconds) { + r->nbLoops = 1; + r->timeRemaining = (U64)nbSeconds * TIMELOOP_NANOSEC; + r->coolTime = UTIL_getTime(); + r->fastestTime = (U64)(-1LL); +} + +BMK_timedFnState_t* 
BMK_createTimeState(unsigned nbSeconds) { + BMK_timedFnState_t* r = (BMK_timedFnState_t*)malloc(sizeof(struct BMK_timeState_t)); + BMK_resetTimeState(r, nbSeconds); + return r; +} + +void BMK_freeTimeState(BMK_timedFnState_t* state) { + free(state); +} + +BMK_customTimedReturn_t BMK_benchFunctionTimed( + BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const* const srcBlockBuffers, const size_t* srcBlockSizes, + void* const* const dstBlockBuffers, const size_t* dstBlockCapacities) { - size_t const blockSize = ((g_blockSize>=32 && !g_decodeOnly) ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; - U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; - blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ - void* const compressedBuffer = malloc(maxCompressedSize); - void* resultBuffer = malloc(srcSize); - BMK_return_t results; + U64 fastest = cont->fastestTime; + int completed = 0; + BMK_customTimedReturn_t r; + r.completed = 0; + while(!r.completed && !completed) + { + /* Overheat protection */ + if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { + DEBUGOUTPUT("\rcooling down ... 
\r"); + UTIL_sleep(COOLPERIOD_SEC); + cont->coolTime = UTIL_getTime(); + } + + r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, + blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, cont->nbLoops); + if(r.result.error) { /* completed w/ error */ + r.completed = 1; + return r; + } + + { U64 const loopDuration = r.result.result.nanoSecPerRun * cont->nbLoops; + r.completed = (cont->timeRemaining <= loopDuration); + cont->timeRemaining -= loopDuration; + if (loopDuration > 0) { + fastest = MIN(fastest, r.result.result.nanoSecPerRun); + if(loopDuration >= MINUSABLETIME) { + r.result.result.nanoSecPerRun = fastest; + cont->fastestTime = fastest; + } + cont->nbLoops = (U32)(TIMELOOP_NANOSEC / r.result.result.nanoSecPerRun) + 1; + } else { + const unsigned multiplier = 2; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; + } + if(loopDuration < MINUSABLETIME) { /* don't report results which have time too low */ + continue; + } + + } + completed = 1; + } + return r; +} + +/* benchMem with no allocation */ +static BMK_return_t BMK_benchMemAdvancedNoAlloc( + const void ** const srcPtrs, size_t* const srcSizes, + void** const cPtrs, size_t* const cSizes, + void** const resPtrs, size_t* const resSizes, + void* resultBuffer, void* compressedBuffer, + const size_t maxCompressedSize, + BMK_timedFnState_t* timeStateCompress, BMK_timedFnState_t* timeStateDecompress, + + const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) +{ + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? 
adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ + BMK_return_t results; size_t const loadedCompressedSize = srcSize; size_t cSize = 0; double ratio = 0.; U32 nbBlocks; - /* checks */ - if (!compressedBuffer || !resultBuffer || !blockTable) - EXM_THROW(31, "allocation error : not enough memory"); - if(!ctx || !dctx) - EXM_THROW(31, "error: passed in null context"); + EXM_THROW(31, BMK_return_t, "error: passed in null context"); /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* display last 17 characters */ - if (g_nbWorkers==1) g_nbWorkers=0; /* prefer synchronous mode */ - - if (g_decodeOnly) { /* benchmark only decompression : source must be already compressed */ + if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; U64 totalDSize64 = 0; U32 fileNb; for (fileNb=0; fileNb decodedSize) EXM_THROW(32, "original size is too large"); /* size_t overflow */ free(resultBuffer); resultBuffer = malloc(decodedSize); - if (!resultBuffer) EXM_THROW(33, "not enough memory"); + if (!resultBuffer) { + EXM_THROW(33, BMK_return_t, "not enough memory"); + } + if (totalDSize64 > decodedSize) { + free(resultBuffer); + EXM_THROW(32, BMK_return_t, "original size is too large"); /* size_t overflow */ + } cSize = srcSize; srcSize = decodedSize; ratio = (double)srcSize / (double)cSize; - } } + } + } - /* Init blockTable data */ + /* Init data blocks */ { const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; char* resPtr = (char*)resultBuffer; U32 fileNb; for (nbBlocks=0, fileNb=0; fileNbmode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize); U32 const blockEnd = nbBlocks + nbBlocksforThisFile; for ( ; nbBlocksmode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); + resPtrs[nbBlocks] = (void*)resPtr; + resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? 
(size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; srcPtr += thisBlockSize; - cPtr += blockTable[nbBlocks].cRoom; + cPtr += cSizes[nbBlocks]; resPtr += thisBlockSize; remaining -= thisBlockSize; - } } } + } + } + } /* warmimg up memory */ - if (g_decodeOnly) { + if (adv->mode == BMK_decodeOnly) { memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } else { RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); } /* Bench */ - { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); - U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0); - UTIL_time_t coolTime; - U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 1; - U32 nbDecodeLoops = (U32)((100 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */ - U32 nbCompressionLoops = (U32)((2 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */ - U64 totalCTime=0, totalDTime=0; - U32 cCompleted=g_decodeOnly, dCompleted=0; + { + U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); # define NB_MARKS 4 const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; - - coolTime = UTIL_getTime(); DISPLAYLEVEL(2, "\r%79s\r", ""); - while (!cCompleted || !dCompleted) { - /* overheat protection */ - if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) { - DISPLAYLEVEL(2, "\rcooling down ... 
\r"); - UTIL_sleep(COOLPERIOD_SEC); - coolTime = UTIL_getTime(); - } - - if (!g_decodeOnly) { - /* Compression */ - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); - if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ - - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); - - if (!cCompleted) { /* still some time to do compression tests */ - U32 nbLoops = 0; - UTIL_time_t const clockStart = UTIL_getTime(); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, g_nbWorkers); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog); - if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) { - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog); - } - if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) { - ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog); - } - ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength); - ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy); - ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize); - - if (!g_nbSeconds) nbCompressionLoops=1; - for (nbLoops=0; nbLoops 0) { - if (loopDuration < fastestC * nbCompressionLoops) - fastestC = loopDuration / nbCompressionLoops; - nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1; - } else { - assert(nbCompressionLoops < 
40000000); /* avoid overflow */ - nbCompressionLoops *= 100; - } - totalCTime += loopDuration; - cCompleted = (totalCTime >= maxTime); /* end compression tests */ - } } - - cSize = 0; - { U32 blockNb; for (blockNb=0; blockNb%10u (%5.*f),%6.*f MB/s\r", - marks[markNb], displayName, (U32)srcSize, (U32)cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed ); + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); + { + BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; + cctxprep.ctx = ctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + cctxprep.adv = adv; + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; + if(adv->loopMode == BMK_timeMode) { + BMK_customTimedReturn_t intermediateResultCompress; + BMK_customTimedReturn_t intermediateResultDecompress; + if(adv->mode == BMK_compressOnly) { + intermediateResultCompress.completed = 0; + intermediateResultDecompress.completed = 1; + } else if (adv->mode == BMK_decodeOnly) { + intermediateResultCompress.completed = 1; + intermediateResultDecompress.completed = 0; + } else { /* both */ + intermediateResultCompress.completed = 0; + intermediateResultDecompress.completed = 0; } - } /* if (!g_decodeOnly) */ - -#if 0 /* disable decompression test */ - dCompleted=1; - (void)totalDTime; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#else - /* Decompression */ - if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ - - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); - - if (!dCompleted) { - U32 nbLoops = 0; - ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); - UTIL_time_t const clockStart = UTIL_getTime(); - if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); - if (!g_nbSeconds) nbDecodeLoops = 1; - 
for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) { - U32 blockNb; - for (blockNb=0; blockNb%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); } - blockTable[blockNb].resSize = regenSize; - } } - ZSTD_freeDDict(ddict); - { U64 const loopDuration = UTIL_clockSpanNano(clockStart); - if (loopDuration > 0) { - if (loopDuration < fastestD * nbDecodeLoops) - fastestD = loopDuration / nbDecodeLoops; - nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1; - } else { - assert(nbDecodeLoops < 40000000); /* avoid overflow */ - nbDecodeLoops *= 100; } - totalDTime += loopDuration; - dCompleted = (totalDTime >= maxTime); - } } - markNb = (markNb+1) % NB_MARKS; - { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = ((double)srcSize / fastestC) * 1000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / fastestD) * 1000; - results.result.cSpeed = compressionSpeed * 1000000; - results.result.dSpeed = decompressionSpeed * 1000000; - DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", - marks[markNb], displayName, (U32)srcSize, (U32)cSize, - ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed, - decompressionSpeed); - } - - /* CRC Checking */ - { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); - if (!g_decodeOnly && (crcOrig!=crcCheck)) { - size_t u; - DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); - for (u=0; u u) break; - bacc += blockTable[segNb].srcSize; - } - pos = (U32)(u - bacc); - bNb = pos / (128 KB); - DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); - if (u>5) { - int n; - DISPLAY("origin: "); - for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); - DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); - for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); - DISPLAY(" \n"); - DISPLAY("decode: "); - for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); - DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); - for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); - DISPLAY(" \n"); - } - break; + if(!intermediateResultDecompress.completed) { + intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, + nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes); + if(intermediateResultDecompress.result.error) { + results.error = intermediateResultDecompress.result.error; + return results; } - if (u==srcSize-1) { /* should never happen */ - DISPLAY("no difference detected\n"); - } } - break; - } } /* CRC Checking */ -#endif - } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */ + + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; + double const decompressionSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000; + results.result.dSpeed = decompressionSpeed * 1000000; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed, + decompressionSpeed); + } + } + } - if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ - double const cSpeed = ((double)srcSize / fastestC) * 1000; - double const dSpeed = ((double)srcSize / fastestD) * 1000; - if (g_additionalParam) - DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); - else - DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } else { + if(adv->mode != BMK_decodeOnly) { + BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, + nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); + if(compressionResults.error) { + results.error = compressionResults.error; + return results; + } + if(compressionResults.result.nanoSecPerRun == 0) { + results.result.cSpeed = 0; + } else { + results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + results.result.cSize = compressionResults.result.sumOfReturn; + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 
2 : 1; + ratio = (double)srcSize / results.result.cSize; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed); + } + } + if(adv->mode != BMK_compressOnly) { + BMK_customReturn_t decompressionResults = BMK_benchFunction( + &local_defaultDecompress, (void*)(dctx), + &local_initDCtx, (void*)&dctxprep, nbBlocks, + (const void* const*)cPtrs, cSizes, resPtrs, resSizes, + adv->nbSeconds); + if(decompressionResults.error) { + results.error = decompressionResults.error; + return results; + } + if(decompressionResults.result.nanoSecPerRun == 0) { + results.result.dSpeed = 0; + } else { + results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + } + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + double const compressionSpeed = results.result.cSpeed / 1000000; + int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; + results.result.dSpeed = decompressionSpeed * 1000000; + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", + marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, + ratioAccuracy, ratio, + cSpeedAccuracy, compressionSpeed, + decompressionSpeed); + } + } + } } - DISPLAYLEVEL(2, "%2i#\n", cLevel); - } /* Bench */ - /* clean up */ - free(blockTable); - free(compressedBuffer); - free(resultBuffer); - results.errorCode = 0; + /* CRC Checking */ + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + /* adv->mode == 0 -> compress + decompress */ + if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { + size_t u; + DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u u) break; + bacc += srcSizes[segNb]; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); + if (u>5) { + int n; + DISPLAY("origin: "); + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" \n"); + DISPLAY("decode: "); + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" \n"); + } + break; + } + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } + } + } + } /* CRC Checking */ + + if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ + double const cSpeed = results.result.cSpeed / 1000000; + double const dSpeed = results.result.dSpeed / 1000000; + if (adv->additionalParam) { + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam); + } else { + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } + } + DISPLAYLEVEL(2, "%2i#\n", cLevel); + } /* Bench */ return results; } -static void BMK_benchMemCtxless(const void* srcBuffer, size_t srcSize, +BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) + +{ + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? 
adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; + + /* these are the blockTable parameters, just split up */ + const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + + void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + + void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); + size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); + + const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + void* compressedBuffer = malloc(maxCompressedSize); + void* resultBuffer = malloc(srcSize); + BMK_timedFnState_t* timeStateCompress = BMK_createTimeState(adv->nbSeconds); + BMK_timedFnState_t* timeStateDecompress = BMK_createTimeState(adv->nbSeconds); + + BMK_return_t results; + int allocationincomplete = !compressedBuffer || !resultBuffer || + !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; + if (!allocationincomplete) { + results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, + resPtrs, resSizes, resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress, + srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, + dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); + } + /* clean up */ + BMK_freeTimeState(timeStateCompress); + BMK_freeTimeState(timeStateDecompress); + free(compressedBuffer); + free(resultBuffer); + + free((void*)srcPtrs); + free(srcSizes); + free(cPtrs); + free(cSizes); + free(resPtrs); + free(resSizes); + + if(allocationincomplete) { + EXM_THROW(31, BMK_return_t, "allocation error : not enough memory"); + } + results.error = 0; + return results; +} + 
+BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName) { + + const BMK_advancedParams_t adv = BMK_initAdvancedParams(); + return BMK_benchMemAdvanced(srcBuffer, srcSize, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + ctx, dctx, + displayLevel, displayName, &adv); +} + +static BMK_return_t BMK_benchMemCtxless(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, int cLevel, const ZSTD_compressionParameters* const comprParams, const void* dictBuffer, size_t dictBufferSize, - int displayLevel, const char* displayName) + int displayLevel, const char* displayName, + const BMK_advancedParams_t* const adv) { + BMK_return_t res; ZSTD_CCtx* ctx = ZSTD_createCCtx(); ZSTD_DCtx* dctx = ZSTD_createDCtx(); if(ctx == NULL || dctx == NULL) { - EXM_THROW(12, "not enough memory for contexts"); + EXM_THROW(12, BMK_return_t, "not enough memory for contexts"); } - BMK_benchMem(srcBuffer, srcSize, + res = BMK_benchMemAdvanced(srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, dictBuffer, dictBufferSize, ctx, dctx, - displayLevel, displayName); + displayLevel, displayName, adv); ZSTD_freeCCtx(ctx); ZSTD_freeDCtx(dctx); + return res; } static size_t BMK_findMaxMem(U64 requiredMem) @@ -544,44 +808,43 @@ static size_t BMK_findMaxMem(U64 requiredMem) return (size_t)(requiredMem); } -/* returns average stats over all range [cLevel, cLevelLast] */ -static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, +static BMK_return_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, const size_t* fileSizes, unsigned nbFiles, - const int cLevel, const int cLevelLast, const ZSTD_compressionParameters* comprParams, + const int cLevel, const ZSTD_compressionParameters* 
comprParams, const void* dictBuffer, size_t dictBufferSize, - int displayLevel, const char* displayName) + int displayLevel, const char* displayName, + BMK_advancedParams_t const * const adv) { - int l; + BMK_return_t res; const char* pch = strrchr(displayName, '\\'); /* Windows */ + if (!pch) pch = strrchr(displayName, '/'); /* Linux */ if (pch) displayName = pch+1; - if (g_realTime) { + if (adv->realTime) { DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); SET_REALTIME_PRIORITY; } - if (displayLevel == 1 && !g_additionalParam) - DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); + if (displayLevel == 1 && !adv->additionalParam) + DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, adv->nbSeconds, (U32)(adv->blockSize>>10)); - for (l=cLevel; l <= cLevelLast; l++) { - if (l==0) continue; /* skip level 0 */ - BMK_benchMemCtxless(srcBuffer, benchedSize, - fileSizes, nbFiles, - l, comprParams, - dictBuffer, dictBufferSize, - displayLevel, displayName); - } + res = BMK_benchMemCtxless(srcBuffer, benchedSize, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + displayLevel, displayName, + adv); - return; + return res; } /*! BMK_loadFiles() : * Loads `buffer` with content of files listed within `fileNamesTable`. * At most, fills `buffer` entirely. 
*/ -static void BMK_loadFiles(void* buffer, size_t bufferSize, +static int BMK_loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes, const char* const * const fileNamesTable, unsigned nbFiles, int displayLevel) { @@ -601,44 +864,65 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, continue; } f = fopen(fileNamesTable[n], "rb"); - if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); + if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]); DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]); if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); - if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]); pos += readSize; } fileSizes[n] = (size_t)fileSize; totalSize += (size_t)fileSize; fclose(f); } - if (totalSize == 0) EXM_THROW(12, "no data to bench"); + if (totalSize == 0) EXM_THROW_INT(12, "no data to bench"); + return 0; } -static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigned const nbFiles, - const char* const dictFileName, int const cLevel, int const cLevelLast, - const ZSTD_compressionParameters* const compressionParams, int displayLevel) +BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, int const cLevel, + const ZSTD_compressionParameters* const compressionParams, + int displayLevel, const BMK_advancedParams_t * const adv) { - void* srcBuffer; + void* srcBuffer = NULL; size_t benchedSize; void* dictBuffer = NULL; size_t dictBufferSize = 0; - size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); + size_t* fileSizes = NULL; + BMK_return_t res; U64 const totalSizeToLoad = 
UTIL_getTotalFileSize(fileNamesTable, nbFiles); - if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); + if(!nbFiles) { + EXM_THROW(14, BMK_return_t, "No Files to Benchmark"); + } + if (cLevel > ZSTD_maxCLevel()) { + EXM_THROW(15, BMK_return_t, "Invalid Compression Level"); + } + + fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); + if (!fileSizes) EXM_THROW(12, BMK_return_t, "not enough memory for fileSizes"); /* Load dictionary */ if (dictFileName != NULL) { U64 const dictFileSize = UTIL_getFileSize(dictFileName); - if (dictFileSize > 64 MB) - EXM_THROW(10, "dictionary file %s too large", dictFileName); + if (dictFileSize > 64 MB) { + free(fileSizes); + EXM_THROW(10, BMK_return_t, "dictionary file %s too large", dictFileName); + } dictBufferSize = (size_t)dictFileSize; dictBuffer = malloc(dictBufferSize); - if (dictBuffer==NULL) - EXM_THROW(11, "not enough memory for dictionary (%u bytes)", + if (dictBuffer==NULL) { + free(fileSizes); + EXM_THROW(11, BMK_return_t, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); - BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1, displayLevel); + } + { + int errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1, displayLevel); + if(errorCode) { + res.error = errorCode; + goto _cleanUp; + } + } } /* Memory allocation & restrictions */ @@ -647,96 +931,80 @@ static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigne if (benchedSize < totalSizeToLoad) DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); srcBuffer = malloc(benchedSize); - if (!srcBuffer) EXM_THROW(12, "not enough memory"); + if (!srcBuffer) { + free(dictBuffer); + free(fileSizes); + EXM_THROW(12, BMK_return_t, "not enough memory"); + } /* Load input buffer */ - BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles, displayLevel); - - /* Bench */ - if (g_separateFiles) { - const BYTE* srcPtr = (const BYTE*)srcBuffer; 
- U32 fileNb; - BMK_result_t* resultarray = (BMK_result_t*)malloc(sizeof(BMK_result_t) * nbFiles); - if(resultarray == NULL) EXM_THROW(12, "not enough memory"); - for (fileNb=0; fileNb 1) ? mfName : fileNamesTable[0]; - BMK_benchCLevel(srcBuffer, benchedSize, + { + const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; + res = BMK_benchCLevel(srcBuffer, benchedSize, fileSizes, nbFiles, - cLevel, cLevelLast, compressionParams, + cLevel, compressionParams, dictBuffer, dictBufferSize, - displayLevel, displayName); + displayLevel, displayName, + adv); } } - /* clean up */ +_cleanUp: free(srcBuffer); free(dictBuffer); free(fileSizes); + return res; } -static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, +BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, const ZSTD_compressionParameters* compressionParams, - int displayLevel) + int displayLevel, const BMK_advancedParams_t * const adv) { char name[20] = {0}; size_t benchedSize = 10000000; - void* const srcBuffer = malloc(benchedSize); + void* srcBuffer; + BMK_return_t res; + + if (cLevel > ZSTD_maxCLevel()) { + EXM_THROW(15, BMK_return_t, "Invalid Compression Level"); + } /* Memory allocation */ - if (!srcBuffer) EXM_THROW(21, "not enough memory"); + srcBuffer = malloc(benchedSize); + if (!srcBuffer) EXM_THROW(21, BMK_return_t, "not enough memory"); /* Fill input buffer */ RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, + res = BMK_benchCLevel(srcBuffer, benchedSize, &benchedSize, 1, - cLevel, cLevelLast, compressionParams, + cLevel, compressionParams, NULL, 0, - displayLevel, name); + displayLevel, name, adv); /* clean up */ free(srcBuffer); + + return res; } - -static void BMK_benchFilesFull(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, - int cLevel, int cLevelLast, - const 
ZSTD_compressionParameters* compressionParams, int displayLevel) -{ - double const compressibility = (double)g_compressibilityDefault / 100; - - if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); - if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); - if (cLevelLast < cLevel) cLevelLast = cLevel; - if (cLevelLast > cLevel) - DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); - - if (nbFiles == 0) - BMK_syntheticTest(cLevel, cLevelLast, compressibility, compressionParams, displayLevel); - else - BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel); -} - -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, - const char* dictFileName, - int cLevel, int cLevelLast, - const ZSTD_compressionParameters* compressionParams, +BMK_return_t BMK_benchFiles(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, + int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel) { - BMK_benchFilesFull(fileNamesTable, nbFiles, dictFileName, cLevel, cLevelLast, compressionParams, displayLevel); - return 0; + const BMK_advancedParams_t adv = BMK_initAdvancedParams(); + return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv); } diff --git a/programs/bench.h b/programs/bench.h index 0ba6f8985..87cf56380 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -19,25 +19,121 @@ extern "C" { #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ #include "zstd.h" /* ZSTD_compressionParameters */ +/* Creates a struct of type typeName with an int type .error field + * and a .result field of some baseType. Functions with return + * typeName pass a successful result with .error = 0 and .result + * with the intended result, while returning an error will result + * in .error != 0. 
+ */ +#define ERROR_STRUCT(baseType, typeName) typedef struct { \ + baseType result; \ + int error; \ +} typeName + typedef struct { size_t cSize; double cSpeed; /* bytes / sec */ double dSpeed; } BMK_result_t; -/* 0 = no Error */ -typedef struct { - int errorCode; - BMK_result_t result; -} BMK_return_t; +ERROR_STRUCT(BMK_result_t, BMK_return_t); /* called in cli */ -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, const char* dictFileName, - int cLevel, int cLevelLast, const ZSTD_compressionParameters* compressionParams, +/* Loads files in fileNamesTable into memory, as well as a dictionary + * from dictFileName, and then uses benchMem */ +/* fileNamesTable - name of files to benchmark + * nbFiles - number of files (size of fileNamesTable), must be > 0 + * dictFileName - name of dictionary file to load + * cLevel - compression level to benchmark, errors if invalid + * compressionParams - basic compression Parameters + * displayLevel - what gets printed + * 0 : no display; + * 1 : errors; + * 2 : + result + interaction + warnings; + * 3 : + progression; + * 4 : + information + * return + * .error will give a nonzero error value if an error has occurred + * .result - if .error = 0, .result will contain the compression speed + * (.cSpeed), decompression speed (.dSpeed), and compressed size (.cSize) of the original + * file + */ +BMK_return_t BMK_benchFiles(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, + int const cLevel, const ZSTD_compressionParameters* const compressionParams, int displayLevel); -/* basic benchmarking function, called in paramgrill - * ctx, dctx must be valid */ +typedef enum { + BMK_timeMode = 0, + BMK_iterMode = 1 +} BMK_loopMode_t; + +typedef enum { + BMK_both = 0, + BMK_decodeOnly = 1, + BMK_compressOnly = 2 +} BMK_mode_t; + +typedef struct { + BMK_mode_t mode; /* 0: both, 1: decode only, 2: compress only */ + BMK_loopMode_t loopMode; /* if loopmode, then nbSeconds
= nbLoops */ + unsigned nbSeconds; /* default timing is in nbSeconds */ + size_t blockSize; /* Maximum allowable size of a block*/ + unsigned nbWorkers; /* multithreading */ + unsigned realTime; /* real time priority */ + int additionalParam; /* used by python speed benchmark */ + unsigned ldmFlag; /* enables long distance matching */ + unsigned ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md for meaning */ + unsigned ldmHashLog; + unsigned ldmBucketSizeLog; + unsigned ldmHashEveryLog; +} BMK_advancedParams_t; + +/* returns default parameters used by non-advanced functions */ +BMK_advancedParams_t BMK_initAdvancedParams(void); + +/* See benchFiles for normal parameter uses and return, see BMK_advancedParams_t for adv */ +BMK_return_t BMK_benchFilesAdvanced(const char* const * const fileNamesTable, unsigned const nbFiles, + const char* const dictFileName, + int const cLevel, const ZSTD_compressionParameters* const compressionParams, + int displayLevel, const BMK_advancedParams_t* const adv); + +/* called in cli */ +/* Generates a sample with datagen with the compressibility argument*/ +/* cLevel - compression level to benchmark, errors if invalid + * compressibility - determines compressibility of sample + * compressionParams - basic compression Parameters + * displayLevel - see benchFiles + * adv - see BMK_advancedParams_t + * return + * .error will give a nonzero error value if an error has occurred + * .result - if .error = 0, .result will contain the compression speed + * (.cSpeed), decompression speed (.dSpeed), and compressed size (.cSize) of the original + * file + */ +BMK_return_t BMK_syntheticTest(int cLevel, double compressibility, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t * const adv); + +/* basic benchmarking function, called in paramgrill + * applies ZSTD_compress_generic() and ZSTD_decompress_generic() on data in srcBuffer + * with specific compression
parameters specified by other arguments using benchFunction + * (cLevel, comprParams + adv in advanced Mode) */ +/* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode + * srcSize - size of data in srcBuffer + * cLevel - compression level + * comprParams - basic compression parameters + * dictBuffer - a dictionary if used, null otherwise + * dictBufferSize - size of dictBuffer, 0 otherwise + * ctx - Compression Context (must be provided) + * dctx - Decompression Context (must be provided) + * displayLevel - see BMK_benchFiles + * displayName - name used by display + * return + * .error will give a nonzero value if an error has occurred + * .result - if .error = 0, will give the same results as benchFiles + * but for the data stored in srcBuffer + */ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t* fileSizes, unsigned nbFiles, const int cLevel, const ZSTD_compressionParameters* comprParams, @@ -45,20 +141,82 @@ BMK_return_t BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, int displayLevel, const char* displayName); -/* Set Parameters */ -void BMK_setNbSeconds(unsigned nbLoops); -void BMK_setBlockSize(size_t blockSize); -void BMK_setNbWorkers(unsigned nbWorkers); -void BMK_setRealTime(unsigned priority); -void BMK_setNotificationLevel(unsigned level); -void BMK_setSeparateFiles(unsigned separate); -void BMK_setAdditionalParam(int additionalParam); -void BMK_setDecodeOnlyMode(unsigned decodeFlag); -void BMK_setLdmFlag(unsigned ldmFlag); -void BMK_setLdmMinMatch(unsigned ldmMinMatch); -void BMK_setLdmHashLog(unsigned ldmHashLog); -void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); -void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog); +/* See benchMem for normal parameter uses and return, see BMK_advancedParams_t for adv */ +BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, const
ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv); + +typedef struct { + size_t sumOfReturn; /* sum of return values */ + U64 nanoSecPerRun; /* time per iteration */ +} BMK_customResult_t; + +ERROR_STRUCT(BMK_customResult_t, BMK_customReturn_t); + +typedef size_t (*BMK_benchFn_t)(const void*, size_t, void*, size_t, void*); +typedef size_t (*BMK_initFn_t)(void*); + +/* This function times the execution of 2 argument functions, benchFn and initFn */ + +/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) + * is run nbLoops times + * initFn - (*initFn)(initPayload) is run once per benchmark at the beginning. This argument can + * be NULL, in which case nothing is run. + * blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities) + * srcBuffers - an array of buffers to be operated on by benchFn + * srcSizes - an array of the sizes of above buffers + * dstBuffers - an array of buffers to be written into by benchFn + * dstCapacities - an array of the capacities of above buffers. + * nbLoops - defines number of times benchFn is run. + * return + * .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return + * of the calls to initFn and benchFn, or if benchFunction errors internally + * .result - if .error = 0, then .result will contain the sum of all return values of + * benchFn on the first iteration through all of the blocks (.sumOfReturn) and also + * the time per run of benchFn (.nanoSecPerRun). For the former, this + * is generally intended to be used on functions which return the # of bytes written + * into dstBuffer, hence this value will be the total amount of bytes written to + * dstBuffer. 
+ */ +BMK_customReturn_t BMK_benchFunction( + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const * const srcBuffers, const size_t* srcSizes, + void* const * const dstBuffers, const size_t* dstCapacities, + unsigned nbLoops); + + +/* state information needed to advance computation for benchFunctionTimed */ +typedef struct BMK_timeState_t BMK_timedFnState_t; +/* initializes timeState object with desired number of seconds */ +BMK_timedFnState_t* BMK_createTimeState(unsigned nbSeconds); +/* resets existing timeState object */ +void BMK_resetTimeState(BMK_timedFnState_t*, unsigned nbSeconds); +/* deletes timeState object */ +void BMK_freeTimeState(BMK_timedFnState_t* state); + +typedef struct { + BMK_customReturn_t result; + int completed; +} BMK_customTimedReturn_t; + +/* + * Benchmarks custom functions like BMK_benchFunction(), but runs for nbSeconds seconds rather than a fixed number of loops + * arguments mostly the same other than BMK_benchFunction() + * Usage - benchFunctionTimed will return in approximately one second. Keep calling BMK_benchFunctionTimed() until the return's completed field = 1. + * to continue updating intermediate result. Intermediate return values are returned by the function. 
+ */ +BMK_customTimedReturn_t BMK_benchFunctionTimed(BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + size_t blockCount, + const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, + void* const * const dstBlockBuffers, const size_t* dstBlockCapacities); #endif /* BENCH_H_121279284357 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index cc3ec26c6..15e522dfb 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -32,7 +32,7 @@ #include /* errno */ #include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ #ifndef ZSTD_NOBENCH -# include "bench.h" /* BMK_benchFiles, BMK_SetNbSeconds */ +# include "bench.h" /* BMK_benchFiles */ #endif #ifndef ZSTD_NODICT # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ @@ -398,6 +398,8 @@ int main(int argCount, const char* argv[]) setRealTimePrio = 0, singleThread = 0, ultra=0; + double compressibility = 0.5; + BMK_advancedParams_t adv = BMK_initAdvancedParams(); unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ size_t blockSize = 0; zstd_operation_mode operation = zom_compress; @@ -609,7 +611,7 @@ int main(int argCount, const char* argv[]) /* Decoding */ case 'd': #ifndef ZSTD_NOBENCH - BMK_setDecodeOnlyMode(1); + adv.mode = BMK_decodeOnly; if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */ #endif operation=zom_decompress; argument++; break; @@ -702,11 +704,19 @@ int main(int argCount, const char* argv[]) case 'p': argument++; #ifndef ZSTD_NOBENCH if ((*argument>='0') && (*argument<='9')) { - BMK_setAdditionalParam(readU32FromChar(&argument)); + adv.additionalParam = (int)readU32FromChar(&argument); } else #endif main_pause=1; break; + + /* Select compressibility of synthetic sample */ + case 'P': + { argument++; + compressibility = (double)readU32FromChar(&argument) / 100; + } + break; + /* unknown command */ default : 
CLEAN_RETURN(badusage(programName)); } @@ -807,21 +817,46 @@ int main(int argCount, const char* argv[]) /* Check if benchmark is selected */ if (operation==zom_bench) { #ifndef ZSTD_NOBENCH - BMK_setSeparateFiles(separateFiles); - BMK_setBlockSize(blockSize); - BMK_setNbWorkers(nbWorkers); - BMK_setRealTime(setRealTimePrio); - BMK_setNbSeconds(bench_nbSeconds); - BMK_setLdmFlag(ldmFlag); - BMK_setLdmMinMatch(g_ldmMinMatch); - BMK_setLdmHashLog(g_ldmHashLog); + adv.blockSize = blockSize; + adv.nbWorkers = nbWorkers; + adv.realTime = setRealTimePrio; + adv.nbSeconds = bench_nbSeconds; + adv.ldmFlag = ldmFlag; + adv.ldmMinMatch = g_ldmMinMatch; + adv.ldmHashLog = g_ldmHashLog; if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { - BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog); + adv.ldmBucketSizeLog = g_ldmBucketSizeLog; } if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { - BMK_setLdmHashEveryLog(g_ldmHashEveryLog); + adv.ldmHashEveryLog = g_ldmHashEveryLog; } - BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, g_displayLevel); + + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); + if (cLevelLast < cLevel) cLevelLast = cLevel; + if (cLevelLast > cLevel) + DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + if(filenameIdx) { + if(separateFiles) { + unsigned i; + for(i = 0; i < filenameIdx; i++) { + int c; + DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]); + for(c = cLevel; c <= cLevelLast; c++) { + BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &adv); + } + } + } else { + for(; cLevel <= cLevelLast; cLevel++) { + BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv); + } + } + } else { + for(; cLevel <= cLevelLast; cLevel++) { + BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &adv); + 
} + } + #else (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; #endif diff --git a/tests/Makefile b/tests/Makefile index 4fffbc2d4..813380cc2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -128,7 +128,7 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) fullbench fullbench32 : DEBUGFLAGS = # turn off assert() for speed measurements fullbench fullbench32 : $(ZSTD_FILES) -fullbench fullbench32 : $(PRGDIR)/datagen.c fullbench.c +fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/bench.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) fullbench-lib : zstd-staticLib diff --git a/tests/fullbench.c b/tests/fullbench.c index 6abdd4da0..b548a33f3 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -30,6 +30,7 @@ #include "zstd.h" /* ZSTD_versionString */ #include "util.h" /* time functions */ #include "datagen.h" +#include "bench.h" /* CustomBench*/ /*_************************************ @@ -93,14 +94,15 @@ static size_t BMK_findMaxMem(U64 requiredMem) /*_******************************************************* * Benchmark wrappers *********************************************************/ -size_t local_ZSTD_compress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) + +size_t local_ZSTD_compress(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)buff2; return ZSTD_compress(dst, dstSize, src, srcSize, 1); } static size_t g_cSize = 0; -size_t local_ZSTD_decompress(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decompress(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)src; (void)srcSize; return ZSTD_decompress(dst, dstSize, buff2, g_cSize); @@ -110,14 +112,14 @@ static ZSTD_DCtx* g_zdc = NULL; #ifndef ZSTD_DLL_IMPORT extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize); -size_t 
local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { (void)src; (void)srcSize; (void)dst; (void)dstSize; return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize); } extern size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeq, const void* src, size_t srcSize); -size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2) { int nbSeq; (void)src; (void)srcSize; (void)dst; (void)dstSize; @@ -126,7 +128,7 @@ size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const #endif static ZSTD_CStream* g_cstream= NULL; -size_t local_ZSTD_compressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_compressStream(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -143,7 +145,7 @@ size_t local_ZSTD_compressStream(void* dst, size_t dstCapacity, void* buff2, con return buffOut.pos; } -static size_t local_ZSTD_compress_generic_end(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_end(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -159,7 +161,7 @@ static size_t local_ZSTD_compress_generic_end(void* dst, size_t dstCapacity, voi return buffOut.pos; } -static size_t local_ZSTD_compress_generic_continue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_continue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -176,7 
+178,7 @@ static size_t local_ZSTD_compress_generic_continue(void* dst, size_t dstCapacity return buffOut.pos; } -static size_t local_ZSTD_compress_generic_T2_end(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -193,7 +195,7 @@ static size_t local_ZSTD_compress_generic_T2_end(void* dst, size_t dstCapacity, return buffOut.pos; } -static size_t local_ZSTD_compress_generic_T2_continue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -212,7 +214,7 @@ static size_t local_ZSTD_compress_generic_T2_continue(void* dst, size_t dstCapac } static ZSTD_DStream* g_dstream= NULL; -static size_t local_ZSTD_decompressStream(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +static size_t local_ZSTD_decompressStream(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { ZSTD_outBuffer buffOut; ZSTD_inBuffer buffIn; @@ -231,7 +233,7 @@ static size_t local_ZSTD_decompressStream(void* dst, size_t dstCapacity, void* b static ZSTD_CCtx* g_zcc = NULL; #ifndef ZSTD_DLL_IMPORT -size_t local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_compressContinue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { (void)buff2; ZSTD_compressBegin(g_zcc, 1 /* compressionLevel */); @@ -239,7 +241,7 @@ size_t local_ZSTD_compressContinue(void* dst, size_t dstCapacity, void* buff2, c } #define FIRST_BLOCK_SIZE 8 -size_t local_ZSTD_compressContinue_extDict(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t 
local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { BYTE firstBlockBuf[FIRST_BLOCK_SIZE]; @@ -255,7 +257,7 @@ size_t local_ZSTD_compressContinue_extDict(void* dst, size_t dstCapacity, void* return ZSTD_compressEnd(g_zcc, dst, dstCapacity, (const BYTE*)src + FIRST_BLOCK_SIZE, srcSize - FIRST_BLOCK_SIZE); } -size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2, const void* src, size_t srcSize) +size_t local_ZSTD_decompressContinue(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* buff2) { size_t regeneratedSize = 0; const BYTE* ip = (const BYTE*)buff2; @@ -288,8 +290,9 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) size_t const dstBuffSize = ZSTD_compressBound(srcSize); void* buff2; const char* benchName; - size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize); - double bestTime = 100000000.; + BMK_benchFn_t benchFunction; + BMK_customReturn_t r; + int errorcode = 0; /* Selection */ switch(benchNb) @@ -419,46 +422,25 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb) default : ; } + /* warming up memory */ { size_t i; for (i=0; i %s !! \n", benchName, ZSTD_getErrorName(benchResult)); - exit(1); - } } - { U64 const clockSpanNano = UTIL_clockSpanNano(clockStart); - double const averageTime = (double)clockSpanNano / TIME_SEC_NANOSEC / nbRounds; - if (clockSpanNano > 0) { - if (averageTime < bestTime) bestTime = averageTime; - assert(bestTime > (1./2000000000)); - nbRounds = (U32)(1. 
/ bestTime); /* aim for 1 sec */ - DISPLAY("%2i- %-30.30s : %7.1f MB/s (%9u)\r", - loopNb, benchName, - (double)srcSize / (1 MB) / bestTime, - (U32)benchResult); - } else { - assert(nbRounds < 40000000); /* avoid overflow */ - nbRounds *= 100; - } - } } } - DISPLAY("%2u\n", benchNb); + { + r = BMK_benchFunction(benchFunction, buff2, + NULL, NULL, 1, &src, &srcSize, + (void * const * const)&dstBuff, &dstBuffSize, g_nbIterations); + if(r.error) { + DISPLAY("ERROR %d ! ! \n", r.error); + errorcode = r.error; + goto _cleanOut; + } + DISPLAY("%2u#Speed: %f MB/s - Size: %f MB - %s\n", benchNb, (double)srcSize / r.result.nanoSecPerRun * 1000, (double)r.result.sumOfReturn / 1000000, benchName); + } + _cleanOut: free(dstBuff); free(buff2); @@ -466,7 +448,7 @@ _cleanOut: ZSTD_freeDCtx(g_zdc); g_zdc=NULL; ZSTD_freeCStream(g_cstream); g_cstream=NULL; ZSTD_freeDStream(g_dstream); g_dstream=NULL; - return 0; + return errorcode; } diff --git a/tests/paramgrill.c b/tests/paramgrill.c index db45220c3..025bc6aad 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -162,18 +162,15 @@ const char* g_stratName[ZSTD_btultra+1] = { "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra "}; /* TODO: support additional parameters (more files, fileSizes) */ - -//TODO: benchMem dctx can't = NULL in new system static size_t BMK_benchParam(BMK_result_t* resultPtr, const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx, ZSTD_DCtx* dctx, const ZSTD_compressionParameters cParams) { - BMK_return_t res = BMK_benchMem(srcBuffer,srcSize, &srcSize, 1, 0, &cParams, NULL, 0, ctx, dctx, 0, "File"); *resultPtr = res.result; - return res.errorCode; + return res.error; } static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize)