1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-29 11:21:22 +03:00

Separate capacity vs size

Also:
Make suggested fixes
-varInds_t
-reorder some arguments
-remove code duplication
-update README / -h
-Fix memory leaks
This commit is contained in:
George Lu
2018-07-20 14:35:09 -07:00
parent df026e159f
commit e148db366e
5 changed files with 780 additions and 1290 deletions

View File

@ -171,6 +171,8 @@ struct BMK_timeState_t{
static void BMK_initCCtx(ZSTD_CCtx* ctx, static void BMK_initCCtx(ZSTD_CCtx* ctx,
const void* dictBuffer, size_t dictBufferSize, int cLevel, const void* dictBuffer, size_t dictBufferSize, int cLevel,
const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) { const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) {
ZSTD_CCtx_reset(ctx);
ZSTD_CCtx_resetParameters(ctx);
if (adv->nbWorkers==1) { if (adv->nbWorkers==1) {
ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0); ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, 0);
} else { } else {
@ -195,6 +197,7 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx,
static void BMK_initDCtx(ZSTD_DCtx* dctx, static void BMK_initDCtx(ZSTD_DCtx* dctx,
const void* dictBuffer, size_t dictBufferSize) { const void* dictBuffer, size_t dictBufferSize) {
ZSTD_DCtx_reset(dctx);
ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize); ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize);
} }
@ -291,9 +294,9 @@ BMK_customReturn_t BMK_benchFunction(
BMK_initFn_t initFn, void* initPayload, BMK_initFn_t initFn, void* initPayload,
size_t blockCount, size_t blockCount,
const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes,
void* const * const dstBlockBuffers, size_t* dstBlockCapacitiesToSizes, void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, size_t* cSizes,
unsigned nbLoops) { unsigned nbLoops) {
size_t srcSize = 0, dstSize = 0, ind = 0; size_t dstSize = 0;
U64 totalTime; U64 totalTime;
BMK_customReturn_t retval; BMK_customReturn_t retval;
@ -303,36 +306,37 @@ BMK_customReturn_t BMK_benchFunction(
EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n"); EXM_THROW_ND(1, BMK_customReturn_t, "nbLoops must be nonzero \n");
} }
for(ind = 0; ind < blockCount; ind++) {
srcSize += srcBlockSizes[ind];
}
{ {
size_t i; size_t i;
for(i = 0; i < blockCount; i++) { for(i = 0; i < blockCount; i++) {
memset(dstBlockBuffers[i], 0xE5, dstBlockCapacitiesToSizes[i]); /* warm up and erase result buffer */ memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */
} }
#if 0
//UTIL_sleepMilli(5); /* give processor time to other processes */ /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
//UTIL_waitForNextTick(); * (Makes former slower)
*/
UTIL_sleepMilli(5); /* give processor time to other processes */
UTIL_waitForNextTick();
#endif
} }
{ {
unsigned i, j, firstIter = 1; unsigned i, j;
clockStart = UTIL_getTime(); clockStart = UTIL_getTime();
if(initFn != NULL) { initFn(initPayload); } if(initFn != NULL) { initFn(initPayload); }
for(i = 0; i < nbLoops; i++) { for(i = 0; i < nbLoops; i++) {
for(j = 0; j < blockCount; j++) { for(j = 0; j < blockCount; j++) {
size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacitiesToSizes[j], benchPayload); size_t res = benchFn(srcBlockBuffers[j], srcBlockSizes[j], dstBlockBuffers[j], dstBlockCapacities[j], benchPayload);
if(ZSTD_isError(res)) { if(ZSTD_isError(res)) {
EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n", EXM_THROW_ND(2, BMK_customReturn_t, "Function benchmarking failed on block %u of size %u : %s \n",
j, (U32)dstBlockCapacitiesToSizes[j], ZSTD_getErrorName(res)); j, (U32)dstBlockCapacities[j], ZSTD_getErrorName(res));
} else if(firstIter) { } else if(i == nbLoops - 1) {
dstSize += res; dstSize += res;
dstBlockCapacitiesToSizes[j] = res; if(cSizes != NULL) {
cSizes[j] = res;
}
} }
} }
firstIter = 0;
} }
totalTime = UTIL_clockSpanNano(clockStart); totalTime = UTIL_clockSpanNano(clockStart);
} }
@ -369,7 +373,7 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed(
BMK_initFn_t initFn, void* initPayload, BMK_initFn_t initFn, void* initPayload,
size_t blockCount, size_t blockCount,
const void* const* const srcBlockBuffers, const size_t* srcBlockSizes, const void* const* const srcBlockBuffers, const size_t* srcBlockSizes,
void * const * const dstBlockBuffers, size_t * dstBlockCapacitiesToSizes) void * const * const dstBlockBuffers, const size_t * dstBlockCapacities, size_t* dstSizes)
{ {
U64 fastest = cont->fastestTime; U64 fastest = cont->fastestTime;
int completed = 0; int completed = 0;
@ -384,9 +388,9 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed(
UTIL_sleep(COOLPERIOD_SEC); UTIL_sleep(COOLPERIOD_SEC);
cont->coolTime = UTIL_getTime(); cont->coolTime = UTIL_getTime();
} }
/* reinitialize capacity */
r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload, r.result = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload,
blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacitiesToSizes, cont->nbLoops); blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, dstSizes, cont->nbLoops);
if(r.result.error) { /* completed w/ error */ if(r.result.error) { /* completed w/ error */
r.completed = 1; r.completed = 1;
return r; return r;
@ -420,7 +424,7 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed(
/* benchMem with no allocation */ /* benchMem with no allocation */
static BMK_return_t BMK_benchMemAdvancedNoAlloc( static BMK_return_t BMK_benchMemAdvancedNoAlloc(
const void ** const srcPtrs, size_t* const srcSizes, const void ** const srcPtrs, size_t* const srcSizes,
void** const cPtrs, size_t* const cSizes, void** const cPtrs, size_t* const cCapacities, size_t* const cSizes,
void** const resPtrs, size_t* const resSizes, void** const resPtrs, size_t* const resSizes,
void** resultBufferPtr, void* compressedBuffer, void** resultBufferPtr, void* compressedBuffer,
const size_t maxCompressedSize, const size_t maxCompressedSize,
@ -485,11 +489,11 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
srcPtrs[nbBlocks] = (const void*)srcPtr; srcPtrs[nbBlocks] = (const void*)srcPtr;
srcSizes[nbBlocks] = thisBlockSize; srcSizes[nbBlocks] = thisBlockSize;
cPtrs[nbBlocks] = (void*)cPtr; cPtrs[nbBlocks] = (void*)cPtr;
cSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
resPtrs[nbBlocks] = (void*)resPtr; resPtrs[nbBlocks] = (void*)resPtr;
resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
srcPtr += thisBlockSize; srcPtr += thisBlockSize;
cPtr += cSizes[nbBlocks]; cPtr += cCapacities[nbBlocks];
resPtr += thisBlockSize; resPtr += thisBlockSize;
remaining -= thisBlockSize; remaining -= thisBlockSize;
} }
@ -540,7 +544,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) { while(!(intermediateResultCompress.completed && intermediateResultDecompress.completed)) {
if(!intermediateResultCompress.completed) { if(!intermediateResultCompress.completed) {
intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, intermediateResultCompress = BMK_benchFunctionTimed(timeStateCompress, &local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep,
nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes); nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes);
if(intermediateResultCompress.result.error) { if(intermediateResultCompress.result.error) {
results.error = intermediateResultCompress.result.error; results.error = intermediateResultCompress.result.error;
return results; return results;
@ -564,7 +568,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
if(!intermediateResultDecompress.completed) { if(!intermediateResultDecompress.completed) {
intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep, intermediateResultDecompress = BMK_benchFunctionTimed(timeStateDecompress, &local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep,
nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes); nbBlocks, (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL);
if(intermediateResultDecompress.result.error) { if(intermediateResultDecompress.result.error) {
results.error = intermediateResultDecompress.result.error; results.error = intermediateResultDecompress.result.error;
return results; return results;
@ -590,7 +594,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
if(adv->mode != BMK_decodeOnly) { if(adv->mode != BMK_decodeOnly) {
BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep, BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep,
nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds); nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, cSizes, adv->nbSeconds);
if(compressionResults.error) { if(compressionResults.error) {
results.error = compressionResults.error; results.error = compressionResults.error;
return results; return results;
@ -617,7 +621,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
BMK_customReturn_t decompressionResults = BMK_benchFunction( BMK_customReturn_t decompressionResults = BMK_benchFunction(
&local_defaultDecompress, (void*)(dctx), &local_defaultDecompress, (void*)(dctx),
&local_initDCtx, (void*)&dctxprep, nbBlocks, &local_initDCtx, (void*)&dctxprep, nbBlocks,
(const void* const*)cPtrs, cSizes, resPtrs, resSizes, (const void* const*)cPtrs, cSizes, resPtrs, resSizes, NULL,
adv->nbSeconds); adv->nbSeconds);
if(decompressionResults.error) { if(decompressionResults.error) {
results.error = decompressionResults.error; results.error = decompressionResults.error;
@ -717,8 +721,10 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*)); const void ** const srcPtrs = (const void** const)malloc(maxNbBlocks * sizeof(void*));
size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); size_t* const srcSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t));
void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); void ** const cPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*));
size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); size_t* const cSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t));
size_t* const cCapacities = (size_t* const)malloc(maxNbBlocks * sizeof(size_t));
void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*)); void ** const resPtrs = (void** const)malloc(maxNbBlocks * sizeof(void*));
size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t)); size_t* const resSizes = (size_t* const)malloc(maxNbBlocks * sizeof(size_t));
@ -744,13 +750,11 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
!srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes; !srcPtrs || !srcSizes || !cPtrs || !cSizes || !resPtrs || !resSizes;
if (!allocationincomplete) { if (!allocationincomplete) {
results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cSizes, results = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, cPtrs, cCapacities, cSizes,
resPtrs, resSizes, &resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress, resPtrs, resSizes, &resultBuffer, compressedBuffer, maxCompressedSize, timeStateCompress, timeStateDecompress,
srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams, srcBuffer, srcSize, fileSizes, nbFiles, cLevel, comprParams,
dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv); dictBuffer, dictBufferSize, ctx, dctx, displayLevel, displayName, adv);
} }
/* clean up */ /* clean up */
BMK_freeTimeState(timeStateCompress); BMK_freeTimeState(timeStateCompress);
@ -764,6 +768,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
free(srcSizes); free(srcSizes);
free(cPtrs); free(cPtrs);
free(cSizes); free(cSizes);
free(cCapacities);
free(resPtrs); free(resPtrs);
free(resSizes); free(resSizes);

View File

@ -177,6 +177,7 @@ typedef size_t (*BMK_initFn_t)(void*);
* dstBuffers - an array of buffers to be written into by benchFn * dstBuffers - an array of buffers to be written into by benchFn
* dstCapacitiesToSizes - an array of the capacities of above buffers. Output modified to compressed sizes of those blocks. * dstCapacitiesToSizes - an array of the capacities of above buffers. Output modified to compressed sizes of those blocks.
* nbLoops - defines number of times benchFn is run. * nbLoops - defines number of times benchFn is run.
* cSizes - optional (may be NULL); if provided, assumed to be an array of size blockCount, and will have the compressed size of each block written to it.
* return * return
* .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return * .error will give a nonzero value if ZSTD_isError() is nonzero for any of the return
* of the calls to initFn and benchFn, or if benchFunction errors internally * of the calls to initFn and benchFn, or if benchFunction errors internally
@ -187,12 +188,11 @@ typedef size_t (*BMK_initFn_t)(void*);
* into dstBuffer, hence this value will be the total amount of bytes written to * into dstBuffer, hence this value will be the total amount of bytes written to
* dstBuffer. * dstBuffer.
*/ */
BMK_customReturn_t BMK_benchFunction( BMK_customReturn_t BMK_benchFunction(BMK_benchFn_t benchFn, void* benchPayload,
BMK_benchFn_t benchFn, void* benchPayload,
BMK_initFn_t initFn, void* initPayload, BMK_initFn_t initFn, void* initPayload,
size_t blockCount, size_t blockCount,
const void* const * const srcBuffers, const size_t* srcSizes, const void* const * const srcBuffers, const size_t* srcSizes,
void * const * const dstBuffers, size_t* dstCapacitiesToSizes, void * const * const dstBuffers, const size_t* dstCapacities, size_t* cSizes,
unsigned nbLoops); unsigned nbLoops);
@ -221,7 +221,7 @@ BMK_customTimedReturn_t BMK_benchFunctionTimed(BMK_timedFnState_t* cont,
BMK_initFn_t initFn, void* initPayload, BMK_initFn_t initFn, void* initPayload,
size_t blockCount, size_t blockCount,
const void* const * const srcBlockBuffers, const size_t* srcBlockSizes, const void* const * const srcBlockBuffers, const size_t* srcBlockSizes,
void* const * const dstBlockBuffers, size_t* dstBlockCapacitiesToSizes); void* const * const dstBlockBuffers, const size_t* dstBlockCapacities, size_t* cSizes);
#endif /* BENCH_H_121279284357 */ #endif /* BENCH_H_121279284357 */

View File

@ -88,3 +88,35 @@ as well as the 10,000 original files for more detailed comparison of decompressi
will choose a random seed, and for 1 minute, will choose a random seed, and for 1 minute,
generate random test frames and ensure that the generate random test frames and ensure that the
zstd library correctly decompresses them in both simple and streaming modes. zstd library correctly decompresses them in both simple and streaming modes.
#### `paramgrill` - tool for generating compression table parameters and optimizing parameters for a file under given constraints
Full list of arguments
```
-T# : set level 1 speed objective
-B# : cut input into blocks of size # (default : single block)
-i# : iteration loops
-S : benchmarks a single run (example command: -Sl3w10h12)
w# - windowLog
h# - hashLog
c# - chainLog
s# - searchLog
l# - searchLength
t# - targetLength
S# - strategy
L# - level
--zstd= : Single run, parameter selection syntax same as zstdcli
--optimize= : find parameters to maximize compression ratio given parameters
Can use all --zstd= commands to constrain the type of solution found in addition to the following constraints
cSpeed= - Minimum compression speed
dSpeed= - Minimum decompression speed
cMem= - compression memory
lvl= - Automatically sets compression speed constraint to the speed of that level
--optimize= : same as -O with more verbose syntax
-P# : generated sample compressibility
-t# : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours))
-v : Prints Benchmarking output
-D : Next argument is the dictionary file
```
Any inputs afterwards are treated as files to benchmark.

View File

@ -516,7 +516,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb, int cLevel,
{ {
r = BMK_benchFunction(benchFunction, buff2, r = BMK_benchFunction(benchFunction, buff2,
NULL, NULL, 1, &src, &srcSize, NULL, NULL, 1, &src, &srcSize,
(void **)&dstBuff, &dstBuffSize, g_nbIterations); (void **)&dstBuff, &dstBuffSize, NULL, g_nbIterations);
if(r.error) { if(r.error) {
DISPLAY("ERROR %d ! ! \n", r.error); DISPLAY("ERROR %d ! ! \n", r.error);
errorcode = r.error; errorcode = r.error;

1965
tests/paramgrill.c Executable file → Normal file

File diff suppressed because it is too large Load Diff