diff --git a/programs/fileio.c b/programs/fileio.c index 569a410c1..82d70075b 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -304,6 +304,7 @@ struct FIO_prefs_s { int ldmMinMatch; int ldmBucketSizeLog; int ldmHashRateLog; + size_t streamSrcSize; size_t targetCBlockSize; ZSTD_literalCompressionMode_e literalCompressionMode; @@ -349,6 +350,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->ldmMinMatch = 0; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->streamSrcSize = 0; ret->targetCBlockSize = 0; ret->literalCompressionMode = ZSTD_lcm_auto; return ret; @@ -418,6 +420,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { prefs->rsyncable = rsyncable; } +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { + prefs->streamSrcSize = streamSrcSize; +} + void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { prefs->targetCBlockSize = targetCBlockSize; } @@ -698,9 +704,20 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); #endif /* dictionary */ - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */ + /* set the pledged size for dictionary loading, to adapt compression parameters */ + if (srcSize == ZSTD_CONTENTSIZE_UNKNOWN && prefs->streamSrcSize > 0) { + /* unknown source size; use the declared stream size and disable writing this size to frame during compression */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) ); + } else { + /* use the known source size for adaption */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); + } CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN || prefs->streamSrcSize == 0) { + /* reset pledge when src size is known or stream size is declared */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); + } free(dictBuffer); } diff --git a/programs/fileio.h b/programs/fileio.h index 311f8c0e1..13f6f1d05 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, diff --git a/programs/zstdcli.c b/programs/zstdcli.c index de286cdf2..401e1ee2c 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -141,6 +141,7 @@ static int usage_advanced(const char* programName) DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); + DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n"); DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); @@ -588,6 +589,7 @@ int main(int argCount, const char* argv[]) const char* suffix = ZSTD_EXTENSION; unsigned maxDictSize = g_defaultMaxDictSize; unsigned dictID = 0; + size_t streamSrcSize = 0; size_t targetCBlockSize = 0; int dictCLevel = g_defaultDictCLevel; unsigned dictSelect = g_defaultSelectivityLevel; @@ -745,6 +747,7 @@ int main(int argCount, const char* argv[]) if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } + if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--long")) { unsigned ldmWindowLog = 0; @@ -1150,6 +1153,7 @@ int main(int argCount, const char* argv[]) FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); + FIO_setStreamSrcSize(prefs, streamSrcSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); if (adaptMin > cLevel) cLevel = adaptMin; @@ -1160,7 +1164,7 @@ int main(int argCount, const char* argv[]) else operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ + (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); #endif } else { /* decompression or test */ diff --git a/tests/playTests.sh b/tests/playTests.sh index 69387321f..431a53a18 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -108,7 +108,6 @@ else fi - println "\n===> simple tests " ./datagen > tmp @@ -1020,4 +1019,25 @@ test -f dictionary rm -f tmp* dictionary +println "\n===> stream-size mode" + +./datagen -g11000 > tmp +println "test : basic file compression vs sized streaming compression" +$ZSTD -14 -f tmp -o tmp.zst |& tee file.out +cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 |& tee stream_sized.out + +file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g') +stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g') +rm file.out stream_sized.out + +ratio_diff=$(echo $file_ratio - $stream_sized_ratio | bc) +if [ $(echo "(100 * $ratio_diff) > 5" | bc -l) == 1 ] +then + die "greater than 0.05% difference between file and sized-streaming compression" +fi + +println "test : incorrect stream size" +cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" + + rm -f tmp*