From 0c53c5ad4aa419d1f3aab0022ad181d1efec6f30 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 15 Feb 2019 14:15:36 -0800 Subject: [PATCH] [zstdcli] Add a flag to control literals compression --- programs/fileio.c | 9 +++++++++ programs/fileio.h | 3 +++ programs/zstdcli.c | 5 ++++- tests/playTests.sh | 7 +++++++ tests/regression/config.c | 3 +++ tests/regression/results.csv | 23 ++++++++++++++++------- 6 files changed, 42 insertions(+), 8 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index dd47a1e32..8a5715113 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -296,6 +296,7 @@ struct FIO_prefs_s { int ldmMinMatch; int ldmBucketSizeLog; int ldmHashRateLog; + ZSTD_literalCompressionMode_e literalCompressionMode; /* IO preferences */ U32 removeSrcFile; @@ -339,6 +340,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->ldmMinMatch = 0; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->literalCompressionMode = ZSTD_lcm_auto; return ret; } @@ -406,6 +408,12 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { prefs->rsyncable = rsyncable; } +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_literalCompressionMode_e mode) { + prefs->literalCompressionMode = mode; +} + void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) { #ifndef ZSTD_NOCOMPRESS @@ -674,6 +682,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); /* multi-threading */ #ifdef ZSTD_MULTITHREAD DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); diff --git a/programs/fileio.h b/programs/fileio.h index b20570bcb..e46633752 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -71,6 +71,9 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_literalCompressionMode_e mode); void FIO_setNoProgress(unsigned noProgress); void FIO_setNotificationLevel(int level); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 794e2a811..60035e566 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -148,6 +148,7 @@ static int usage_advanced(const char* programName) #endif DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n"); DISPLAY( "--[no-]check : integrity check (default: enabled) \n"); + DISPLAY( "--[no-]compress-literals : force (un)compressed literals \n"); #endif #ifdef UTIL_HAS_CREATEFILELIST DISPLAY( " -r : operate recursively on directories \n"); @@ -483,7 +484,7 @@ static int init_cLevel(void) { if ((*ptr>='0') && (*ptr<='9')) { unsigned absLevel; - if (readU32FromCharChecked(&ptr, &absLevel)) { + if (readU32FromCharChecked(&ptr, &absLevel)) { DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large\n", ENV_CLEVEL, env); return ZSTDCLI_CLEVEL_DEFAULT; } else if (*ptr == 0) { @@ -659,6 +660,8 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; } #endif if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; } + if (!strcmp(argument, "--compress-literals")) { FIO_setLiteralCompressionMode(prefs, ZSTD_lcm_huffman); continue; } + if (!strcmp(argument, "--no-compress-literals")) { FIO_setLiteralCompressionMode(prefs, ZSTD_lcm_uncompressed); continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } /* long commands with arguments */ diff --git a/tests/playTests.sh b/tests/playTests.sh index 5be065fa8..62b3057e9 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -200,6 +200,13 @@ $ZSTD tmp -fo tmp && die "zstd compression overwrote the input file" $ZSTD tmp.zst -dfo tmp.zst && die "zstd decompression overwrote the input file" $ECHO "test: detect that input file does not exist" $ZSTD nothere && die "zstd hasn't detected that input file does not exist" +$ECHO "test: --[no-]compress-literals" +$ZSTD tmp -c --no-compress-literals -1 | $ZSTD -t +$ZSTD tmp -c --no-compress-literals --fast=1 | $ZSTD -t +$ZSTD tmp -c --no-compress-literals -19 | $ZSTD -t +$ZSTD tmp -c --compress-literals -1 | $ZSTD -t +$ZSTD tmp -c --compress-literals --fast=1 | $ZSTD -t +$ZSTD tmp -c --compress-literals -19 | $ZSTD -t $ECHO "test : file removal" $ZSTD -f --rm tmp diff --git a/tests/regression/config.c b/tests/regression/config.c index 4d5d19105..bd3640099 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -129,6 +129,7 @@ static param_value_t const uncompressed_literals_param_values[] = { static config_t uncompressed_literals = { .name = "uncompressed literals", + .cli_args = "-3 --no-compress-literals", .param_values = PARAM_VALUES(uncompressed_literals_param_values), }; @@ -139,6 +140,7 @@ static param_value_t const uncompressed_literals_opt_param_values[] = { static config_t uncompressed_literals_opt = { .name = "uncompressed literals optimal", + .cli_args = "-19 --no-compress-literals", .param_values = PARAM_VALUES(uncompressed_literals_opt_param_values), }; @@ -149,6 +151,7 @@ static param_value_t const huffman_literals_param_values[] = { static config_t huffman_literals = { .name = "huffman literals", + .cli_args = "--fast=1 --compress-literals", .param_values = PARAM_VALUES(huffman_literals_param_values), }; diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 6d0d1b515..23afae5ee 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -68,13 +68,13 @@ github, level 16, compress cct github, level 16 with dict, compress cctx, 37568 github, level 19, compress cctx, 133717 github, level 19 with dict, compress cctx, 37567 -github, long distance mode, compress cctx, decompression error -github, multithreaded, compress cctx, decompression error -github, multithreaded long distance mode, compress cctx, decompression error -github, small window log, compress cctx, decompression error -github, small hash log, compress cctx, decompression error -github, small chain log, compress cctx, decompression error -github, explicit params, compress cctx, decompression error +github, long distance mode, compress cctx, 141473 +github, multithreaded, compress cctx, 141473 +github, multithreaded long distance mode, compress cctx, 141473 +github, small window log, compress cctx, 141473 +github, small hash log, compress cctx, 138943 +github, small chain log, compress cctx, 139239 +github, explicit params, compress cctx, 140924 github, uncompressed literals, compress cctx, 136397 github, uncompressed literals optimal, compress cctx, 133717 github, huffman literals, compress cctx, 176575 @@ -99,6 +99,9 @@ silesia, small window log, zstdcli, silesia, small hash log, zstdcli, 6554946 silesia, small chain log, zstdcli, 4931141 silesia, explicit params, zstdcli, 4815380 +silesia, uncompressed literals, zstdcli, 5155472 +silesia, uncompressed literals optimal, zstdcli, 4325475 +silesia, huffman literals, zstdcli, 5341405 silesia.tar, level -5, zstdcli, 7161160 silesia.tar, level -3, zstdcli, 6789865 silesia.tar, level -1, zstdcli, 6196433 @@ -121,6 +124,9 @@ silesia.tar, small window log, zstdcli, silesia.tar, small hash log, zstdcli, 6587841 silesia.tar, small chain log, zstdcli, 4943259 silesia.tar, explicit params, zstdcli, 4839202 +silesia.tar, uncompressed literals, zstdcli, 5158134 +silesia.tar, uncompressed literals optimal, zstdcli, 4321098 +silesia.tar, huffman literals, zstdcli, 5358479 github, level -5, zstdcli, 234744 github, level -5 with dict, zstdcli, 48718 github, level -3, zstdcli, 222611 @@ -156,6 +162,9 @@ github, small window log, zstdcli, github, small hash log, zstdcli, 137467 github, small chain log, zstdcli, 138314 github, explicit params, zstdcli, 136140 +github, uncompressed literals, zstdcli, 169004 +github, uncompressed literals optimal, zstdcli, 158824 +github, huffman literals, zstdcli, 145457 silesia, level -5, advanced one pass, 7152294 silesia, level -3, advanced one pass, 6789969 silesia, level -1, advanced one pass, 6191548