From d5dbdd6ece53ac5bcab2b91fcd7a1b776b84dfb1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 4 Mar 2025 12:51:39 -0800 Subject: [PATCH] changed command --block-size into --split to reduce confusion with the concept of "blocks" inside a Zstandard frame. We are now talking about "independent chunks" being produced by a `split` operation. updated documentation accordingly. Note: old commands `-B#` and `--block-size=#` remain supported, to maintain compatibility with existing scripts. --- programs/benchzstd.h | 4 +-- programs/zstd.1.md | 18 ++++++------ programs/zstdcli.c | 66 +++++++++++++++++++++++++------------------- 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/programs/benchzstd.h b/programs/benchzstd.h index 4fd0e5a8a..db4d72f9d 100644 --- a/programs/benchzstd.h +++ b/programs/benchzstd.h @@ -92,9 +92,9 @@ typedef enum { } BMK_mode_t; typedef struct { - BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */ + BMK_mode_t mode; /* 0: both, 1: compress only 2: decode only */ unsigned nbSeconds; /* default timing is in nbSeconds */ - size_t blockSize; /* Maximum size of each block*/ + size_t blockSize; /* Maximum size of each independent chunk */ size_t targetCBlockSize;/* Approximative size of compressed blocks */ int nbWorkers; /* multithreading */ unsigned realTime; /* real time priority */ diff --git a/programs/zstd.1.md b/programs/zstd.1.md index e5c1b7fd2..3b7bc342a 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -113,7 +113,11 @@ the last one takes effect. Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another. * `--ultra`: unlocks high compression levels 20+ (maximum 22), using a lot more memory. - Note that decompression will also require more memory when using these levels. + Decompression will also need more memory when using these levels. +* `--max`: + set advanced parameters to reach maximum compression. 
+ warning: this setting is very slow and uses a lot of resources. + It's inappropriate for 32-bit mode and therefore disabled in this mode. * `--fast[=#]`: switch to ultra-fast compression levels. If `=#` is not present, it defaults to `1`. @@ -161,10 +165,6 @@ the last one takes effect. Note: If `windowLog` is set to larger than 27, `--long=windowLog` or `--memory=windowSize` needs to be passed to the decompressor. -* `--max`: - set advanced parameters to maximum compression. - warning: this setting is very slow and uses a lot of resources. - It's inappropriate for 32-bit mode and therefore disabled in this mode. * `-D DICT`: use `DICT` as Dictionary to compress or decompress FILE(s) * `--patch-from FILE`: @@ -554,8 +554,8 @@ Compression of small files similar to the sample set will be greatly improved. Use `#` compression level during training (optional). Will generate statistics more tuned for selected compression level, resulting in a _small_ compression ratio improvement for this level. -* `-B#`: - Split input files into blocks of size # (default: no split) +* `--split=#`: + Split input files into independent chunks of size # (default: no split) * `-M#`, `--memory=#`: Limit the amount of sample data loaded for training (default: 2 GB). Note that the default (2 GB) is also the maximum. 
@@ -683,8 +683,8 @@ Benchmarking will employ `max(1, min(4, nbCores/4))` worker threads by default i benchmark decompression speed only (requires providing a zstd-compressed content) * `-i#`: minimum evaluation time, in seconds (default: 3s), benchmark mode only -* `-B#`, `--block-size=#`: - cut file(s) into independent chunks of size # (default: no chunking) +* `--split=#`: + split input file(s) into independent chunks of size # (default: no chunking) * `-S`: output one benchmark result per input file (default: consolidated result) * `-D dictionary` diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 83d9b881e..38d00225f 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -307,7 +307,7 @@ static void usageAdvanced(const char* programName) DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT); DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n"); DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n"); - DISPLAYOUT(" -B# Cut file into independent chunks of size #. [Default: No chunking]\n"); + DISPLAYOUT(" --split=# Split input into independent chunks of size #. [Default: No chunking]\n"); DISPLAYOUT(" -S Output one benchmark result per input file. 
[Default: Consolidated result]\n"); DISPLAYOUT(" -D dictionary Benchmark using dictionary \n"); DISPLAYOUT(" --priority=rt Set process priority to real-time.\n"); @@ -773,7 +773,7 @@ static int init_cLevel(void) { } #ifdef ZSTD_MULTITHREAD -static unsigned default_nbThreads(void) { +static int default_nbThreads(void) { const char* const env = getenv(ENV_NBTHREADS); if (env != NULL) { const char* ptr = env; @@ -783,7 +783,7 @@ static unsigned default_nbThreads(void) { DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env); return ZSTDCLI_NBTHREADS_DEFAULT; } else if (*ptr == 0) { - return nbThreads; + return (int)nbThreads; } } DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env); @@ -810,22 +810,31 @@ static unsigned default_nbThreads(void) { CLEAN_RETURN(1); \ } } } -#define NEXT_UINT32(val32) { \ - const char* __nb; \ - NEXT_FIELD(__nb); \ - val32 = readU32FromChar(&__nb); \ - if(*__nb != 0) { \ +#define NEXT_INT32(_vari32) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ + _vari32 = (int)readU32FromChar(&__nb); \ + if(*__nb != 0) { \ errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ - } \ + } \ } -#define NEXT_TSIZE(valTsize) { \ - const char* __nb; \ - NEXT_FIELD(__nb); \ - valTsize = readSizeTFromChar(&__nb); \ - if(*__nb != 0) { \ +#define NEXT_UINT32(_varu32) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ + _varu32 = readU32FromChar(&__nb); \ + if(*__nb != 0) { \ errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ - } \ + } \ +} + +#define NEXT_TSIZE(_varTsize) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ + _varTsize = readSizeTFromChar(&__nb); \ + if(*__nb != 0) { \ + errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ + } \ } typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, 
zom_train, zom_list } zstd_operation_mode; @@ -871,7 +880,7 @@ int main(int argCount, const char* argv[]) int nbWorkers = -1; /* -1 means unset */ double compressibility = -1.0; /* lorem ipsum generator */ unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ - size_t blockSize = 0; + size_t chunkSize = 0; FIO_prefs_t* const prefs = FIO_createPreferences(); FIO_ctx_t* const fCtx = FIO_createContext(); @@ -1069,11 +1078,12 @@ int main(int argCount, const char* argv[]) continue; } #endif - if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; } + if (longCommandWArg(&argument, "--threads")) { NEXT_INT32(nbWorkers); continue; } if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; } if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; } if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; } - if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; } + if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(chunkSize); continue; } /* hidden command, prefer --split below */ + if (longCommandWArg(&argument, "--split")) { NEXT_TSIZE(chunkSize); continue; } if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; } if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; } @@ -1256,10 +1266,10 @@ int main(int argCount, const char* argv[]) bench_nbSeconds = readU32FromChar(&argument); break; - /* cut input into blocks (benchmark only) */ + /* cut input into independent chunks (benchmark only) */ case 'B': argument++; - blockSize = readU32FromChar(&argument); + chunkSize = readU32FromChar(&argument); break; /* benchmark files 
separately (hidden option) */ @@ -1273,7 +1283,7 @@ int main(int argCount, const char* argv[]) /* nb of threads (hidden option) */ case 'T': argument++; - nbWorkers = readU32FromChar(&argument); + nbWorkers = (int)readU32FromChar(&argument); break; /* Dictionary Selection level */ @@ -1324,10 +1334,10 @@ int main(int argCount, const char* argv[]) if ((nbWorkers==0) && (!singleThread)) { /* automatically set # workers based on # of reported cpus */ if (defaultLogicalCores) { - nbWorkers = (unsigned)UTIL_countLogicalCores(); + nbWorkers = UTIL_countLogicalCores(); DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers); } else { - nbWorkers = (unsigned)UTIL_countPhysicalCores(); + nbWorkers = UTIL_countPhysicalCores(); DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers); } } @@ -1404,7 +1414,7 @@ int main(int argCount, const char* argv[]) DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n"); CLEAN_RETURN(1); } - benchParams.blockSize = blockSize; + benchParams.blockSize = chunkSize; benchParams.targetCBlockSize = targetCBlockSize; benchParams.nbWorkers = (int)nbWorkers; benchParams.realTime = (unsigned)setRealTimePrio; @@ -1464,18 +1474,18 @@ int main(int argCount, const char* argv[]) int const optimize = !coverParams.k || !coverParams.d; coverParams.nbThreads = (unsigned)nbWorkers; coverParams.zParams = zParams; - operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit); + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, &coverParams, NULL, optimize, memLimit); } else if (dict == fastCover) { int const optimize = !fastCoverParams.k || !fastCoverParams.d; fastCoverParams.nbThreads = (unsigned)nbWorkers; fastCoverParams.zParams = zParams; - operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, 
(int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit); + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, NULL, &fastCoverParams, optimize, memLimit); } else { ZDICT_legacy_params_t dictParams; memset(&dictParams, 0, sizeof(dictParams)); dictParams.selectivityLevel = dictSelect; dictParams.zParams = zParams; - operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit); + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, &dictParams, NULL, NULL, 0, memLimit); } #else (void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */ @@ -1583,7 +1593,7 @@ int main(int argCount, const char* argv[]) FIO_setCompressionType(prefs, cType); FIO_setContentSize(prefs, contentSize); FIO_setNbWorkers(prefs, (int)nbWorkers); - FIO_setBlockSize(prefs, (int)blockSize); + FIO_setBlockSize(prefs, (int)chunkSize); if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog); FIO_setLdmFlag(prefs, (unsigned)ldmFlag); FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);