changed command --block-size into --split

to reduce confusion with the concept of "blocks" inside a Zstandard frame. We are now talking about "independent chunks" being produced by a `split` operation. Updated documentation accordingly. Note: old commands `-B#` and `--block-size=#` remain supported, to maintain compatibility with existing scripts.
2025-07-30 22:23:13 +03:00 · 2025-03-04 12:51:39 -08:00
parent 7df457a51d
commit d5dbdd6ece
3 changed files with 49 additions and 39 deletions
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@ -92,9 +92,9 @@ typedef enum {
 } BMK_mode_t;
 typedef struct {
-    BMK_mode_t mode;        /* 0: all, 1: compress only 2: decode only */
+    BMK_mode_t mode;        /* 0: both, 1: compress only 2: decode only */
    unsigned nbSeconds;     /* default timing is in nbSeconds */
-    size_t blockSize;       /* Maximum size of each block*/
+    size_t blockSize;       /* Maximum size of each independent chunk */
    size_t targetCBlockSize;/* Approximative size of compressed blocks */
    int nbWorkers;          /* multithreading */
    unsigned realTime;      /* real time priority */
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@ -113,7 +113,11 @@ the last one takes effect.
    Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
 * `--ultra`:
    unlocks high compression levels 20+ (maximum 22), using a lot more memory.
-    Note that decompression will also require more memory when using these levels.
+    Decompression will also need more memory when using these levels.
 * `--max`:
    set advanced parameters to reach maximum compression.
    warning: this setting is very slow and uses a lot of resources.
    It's inappropriate for 32-bit mode and therefore disabled in this mode.
 * `--fast[=#]`:
    switch to ultra-fast compression levels.
    If `=#` is not present, it defaults to `1`.
@ -161,10 +165,6 @@ the last one takes effect.
    Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
    `--memory=windowSize` needs to be passed to the decompressor.
 * `--max`:
    set advanced parameters to maximum compression.
    warning: this setting is very slow and uses a lot of resources.
    It's inappropriate for 32-bit mode and therefore disabled in this mode.
 * `-D DICT`:
    use `DICT` as Dictionary to compress or decompress FILE(s)
 * `--patch-from FILE`:
@ -554,8 +554,8 @@ Compression of small files similar to the sample set will be greatly improved.
    Use `#` compression level during training (optional).
    Will generate statistics more tuned for selected compression level,
    resulting in a _small_ compression ratio improvement for this level.
-* `-B#`:
+* `--split=#`:
-    Split input files into blocks of size # (default: no split)
+    Split input files into independent chunks of size # (default: no split)
 * `-M#`, `--memory=#`:
    Limit the amount of sample data loaded for training (default: 2 GB).
    Note that the default (2 GB) is also the maximum.
@ -683,8 +683,8 @@ Benchmarking will employ `max(1, min(4, nbCores/4))` worker threads by default i
    benchmark decompression speed only (requires providing a zstd-compressed content)
 * `-i#`:
    minimum evaluation time, in seconds (default: 3s), benchmark mode only
-* `-B#`, `--block-size=#`:
+* `--split=#`:
-    cut file(s) into independent chunks of size # (default: no chunking)
+    split input file(s) into independent chunks of size # (default: no chunking)
 * `-S`:
    output one benchmark result per input file (default: consolidated result)
 * `-D dictionary`
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -307,7 +307,7 @@ static void usageAdvanced(const char* programName)
    DISPLAYOUT("  -b#                           Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
    DISPLAYOUT("  -e#                           Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
    DISPLAYOUT("  -i#                           Set the minimum evaluation to time # seconds. [Default: 3]\n");
-    DISPLAYOUT("  -B#                           Cut file into independent chunks of size #. [Default: No chunking]\n");
+    DISPLAYOUT("  --split=#                     Split input into independent chunks of size #. [Default: No chunking]\n");
    DISPLAYOUT("  -S                            Output one benchmark result per input file. [Default: Consolidated result]\n");
    DISPLAYOUT("  -D dictionary                 Benchmark using dictionary \n");
    DISPLAYOUT("  --priority=rt                 Set process priority to real-time.\n");
@ -773,7 +773,7 @@ static int init_cLevel(void) {
 }
 #ifdef ZSTD_MULTITHREAD
-static unsigned default_nbThreads(void) {
+static int default_nbThreads(void) {
    const char* const env = getenv(ENV_NBTHREADS);
    if (env != NULL) {
        const char* ptr = env;
@ -783,7 +783,7 @@ static unsigned default_nbThreads(void) {
                DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
                return ZSTDCLI_NBTHREADS_DEFAULT;
            } else if (*ptr == 0) {
-                return nbThreads;
+                return (int)nbThreads;
            }
        }
        DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
@ -810,22 +810,31 @@ static unsigned default_nbThreads(void) {
            CLEAN_RETURN(1);      \
 }   }   }
-#define NEXT_UINT32(val32) {        \
+#define NEXT_INT32(_vari32) {              \
-    const char* __nb;               \
+    const char* __nb;                      \
-    NEXT_FIELD(__nb);               \
+    NEXT_FIELD(__nb);                      \
-    val32 = readU32FromChar(&__nb); \
+    _vari32 = (int)readU32FromChar(&__nb); \
-    if(*__nb != 0) {                \
+    if(*__nb != 0) {                       \
        errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
-    }                               \
+    }                                      \
 }
-#define NEXT_TSIZE(valTsize) {           \
+#define NEXT_UINT32(_varu32) {        \
-    const char* __nb;                    \
+    const char* __nb;                 \
-    NEXT_FIELD(__nb);                    \
+    NEXT_FIELD(__nb);                 \
-    valTsize = readSizeTFromChar(&__nb); \
+    _varu32 = readU32FromChar(&__nb); \
-    if(*__nb != 0) {                     \
+    if(*__nb != 0) {                  \
        errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
-    }                                    \
+    }                                 \
 }
 #define NEXT_TSIZE(_varTsize) {           \
    const char* __nb;                     \
    NEXT_FIELD(__nb);                     \
    _varTsize = readSizeTFromChar(&__nb); \
    if(*__nb != 0) {                      \
        errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
    }                                     \
 }
 typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
@ -871,7 +880,7 @@ int main(int argCount, const char* argv[])
    int nbWorkers = -1; /* -1 means unset */
    double compressibility = -1.0;  /* lorem ipsum generator */
    unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
-    size_t blockSize = 0;
+    size_t chunkSize = 0;
    FIO_prefs_t* const prefs = FIO_createPreferences();
    FIO_ctx_t* const fCtx = FIO_createContext();
@ -1069,11 +1078,12 @@ int main(int argCount, const char* argv[])
                  continue;
                }
 #endif
-                if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }
+                if (longCommandWArg(&argument, "--threads")) { NEXT_INT32(nbWorkers); continue; }
                if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
                if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
                if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
-                if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; }
+                if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(chunkSize); continue; } /* hidden command, prefer --split below */
                if (longCommandWArg(&argument, "--split")) { NEXT_TSIZE(chunkSize); continue; }
                if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
                if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
                if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
@ -1256,10 +1266,10 @@ int main(int argCount, const char* argv[])
                    bench_nbSeconds = readU32FromChar(&argument);
                    break;
-                    /* cut input into blocks (benchmark only) */
+                    /* cut input into independent chunks (benchmark only) */
                case 'B':
                    argument++;
-                    blockSize = readU32FromChar(&argument);
+                    chunkSize = readU32FromChar(&argument);
                    break;
                    /* benchmark files separately (hidden option) */
@ -1273,7 +1283,7 @@ int main(int argCount, const char* argv[])
                    /* nb of threads (hidden option) */
                case 'T':
                    argument++;
-                    nbWorkers = readU32FromChar(&argument);
+                    nbWorkers = (int)readU32FromChar(&argument);
                    break;
                    /* Dictionary Selection level */
@ -1324,10 +1334,10 @@ int main(int argCount, const char* argv[])
    if ((nbWorkers==0) && (!singleThread)) {
        /* automatically set # workers based on # of reported cpus */
        if (defaultLogicalCores) {
-            nbWorkers = (unsigned)UTIL_countLogicalCores();
+            nbWorkers = UTIL_countLogicalCores();
            DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
        } else {
-            nbWorkers = (unsigned)UTIL_countPhysicalCores();
+            nbWorkers = UTIL_countPhysicalCores();
            DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
        }
    }
@ -1404,7 +1414,7 @@ int main(int argCount, const char* argv[])
            DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n");
            CLEAN_RETURN(1);
        }
-        benchParams.blockSize = blockSize;
+        benchParams.blockSize = chunkSize;
        benchParams.targetCBlockSize = targetCBlockSize;
        benchParams.nbWorkers = (int)nbWorkers;
        benchParams.realTime = (unsigned)setRealTimePrio;
@ -1464,18 +1474,18 @@ int main(int argCount, const char* argv[])
            int const optimize = !coverParams.k || !coverParams.d;
            coverParams.nbThreads = (unsigned)nbWorkers;
            coverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, &coverParams, NULL, optimize, memLimit);
        } else if (dict == fastCover) {
            int const optimize = !fastCoverParams.k || !fastCoverParams.d;
            fastCoverParams.nbThreads = (unsigned)nbWorkers;
            fastCoverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
        } else {
            ZDICT_legacy_params_t dictParams;
            memset(&dictParams, 0, sizeof(dictParams));
            dictParams.selectivityLevel = dictSelect;
            dictParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, &dictParams, NULL, NULL, 0, memLimit);
        }
 #else
        (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
@ -1583,7 +1593,7 @@ int main(int argCount, const char* argv[])
        FIO_setCompressionType(prefs, cType);
        FIO_setContentSize(prefs, contentSize);
        FIO_setNbWorkers(prefs, (int)nbWorkers);
-        FIO_setBlockSize(prefs, (int)blockSize);
+        FIO_setBlockSize(prefs, (int)chunkSize);
        if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
        FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
        FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);