From d5dbdd6ece53ac5bcab2b91fcd7a1b776b84dfb1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 4 Mar 2025 12:51:39 -0800
Subject: [PATCH] changed command --blocksize into --split

to reduce confusion with the concept of "blocks" inside a Zstandard frame.

We are now talking about "independent chunks" being produced by a `split` operation.

updated documentation accordingly.

Note: old commands "-B#` and `--blocksize=#` remain supported,
to maintain compatibility with existing scripts.
---
 programs/benchzstd.h |  4 +--
 programs/zstd.1.md   | 18 ++++++------
 programs/zstdcli.c   | 66 +++++++++++++++++++++++++-------------------
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index 4fd0e5a8a..db4d72f9d 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -92,9 +92,9 @@ typedef enum {
 } BMK_mode_t;
 
 typedef struct {
-    BMK_mode_t mode;        /* 0: all, 1: compress only 2: decode only */
+    BMK_mode_t mode;        /* 0: both, 1: compress only 2: decode only */
     unsigned nbSeconds;     /* default timing is in nbSeconds */
-    size_t blockSize;       /* Maximum size of each block*/
+    size_t blockSize;       /* Maximum size of each independent chunk */
     size_t targetCBlockSize;/* Approximative size of compressed blocks */
     int nbWorkers;          /* multithreading */
     unsigned realTime;      /* real time priority */
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index e5c1b7fd2..3b7bc342a 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -113,7 +113,11 @@ the last one takes effect.
     Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
 * `--ultra`:
     unlocks high compression levels 20+ (maximum 22), using a lot more memory.
-    Note that decompression will also require more memory when using these levels.
+    Decompression will also need more memory when using these levels.
+* `--max`:
+    set advanced parameters to reach maximum compression.
+    warning: this setting is very slow and uses a lot of resources.
+    It's inappropriate for 32-bit mode and therefore disabled in this mode.
 * `--fast[=#]`:
     switch to ultra-fast compression levels.
     If `=#` is not present, it defaults to `1`.
@@ -161,10 +165,6 @@ the last one takes effect.
 
     Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
     `--memory=windowSize` needs to be passed to the decompressor.
-* `--max`:
-    set advanced parameters to maximum compression.
-    warning: this setting is very slow and uses a lot of resources.
-    It's inappropriate for 32-bit mode and therefore disabled in this mode.
 * `-D DICT`:
     use `DICT` as Dictionary to compress or decompress FILE(s)
 * `--patch-from FILE`:
@@ -554,8 +554,8 @@ Compression of small files similar to the sample set will be greatly improved.
     Use `#` compression level during training (optional).
     Will generate statistics more tuned for selected compression level,
     resulting in a _small_ compression ratio improvement for this level.
-* `-B#`:
-    Split input files into blocks of size # (default: no split)
+* `--split=#`:
+    Split input files into independent chunks of size # (default: no split)
 * `-M#`, `--memory=#`:
     Limit the amount of sample data loaded for training (default: 2 GB).
     Note that the default (2 GB) is also the maximum.
@@ -683,8 +683,8 @@ Benchmarking will employ `max(1, min(4, nbCores/4))` worker threads by default i
     benchmark decompression speed only (requires providing a zstd-compressed content)
 * `-i#`:
     minimum evaluation time, in seconds (default: 3s), benchmark mode only
-* `-B#`, `--block-size=#`:
-    cut file(s) into independent chunks of size # (default: no chunking)
+* `--split=#`:
+    split input file(s) into independent chunks of size # (default: no chunking)
 * `-S`:
     output one benchmark result per input file (default: consolidated result)
 * `-D dictionary`
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 83d9b881e..38d00225f 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -307,7 +307,7 @@ static void usageAdvanced(const char* programName)
     DISPLAYOUT("  -b#                           Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
     DISPLAYOUT("  -e#                           Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
     DISPLAYOUT("  -i#                           Set the minimum evaluation to time # seconds. [Default: 3]\n");
-    DISPLAYOUT("  -B#                           Cut file into independent chunks of size #. [Default: No chunking]\n");
+    DISPLAYOUT("  --split=#                     Split input into independent chunks of size #. [Default: No chunking]\n");
     DISPLAYOUT("  -S                            Output one benchmark result per input file. [Default: Consolidated result]\n");
     DISPLAYOUT("  -D dictionary                 Benchmark using dictionary \n");
     DISPLAYOUT("  --priority=rt                 Set process priority to real-time.\n");
@@ -773,7 +773,7 @@ static int init_cLevel(void) {
 }
 
 #ifdef ZSTD_MULTITHREAD
-static unsigned default_nbThreads(void) {
+static int default_nbThreads(void) {
     const char* const env = getenv(ENV_NBTHREADS);
     if (env != NULL) {
         const char* ptr = env;
@@ -783,7 +783,7 @@ static unsigned default_nbThreads(void) {
                 DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
                 return ZSTDCLI_NBTHREADS_DEFAULT;
             } else if (*ptr == 0) {
-                return nbThreads;
+                return (int)nbThreads;
             }
         }
         DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
@@ -810,22 +810,31 @@ static unsigned default_nbThreads(void) {
             CLEAN_RETURN(1);      \
 }   }   }
 
-#define NEXT_UINT32(val32) {        \
-    const char* __nb;               \
-    NEXT_FIELD(__nb);               \
-    val32 = readU32FromChar(&__nb); \
-    if(*__nb != 0) {                \
+#define NEXT_INT32(_vari32) {              \
+    const char* __nb;                      \
+    NEXT_FIELD(__nb);                      \
+    _vari32 = (int)readU32FromChar(&__nb); \
+    if(*__nb != 0) {                       \
         errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
-    }                               \
+    }                                      \
 }
 
-#define NEXT_TSIZE(valTsize) {           \
-    const char* __nb;                    \
-    NEXT_FIELD(__nb);                    \
-    valTsize = readSizeTFromChar(&__nb); \
-    if(*__nb != 0) {                     \
+#define NEXT_UINT32(_varu32) {        \
+    const char* __nb;                 \
+    NEXT_FIELD(__nb);                 \
+    _varu32 = readU32FromChar(&__nb); \
+    if(*__nb != 0) {                  \
         errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
-    }                                    \
+    }                                 \
+}
+
+#define NEXT_TSIZE(_varTsize) {           \
+    const char* __nb;                     \
+    NEXT_FIELD(__nb);                     \
+    _varTsize = readSizeTFromChar(&__nb); \
+    if(*__nb != 0) {                      \
+        errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
+    }                                     \
 }
 
 typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
@@ -871,7 +880,7 @@ int main(int argCount, const char* argv[])
     int nbWorkers = -1; /* -1 means unset */
     double compressibility = -1.0;  /* lorem ipsum generator */
     unsigned bench_nbSeconds = 3;   /* would be better if this value was synchronized from bench */
-    size_t blockSize = 0;
+    size_t chunkSize = 0;
 
     FIO_prefs_t* const prefs = FIO_createPreferences();
     FIO_ctx_t* const fCtx = FIO_createContext();
@@ -1069,11 +1078,12 @@ int main(int argCount, const char* argv[])
                   continue;
                 }
 #endif
-                if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }
+                if (longCommandWArg(&argument, "--threads")) { NEXT_INT32(nbWorkers); continue; }
                 if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
                 if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
                 if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
-                if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; }
+                if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(chunkSize); continue; } /* hidden command, prefer --split below */
+                if (longCommandWArg(&argument, "--split")) { NEXT_TSIZE(chunkSize); continue; }
                 if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
                 if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
                 if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
@@ -1256,10 +1266,10 @@ int main(int argCount, const char* argv[])
                     bench_nbSeconds = readU32FromChar(&argument);
                     break;
 
-                    /* cut input into blocks (benchmark only) */
+                    /* cut input into independent chunks (benchmark only) */
                 case 'B':
                     argument++;
-                    blockSize = readU32FromChar(&argument);
+                    chunkSize = readU32FromChar(&argument);
                     break;
 
                     /* benchmark files separately (hidden option) */
@@ -1273,7 +1283,7 @@ int main(int argCount, const char* argv[])
                     /* nb of threads (hidden option) */
                 case 'T':
                     argument++;
-                    nbWorkers = readU32FromChar(&argument);
+                    nbWorkers = (int)readU32FromChar(&argument);
                     break;
 
                     /* Dictionary Selection level */
@@ -1324,10 +1334,10 @@ int main(int argCount, const char* argv[])
     if ((nbWorkers==0) && (!singleThread)) {
         /* automatically set # workers based on # of reported cpus */
         if (defaultLogicalCores) {
-            nbWorkers = (unsigned)UTIL_countLogicalCores();
+            nbWorkers = UTIL_countLogicalCores();
             DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
         } else {
-            nbWorkers = (unsigned)UTIL_countPhysicalCores();
+            nbWorkers = UTIL_countPhysicalCores();
             DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
         }
     }
@@ -1404,7 +1414,7 @@ int main(int argCount, const char* argv[])
             DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n");
             CLEAN_RETURN(1);
         }
-        benchParams.blockSize = blockSize;
+        benchParams.blockSize = chunkSize;
         benchParams.targetCBlockSize = targetCBlockSize;
         benchParams.nbWorkers = (int)nbWorkers;
         benchParams.realTime = (unsigned)setRealTimePrio;
@@ -1464,18 +1474,18 @@ int main(int argCount, const char* argv[])
             int const optimize = !coverParams.k || !coverParams.d;
             coverParams.nbThreads = (unsigned)nbWorkers;
             coverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, &coverParams, NULL, optimize, memLimit);
         } else if (dict == fastCover) {
             int const optimize = !fastCoverParams.k || !fastCoverParams.d;
             fastCoverParams.nbThreads = (unsigned)nbWorkers;
             fastCoverParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
         } else {
             ZDICT_legacy_params_t dictParams;
             memset(&dictParams, 0, sizeof(dictParams));
             dictParams.selectivityLevel = dictSelect;
             dictParams.zParams = zParams;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, &dictParams, NULL, NULL, 0, memLimit);
         }
 #else
         (void)dictCLevel; (void)dictSelect; (void)dictID;  (void)maxDictSize; /* not used when ZSTD_NODICT set */
@@ -1583,7 +1593,7 @@ int main(int argCount, const char* argv[])
         FIO_setCompressionType(prefs, cType);
         FIO_setContentSize(prefs, contentSize);
         FIO_setNbWorkers(prefs, (int)nbWorkers);
-        FIO_setBlockSize(prefs, (int)blockSize);
+        FIO_setBlockSize(prefs, (int)chunkSize);
         if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
         FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
         FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);