mirror of
https://github.com/facebook/zstd.git
synced 2025-07-30 22:23:13 +03:00
changed command --block-size into --split
to reduce confusion with the concept of "blocks" inside a Zstandard frame. We are now talking about "independent chunks" being produced by a `split` operation. Updated documentation accordingly. Note: old commands `-B#` and `--block-size=#` remain supported, to maintain compatibility with existing scripts.
This commit is contained in:
@ -92,9 +92,9 @@ typedef enum {
|
|||||||
} BMK_mode_t;
|
} BMK_mode_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */
|
BMK_mode_t mode; /* 0: both, 1: compress only 2: decode only */
|
||||||
unsigned nbSeconds; /* default timing is in nbSeconds */
|
unsigned nbSeconds; /* default timing is in nbSeconds */
|
||||||
size_t blockSize; /* Maximum size of each block*/
|
size_t blockSize; /* Maximum size of each independent chunk */
|
||||||
size_t targetCBlockSize;/* Approximative size of compressed blocks */
|
size_t targetCBlockSize;/* Approximative size of compressed blocks */
|
||||||
int nbWorkers; /* multithreading */
|
int nbWorkers; /* multithreading */
|
||||||
unsigned realTime; /* real time priority */
|
unsigned realTime; /* real time priority */
|
||||||
|
@ -113,7 +113,11 @@ the last one takes effect.
|
|||||||
Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
|
Because the compressor's behavior highly depends on the content to compress, there's no guarantee of a smooth progression from one level to another.
|
||||||
* `--ultra`:
|
* `--ultra`:
|
||||||
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
|
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
|
||||||
Note that decompression will also require more memory when using these levels.
|
Decompression will also need more memory when using these levels.
|
||||||
|
* `--max`:
|
||||||
|
set advanced parameters to reach maximum compression.
|
||||||
|
warning: this setting is very slow and uses a lot of resources.
|
||||||
|
It's inappropriate for 32-bit mode and therefore disabled in this mode.
|
||||||
* `--fast[=#]`:
|
* `--fast[=#]`:
|
||||||
switch to ultra-fast compression levels.
|
switch to ultra-fast compression levels.
|
||||||
If `=#` is not present, it defaults to `1`.
|
If `=#` is not present, it defaults to `1`.
|
||||||
@ -161,10 +165,6 @@ the last one takes effect.
|
|||||||
|
|
||||||
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
|
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
|
||||||
`--memory=windowSize` needs to be passed to the decompressor.
|
`--memory=windowSize` needs to be passed to the decompressor.
|
||||||
* `--max`:
|
|
||||||
set advanced parameters to maximum compression.
|
|
||||||
warning: this setting is very slow and uses a lot of resources.
|
|
||||||
It's inappropriate for 32-bit mode and therefore disabled in this mode.
|
|
||||||
* `-D DICT`:
|
* `-D DICT`:
|
||||||
use `DICT` as Dictionary to compress or decompress FILE(s)
|
use `DICT` as Dictionary to compress or decompress FILE(s)
|
||||||
* `--patch-from FILE`:
|
* `--patch-from FILE`:
|
||||||
@ -554,8 +554,8 @@ Compression of small files similar to the sample set will be greatly improved.
|
|||||||
Use `#` compression level during training (optional).
|
Use `#` compression level during training (optional).
|
||||||
Will generate statistics more tuned for selected compression level,
|
Will generate statistics more tuned for selected compression level,
|
||||||
resulting in a _small_ compression ratio improvement for this level.
|
resulting in a _small_ compression ratio improvement for this level.
|
||||||
* `-B#`:
|
* `--split=#`:
|
||||||
Split input files into blocks of size # (default: no split)
|
Split input files into independent chunks of size # (default: no split)
|
||||||
* `-M#`, `--memory=#`:
|
* `-M#`, `--memory=#`:
|
||||||
Limit the amount of sample data loaded for training (default: 2 GB).
|
Limit the amount of sample data loaded for training (default: 2 GB).
|
||||||
Note that the default (2 GB) is also the maximum.
|
Note that the default (2 GB) is also the maximum.
|
||||||
@ -683,8 +683,8 @@ Benchmarking will employ `max(1, min(4, nbCores/4))` worker threads by default i
|
|||||||
benchmark decompression speed only (requires providing zstd-compressed content)
|
benchmark decompression speed only (requires providing zstd-compressed content)
|
||||||
* `-i#`:
|
* `-i#`:
|
||||||
minimum evaluation time, in seconds (default: 3s), benchmark mode only
|
minimum evaluation time, in seconds (default: 3s), benchmark mode only
|
||||||
* `-B#`, `--block-size=#`:
|
* `--split=#`:
|
||||||
cut file(s) into independent chunks of size # (default: no chunking)
|
split input file(s) into independent chunks of size # (default: no chunking)
|
||||||
* `-S`:
|
* `-S`:
|
||||||
output one benchmark result per input file (default: consolidated result)
|
output one benchmark result per input file (default: consolidated result)
|
||||||
* `-D dictionary`
|
* `-D dictionary`
|
||||||
|
@ -307,7 +307,7 @@ static void usageAdvanced(const char* programName)
|
|||||||
DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
|
DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
|
||||||
DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
|
DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
|
||||||
DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n");
|
DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n");
|
||||||
DISPLAYOUT(" -B# Cut file into independent chunks of size #. [Default: No chunking]\n");
|
DISPLAYOUT(" --split=# Split input into independent chunks of size #. [Default: No chunking]\n");
|
||||||
DISPLAYOUT(" -S Output one benchmark result per input file. [Default: Consolidated result]\n");
|
DISPLAYOUT(" -S Output one benchmark result per input file. [Default: Consolidated result]\n");
|
||||||
DISPLAYOUT(" -D dictionary Benchmark using dictionary \n");
|
DISPLAYOUT(" -D dictionary Benchmark using dictionary \n");
|
||||||
DISPLAYOUT(" --priority=rt Set process priority to real-time.\n");
|
DISPLAYOUT(" --priority=rt Set process priority to real-time.\n");
|
||||||
@ -773,7 +773,7 @@ static int init_cLevel(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ZSTD_MULTITHREAD
|
#ifdef ZSTD_MULTITHREAD
|
||||||
static unsigned default_nbThreads(void) {
|
static int default_nbThreads(void) {
|
||||||
const char* const env = getenv(ENV_NBTHREADS);
|
const char* const env = getenv(ENV_NBTHREADS);
|
||||||
if (env != NULL) {
|
if (env != NULL) {
|
||||||
const char* ptr = env;
|
const char* ptr = env;
|
||||||
@ -783,7 +783,7 @@ static unsigned default_nbThreads(void) {
|
|||||||
DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
|
DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
|
||||||
return ZSTDCLI_NBTHREADS_DEFAULT;
|
return ZSTDCLI_NBTHREADS_DEFAULT;
|
||||||
} else if (*ptr == 0) {
|
} else if (*ptr == 0) {
|
||||||
return nbThreads;
|
return (int)nbThreads;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
|
DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
|
||||||
@ -810,22 +810,31 @@ static unsigned default_nbThreads(void) {
|
|||||||
CLEAN_RETURN(1); \
|
CLEAN_RETURN(1); \
|
||||||
} } }
|
} } }
|
||||||
|
|
||||||
#define NEXT_UINT32(val32) { \
|
#define NEXT_INT32(_vari32) { \
|
||||||
const char* __nb; \
|
const char* __nb; \
|
||||||
NEXT_FIELD(__nb); \
|
NEXT_FIELD(__nb); \
|
||||||
val32 = readU32FromChar(&__nb); \
|
_vari32 = (int)readU32FromChar(&__nb); \
|
||||||
if(*__nb != 0) { \
|
if(*__nb != 0) { \
|
||||||
errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
|
errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NEXT_TSIZE(valTsize) { \
|
#define NEXT_UINT32(_varu32) { \
|
||||||
const char* __nb; \
|
const char* __nb; \
|
||||||
NEXT_FIELD(__nb); \
|
NEXT_FIELD(__nb); \
|
||||||
valTsize = readSizeTFromChar(&__nb); \
|
_varu32 = readU32FromChar(&__nb); \
|
||||||
if(*__nb != 0) { \
|
if(*__nb != 0) { \
|
||||||
errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
|
errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
|
||||||
} \
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NEXT_TSIZE(_varTsize) { \
|
||||||
|
const char* __nb; \
|
||||||
|
NEXT_FIELD(__nb); \
|
||||||
|
_varTsize = readSizeTFromChar(&__nb); \
|
||||||
|
if(*__nb != 0) { \
|
||||||
|
errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
|
typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
|
||||||
@ -871,7 +880,7 @@ int main(int argCount, const char* argv[])
|
|||||||
int nbWorkers = -1; /* -1 means unset */
|
int nbWorkers = -1; /* -1 means unset */
|
||||||
double compressibility = -1.0; /* lorem ipsum generator */
|
double compressibility = -1.0; /* lorem ipsum generator */
|
||||||
unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
|
unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
|
||||||
size_t blockSize = 0;
|
size_t chunkSize = 0;
|
||||||
|
|
||||||
FIO_prefs_t* const prefs = FIO_createPreferences();
|
FIO_prefs_t* const prefs = FIO_createPreferences();
|
||||||
FIO_ctx_t* const fCtx = FIO_createContext();
|
FIO_ctx_t* const fCtx = FIO_createContext();
|
||||||
@ -1069,11 +1078,12 @@ int main(int argCount, const char* argv[])
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }
|
if (longCommandWArg(&argument, "--threads")) { NEXT_INT32(nbWorkers); continue; }
|
||||||
if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
|
if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
|
||||||
if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
|
if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
|
||||||
if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
|
if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
|
||||||
if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; }
|
if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(chunkSize); continue; } /* hidden command, prefer --split below */
|
||||||
|
if (longCommandWArg(&argument, "--split")) { NEXT_TSIZE(chunkSize); continue; }
|
||||||
if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
|
if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
|
||||||
if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
|
if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
|
||||||
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
|
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badUsage(programName, originalArgument); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; }
|
||||||
@ -1256,10 +1266,10 @@ int main(int argCount, const char* argv[])
|
|||||||
bench_nbSeconds = readU32FromChar(&argument);
|
bench_nbSeconds = readU32FromChar(&argument);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* cut input into blocks (benchmark only) */
|
/* cut input into independent chunks (benchmark only) */
|
||||||
case 'B':
|
case 'B':
|
||||||
argument++;
|
argument++;
|
||||||
blockSize = readU32FromChar(&argument);
|
chunkSize = readU32FromChar(&argument);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* benchmark files separately (hidden option) */
|
/* benchmark files separately (hidden option) */
|
||||||
@ -1273,7 +1283,7 @@ int main(int argCount, const char* argv[])
|
|||||||
/* nb of threads (hidden option) */
|
/* nb of threads (hidden option) */
|
||||||
case 'T':
|
case 'T':
|
||||||
argument++;
|
argument++;
|
||||||
nbWorkers = readU32FromChar(&argument);
|
nbWorkers = (int)readU32FromChar(&argument);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Dictionary Selection level */
|
/* Dictionary Selection level */
|
||||||
@ -1324,10 +1334,10 @@ int main(int argCount, const char* argv[])
|
|||||||
if ((nbWorkers==0) && (!singleThread)) {
|
if ((nbWorkers==0) && (!singleThread)) {
|
||||||
/* automatically set # workers based on # of reported cpus */
|
/* automatically set # workers based on # of reported cpus */
|
||||||
if (defaultLogicalCores) {
|
if (defaultLogicalCores) {
|
||||||
nbWorkers = (unsigned)UTIL_countLogicalCores();
|
nbWorkers = UTIL_countLogicalCores();
|
||||||
DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
|
DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
|
||||||
} else {
|
} else {
|
||||||
nbWorkers = (unsigned)UTIL_countPhysicalCores();
|
nbWorkers = UTIL_countPhysicalCores();
|
||||||
DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
|
DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1404,7 +1414,7 @@ int main(int argCount, const char* argv[])
|
|||||||
DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n");
|
DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n");
|
||||||
CLEAN_RETURN(1);
|
CLEAN_RETURN(1);
|
||||||
}
|
}
|
||||||
benchParams.blockSize = blockSize;
|
benchParams.blockSize = chunkSize;
|
||||||
benchParams.targetCBlockSize = targetCBlockSize;
|
benchParams.targetCBlockSize = targetCBlockSize;
|
||||||
benchParams.nbWorkers = (int)nbWorkers;
|
benchParams.nbWorkers = (int)nbWorkers;
|
||||||
benchParams.realTime = (unsigned)setRealTimePrio;
|
benchParams.realTime = (unsigned)setRealTimePrio;
|
||||||
@ -1464,18 +1474,18 @@ int main(int argCount, const char* argv[])
|
|||||||
int const optimize = !coverParams.k || !coverParams.d;
|
int const optimize = !coverParams.k || !coverParams.d;
|
||||||
coverParams.nbThreads = (unsigned)nbWorkers;
|
coverParams.nbThreads = (unsigned)nbWorkers;
|
||||||
coverParams.zParams = zParams;
|
coverParams.zParams = zParams;
|
||||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit);
|
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, &coverParams, NULL, optimize, memLimit);
|
||||||
} else if (dict == fastCover) {
|
} else if (dict == fastCover) {
|
||||||
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
|
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
|
||||||
fastCoverParams.nbThreads = (unsigned)nbWorkers;
|
fastCoverParams.nbThreads = (unsigned)nbWorkers;
|
||||||
fastCoverParams.zParams = zParams;
|
fastCoverParams.zParams = zParams;
|
||||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
|
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
|
||||||
} else {
|
} else {
|
||||||
ZDICT_legacy_params_t dictParams;
|
ZDICT_legacy_params_t dictParams;
|
||||||
memset(&dictParams, 0, sizeof(dictParams));
|
memset(&dictParams, 0, sizeof(dictParams));
|
||||||
dictParams.selectivityLevel = dictSelect;
|
dictParams.selectivityLevel = dictSelect;
|
||||||
dictParams.zParams = zParams;
|
dictParams.zParams = zParams;
|
||||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit);
|
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, chunkSize, &dictParams, NULL, NULL, 0, memLimit);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */
|
(void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */
|
||||||
@ -1583,7 +1593,7 @@ int main(int argCount, const char* argv[])
|
|||||||
FIO_setCompressionType(prefs, cType);
|
FIO_setCompressionType(prefs, cType);
|
||||||
FIO_setContentSize(prefs, contentSize);
|
FIO_setContentSize(prefs, contentSize);
|
||||||
FIO_setNbWorkers(prefs, (int)nbWorkers);
|
FIO_setNbWorkers(prefs, (int)nbWorkers);
|
||||||
FIO_setBlockSize(prefs, (int)blockSize);
|
FIO_setBlockSize(prefs, (int)chunkSize);
|
||||||
if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
|
if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
|
||||||
FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
|
FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
|
||||||
FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);
|
FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);
|
||||||
|
Reference in New Issue
Block a user