1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-05 19:15:58 +03:00

Merge pull request #2299 from senhuang42/env_var_num_threads

Allow environment variable to specify number of threads for compression
This commit is contained in:
Yann Collet
2020-09-14 14:04:19 -07:00
committed by GitHub
4 changed files with 59 additions and 8 deletions

View File

@@ -204,14 +204,19 @@ Benchmark arguments :
### Passing parameters through Environment Variables ### Passing parameters through Environment Variables
`ZSTD_CLEVEL` can be used to modify the default compression level of `zstd` `ZSTD_CLEVEL` can be used to modify the default compression level of `zstd`
(usually set to `3`) to another value between 1 and 19 (the "normal" range). (usually set to `3`) to another value between 1 and 19 (the "normal" range).
This can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments. `ZSTD_NBTHREADS` can be used to specify number of threads that `zstd` will use during compression, which by default is `1`.
This functionality only exists when `zstd` is compiled with multithread support.
The max # of threads is capped at: `ZSTDMT_NBWORKERS_MAX==200`.
This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments.
One such scenario is `tar --zstd`. One such scenario is `tar --zstd`.
As `ZSTD_CLEVEL` only replaces the default compression level, As `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` only replace the default compression level
it can then be overridden by corresponding command line arguments. and number of threads, respectively, they can both be overridden by corresponding command line arguments:
`-#` for compression level and `-T#` for number of threads.
There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner. There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner.
Using environment variables for this purpose has security implications. Using environment variables for this purpose has security implications.
Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL`. Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`.
### Long distance matching mode ### Long distance matching mode
The long distance matching mode, enabled with `--long`, is designed to improve The long distance matching mode, enabled with `--long`, is designed to improve

View File

@@ -271,11 +271,20 @@ the last one takes effect.
Using environment variables to set parameters has security implications. Using environment variables to set parameters has security implications.
Therefore, this avenue is intentionally restricted. Therefore, this avenue is intentionally restricted.
Only `ZSTD_CLEVEL` is supported currently, for setting compression level. Only `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` are currently supported.
They set the compression level and number of threads to use during compression, respectively.
`ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range). `ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range).
If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message. If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message.
`ZSTD_CLEVEL` just replaces the default compression level (`3`). `ZSTD_CLEVEL` just replaces the default compression level (`3`).
It can be overridden by corresponding command line arguments.
`ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression.
If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message.
`ZSTD_NBTHREADS` has a default value of `1`, and is capped at `ZSTDMT_NBWORKERS_MAX==200`. `zstd` must be
compiled with multithread support for this to have any effect.
They can both be overridden by corresponding command line arguments:
`-#` for compression level and `-T#` for number of compression threads.
DICTIONARY BUILDER DICTIONARY BUILDER

View File

@@ -20,7 +20,9 @@
# define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */ # define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */
#endif #endif
/* Number of compression threads used when the ZSTD_NBTHREADS environment
 * variable is unset or rejected; may be overridden at build time. */
#ifndef ZSTDCLI_NBTHREADS_DEFAULT
# define ZSTDCLI_NBTHREADS_DEFAULT 1
#endif
/*-************************************ /*-************************************
* Dependencies * Dependencies
@@ -598,6 +600,7 @@ static void printVersion(void)
/* Environment variables for parameter setting */ /* Environment variables for parameter setting */
#define ENV_CLEVEL "ZSTD_CLEVEL" #define ENV_CLEVEL "ZSTD_CLEVEL"
#define ENV_NBTHREADS "ZSTD_NBTHREADS" /* takes lower precedence than directly specifying -T# in the CLI */
/* pick up environment variable */ /* pick up environment variable */
static int init_cLevel(void) { static int init_cLevel(void) {
@@ -627,6 +630,27 @@ static int init_cLevel(void) {
return ZSTDCLI_CLEVEL_DEFAULT; return ZSTDCLI_CLEVEL_DEFAULT;
} }
#ifdef ZSTD_MULTITHREAD
/* init_nbThreads() :
 * Pick up the initial number of threads from the ENV_NBTHREADS environment variable.
 * @return : the parsed value when the variable holds nothing but a decimal unsigned number,
 *           ZSTDCLI_NBTHREADS_DEFAULT when the variable is unset or its content is rejected.
 * A rejected value is reported through DISPLAYLEVEL(2, ...) before falling back. */
static unsigned init_nbThreads(void) {
    const char* const value = getenv(ENV_NBTHREADS);

    /* variable not set : nothing to report */
    if (value == NULL) return ZSTDCLI_NBTHREADS_DEFAULT;

    /* only attempt a conversion when the text starts with a digit
     * (rejects empty strings, signs and letters upfront) */
    if ((*value >= '0') && (*value <= '9')) {
        const char* cursor = value;
        unsigned parsed;
        /* a non-zero result from the checked reader is treated as overflow */
        if (readU32FromCharChecked(&cursor, &parsed)) {
            DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, value);
            return ZSTDCLI_NBTHREADS_DEFAULT;
        }
        /* accept only when the number spans the whole string */
        if (*cursor == 0) return parsed;
    }

    /* leading non-digit, or trailing characters after the number */
    DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, value);
    return ZSTDCLI_NBTHREADS_DEFAULT;
}
#endif
#define NEXT_FIELD(ptr) { \ #define NEXT_FIELD(ptr) { \
if (*argument == '=') { \ if (*argument == '=') { \
ptr = ++argument; \ ptr = ++argument; \
@@ -733,7 +757,7 @@ int main(int const argCount, const char* argv[])
if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); } if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
programName = lastNameFromPath(programName); programName = lastNameFromPath(programName);
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
nbWorkers = 1; nbWorkers = init_nbThreads();
#endif #endif
/* preset behaviors */ /* preset behaviors */

View File

@@ -1202,6 +1202,19 @@ then
println "\n===> zstdmt long distance matching round-trip tests " println "\n===> zstdmt long distance matching round-trip tests "
roundTripTest -g8M "3 --long=24 -T2" roundTripTest -g8M "3 --long=24 -T2"
println "\n===> zstdmt environment variable tests "
# Run zstd on a small scratch file with a range of ZSTD_NBTHREADS values:
# rejected values first, then accepted ones.
echo "multifoo" >> mt_tmp
ZSTD_NBTHREADS=-3 zstd -f mt_tmp # negative value, warn and revert to default setting
ZSTD_NBTHREADS='' zstd -f mt_tmp # empty env var, warn and revert to default setting
ZSTD_NBTHREADS=- zstd -f mt_tmp # malformed env var, warn and revert to default setting
ZSTD_NBTHREADS=a zstd -f mt_tmp # malformed env var, warn and revert to default setting
ZSTD_NBTHREADS=+a zstd -f mt_tmp # malformed env var, warn and revert to default setting
ZSTD_NBTHREADS=3a7 zstd -f mt_tmp # malformed env var, warn and revert to default setting
ZSTD_NBTHREADS=50000000000 zstd -f mt_tmp # numeric value too large, warn and revert to default setting
ZSTD_NBTHREADS=2 zstd -f mt_tmp # correct usage
ZSTD_NBTHREADS=1 zstd -f mt_tmp # correct usage: single thread
# remove the scratch file and every output derived from it
rm mt_tmp*
println "\n===> ovLog tests " println "\n===> ovLog tests "
datagen -g2MB > tmp datagen -g2MB > tmp
refSize=$(zstd tmp -6 -c --zstd=wlog=18 | wc -c) refSize=$(zstd tmp -6 -c --zstd=wlog=18 | wc -c)