mirror of
https://github.com/facebook/zstd.git
synced 2025-07-29 11:21:22 +03:00
Merge branch 'dev' into adapt
This commit is contained in:
@ -84,7 +84,10 @@ static U32 g_ldmMinMatch = 0;
|
||||
static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
|
||||
static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
|
||||
|
||||
#define DEFAULT_SPLITPOINT 1.0
|
||||
|
||||
#define DEFAULT_ACCEL 1
|
||||
|
||||
typedef enum { cover, fastCover, legacy } dictType;
|
||||
|
||||
/*-************************************
|
||||
* Display Macros
|
||||
@ -173,6 +176,7 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( "Dictionary builder : \n");
|
||||
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
|
||||
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
|
||||
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
|
||||
DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
|
||||
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
|
||||
DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
|
||||
@ -296,6 +300,33 @@ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* parseFastCoverParameters() :
|
||||
* reads fastcover parameters from *stringPtr (e.g. "--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params
|
||||
* @return 1 means that fastcover parameters were correct
|
||||
* @return 0 in case of malformed parameters
|
||||
*/
|
||||
static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params)
|
||||
{
|
||||
memset(params, 0, sizeof(*params));
|
||||
for (; ;) {
|
||||
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "split=")) {
|
||||
unsigned splitPercentage = readU32FromChar(&stringPtr);
|
||||
params->splitPoint = (double)splitPercentage / 100.0;
|
||||
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (stringPtr[0] != 0) return 0;
|
||||
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* parseLegacyParameters() :
|
||||
* reads legacy dictioanry builter parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity
|
||||
@ -317,7 +348,19 @@ static ZDICT_cover_params_t defaultCoverParams(void)
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.d = 8;
|
||||
params.steps = 4;
|
||||
params.splitPoint = DEFAULT_SPLITPOINT;
|
||||
params.splitPoint = 1.0;
|
||||
return params;
|
||||
}
|
||||
|
||||
static ZDICT_fastCover_params_t defaultFastCoverParams(void)
|
||||
{
|
||||
ZDICT_fastCover_params_t params;
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.d = 8;
|
||||
params.f = 20;
|
||||
params.steps = 4;
|
||||
params.splitPoint = 0.75; /* different from default splitPoint of cover */
|
||||
params.accel = DEFAULT_ACCEL;
|
||||
return params;
|
||||
}
|
||||
#endif
|
||||
@ -433,7 +476,8 @@ int main(int argCount, const char* argv[])
|
||||
#endif
|
||||
#ifndef ZSTD_NODICT
|
||||
ZDICT_cover_params_t coverParams = defaultCoverParams();
|
||||
int cover = 1;
|
||||
ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
|
||||
dictType dict = fastCover;
|
||||
#endif
|
||||
#ifndef ZSTD_NOBENCH
|
||||
BMK_advancedParams_t benchParams = BMK_initAdvancedParams();
|
||||
@ -469,6 +513,9 @@ int main(int argCount, const char* argv[])
|
||||
if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(FIO_lz4Compression); } /* behave like unlz4, also supports multiple formats */
|
||||
memset(&compressionParams, 0, sizeof(compressionParams));
|
||||
|
||||
/* init crash handler */
|
||||
FIO_addAbortHandler();
|
||||
|
||||
/* command switches */
|
||||
for (argNb=1; argNb<argCount; argNb++) {
|
||||
const char* argument = argv[argNb];
|
||||
@ -533,18 +580,29 @@ int main(int argCount, const char* argv[])
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
cover = 1;
|
||||
dict = cover;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-fastcover")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = fastCover;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-legacy")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
cover = 0;
|
||||
dict = legacy;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { continue; }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
@ -849,13 +907,13 @@ int main(int argCount, const char* argv[])
|
||||
if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
|
||||
if (cLevelLast < cLevel) cLevelLast = cLevel;
|
||||
if (cLevelLast > cLevel)
|
||||
DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
|
||||
DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
|
||||
if(filenameIdx) {
|
||||
if(separateFiles) {
|
||||
unsigned i;
|
||||
for(i = 0; i < filenameIdx; i++) {
|
||||
int c;
|
||||
DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]);
|
||||
DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]);
|
||||
for(c = cLevel; c <= cLevelLast; c++) {
|
||||
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
|
||||
}
|
||||
@ -884,17 +942,22 @@ int main(int argCount, const char* argv[])
|
||||
zParams.compressionLevel = dictCLevel;
|
||||
zParams.notificationLevel = g_displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
if (cover) {
|
||||
if (dict == cover) {
|
||||
int const optimize = !coverParams.k || !coverParams.d;
|
||||
coverParams.nbThreads = nbWorkers;
|
||||
coverParams.zParams = zParams;
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, optimize);
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, NULL, optimize);
|
||||
} else if (dict == fastCover) {
|
||||
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
|
||||
fastCoverParams.nbThreads = nbWorkers;
|
||||
fastCoverParams.zParams = zParams;
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, NULL, &fastCoverParams, optimize);
|
||||
} else {
|
||||
ZDICT_legacy_params_t dictParams;
|
||||
memset(&dictParams, 0, sizeof(dictParams));
|
||||
dictParams.selectivityLevel = dictSelect;
|
||||
dictParams.zParams = zParams;
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, 0);
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, NULL, 0);
|
||||
}
|
||||
#endif
|
||||
goto _end;
|
||||
|
Reference in New Issue
Block a user