1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-29 11:21:22 +03:00

Merge branch 'dev' into adapt

This commit is contained in:
Yann Collet
2018-09-19 12:43:42 -07:00
78 changed files with 4963 additions and 1661 deletions

View File

@ -84,7 +84,10 @@ static U32 g_ldmMinMatch = 0;
static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT;
static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
#define DEFAULT_SPLITPOINT 1.0
#define DEFAULT_ACCEL 1
typedef enum { cover, fastCover, legacy } dictType;
/*-************************************
* Display Macros
@ -173,6 +176,7 @@ static int usage_advanced(const char* programName)
DISPLAY( "Dictionary builder : \n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
@ -296,6 +300,33 @@ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t
return 1;
}
/**
* parseFastCoverParameters() :
* reads fastcover parameters from *stringPtr (e.g. "--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params
* @return 1 means that fastcover parameters were correct
* @return 0 in case of malformed parameters
*/
static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params)
{
memset(params, 0, sizeof(*params));
for (; ;) {
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "split=")) {
unsigned splitPercentage = readU32FromChar(&stringPtr);
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
return 1;
}
/**
* parseLegacyParameters() :
* reads legacy dictioanry builter parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity
@ -317,7 +348,19 @@ static ZDICT_cover_params_t defaultCoverParams(void)
memset(&params, 0, sizeof(params));
params.d = 8;
params.steps = 4;
params.splitPoint = DEFAULT_SPLITPOINT;
params.splitPoint = 1.0;
return params;
}
static ZDICT_fastCover_params_t defaultFastCoverParams(void)
{
ZDICT_fastCover_params_t params;
memset(&params, 0, sizeof(params));
params.d = 8;
params.f = 20;
params.steps = 4;
params.splitPoint = 0.75; /* different from default splitPoint of cover */
params.accel = DEFAULT_ACCEL;
return params;
}
#endif
@ -433,7 +476,8 @@ int main(int argCount, const char* argv[])
#endif
#ifndef ZSTD_NODICT
ZDICT_cover_params_t coverParams = defaultCoverParams();
int cover = 1;
ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
dictType dict = fastCover;
#endif
#ifndef ZSTD_NOBENCH
BMK_advancedParams_t benchParams = BMK_initAdvancedParams();
@ -469,6 +513,9 @@ int main(int argCount, const char* argv[])
if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(FIO_lz4Compression); } /* behave like unlz4, also supports multiple formats */
memset(&compressionParams, 0, sizeof(compressionParams));
/* init crash handler */
FIO_addAbortHandler();
/* command switches */
for (argNb=1; argNb<argCount; argNb++) {
const char* argument = argv[argNb];
@ -533,18 +580,29 @@ int main(int argCount, const char* argv[])
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
cover = 1;
dict = cover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-fastcover")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = fastCover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-legacy")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
cover = 0;
dict = legacy;
/* Allow optional arguments following an = */
if (*argument == 0) { continue; }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
@ -849,13 +907,13 @@ int main(int argCount, const char* argv[])
if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
if (cLevelLast < cLevel) cLevelLast = cLevel;
if (cLevelLast > cLevel)
DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
if(filenameIdx) {
if(separateFiles) {
unsigned i;
for(i = 0; i < filenameIdx; i++) {
int c;
DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]);
DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]);
for(c = cLevel; c <= cLevelLast; c++) {
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
}
@ -884,17 +942,22 @@ int main(int argCount, const char* argv[])
zParams.compressionLevel = dictCLevel;
zParams.notificationLevel = g_displayLevel;
zParams.dictID = dictID;
if (cover) {
if (dict == cover) {
int const optimize = !coverParams.k || !coverParams.d;
coverParams.nbThreads = nbWorkers;
coverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, optimize);
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, NULL, optimize);
} else if (dict == fastCover) {
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
fastCoverParams.nbThreads = nbWorkers;
fastCoverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, NULL, &fastCoverParams, optimize);
} else {
ZDICT_legacy_params_t dictParams;
memset(&dictParams, 0, sizeof(dictParams));
dictParams.selectivityLevel = dictSelect;
dictParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, 0);
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, NULL, 0);
}
#endif
goto _end;