1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-29 11:21:22 +03:00

Adding shrinking flag for cover and fastcover (#1656)

* Changed ERROR(GENERIC) excluding inits

* editing git ignore

* Edited init functions to size_t returns

* moved declarations earlier

* resolved issues with changes to init functions

* fixed style and an error check

* attempting to add tests that might trigger changes

* added && die to cases expecting to fail

* resolved no die on expected failed command

* fixed accel to be incorrect value

* Adding an automated shrinking option

* Fixing build

* finalizing fixes

* fix?

* Removing added comment in cover.h

* Styling fixes

* Merging with fb dev

* removing megic number for default regression

* Requested revisions

* fixing support for fast cover

* fixing casting errors

* parenthesis fix

* fixing some build nits

* resolving travis ci syntax

* might resolve all compilation issues

* removed unused variable

* remodeling the selectDict function

* fixing bad memory access

* fixing error checks

* fixed erroring check in selectDict

* fixing mixed declarations

* modify mixed declaration

* fixing nits and adding test cases

* Adding requested changes + fixed bug for error checking

* switched double comparison from != to <

* fixed declaration typing

* refactoring COVER_best_finish() and changing shrinkDict

* removing the const's

* modifying ZDICT_optimizeTrainFromBuffer_cover functions

* fixing potential bad memcpy

* fixing the error function for dict size
This commit is contained in:
Tyler-Tran
2019-06-27 16:26:57 -07:00
committed by Nick Terrell
parent 9038579ab2
commit c55d2e7ba3
7 changed files with 259 additions and 42 deletions

View File

@ -179,8 +179,8 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Dictionary builder : \n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args\n");
DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args\n");
DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
@ -299,6 +299,7 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
* @return 1 means that cover parameters were correct
* @return 0 in case of malformed parameters
*/
static const unsigned kDefaultRegression = 1;
static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
{
memset(params, 0, sizeof(*params));
@ -311,10 +312,23 @@ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
if (longCommandWArg(&stringPtr, "shrink")) {
params->shrinkDictMaxRegression = kDefaultRegression;
params->shrinkDict = 1;
if (stringPtr[0]=='=') {
stringPtr++;
params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
}
if (stringPtr[0]==',') {
stringPtr++;
continue;
}
else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100));
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);
return 1;
}
@ -338,10 +352,23 @@ static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_
params->splitPoint = (double)splitPercentage / 100.0;
if (stringPtr[0]==',') { stringPtr++; continue; } else break;
}
if (longCommandWArg(&stringPtr, "shrink")) {
params->shrinkDictMaxRegression = kDefaultRegression;
params->shrinkDict = 1;
if (stringPtr[0]=='=') {
stringPtr++;
params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
}
if (stringPtr[0]==',') {
stringPtr++;
continue;
}
else break;
}
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);
return 1;
}
@ -367,6 +394,8 @@ static ZDICT_cover_params_t defaultCoverParams(void)
params.d = 8;
params.steps = 4;
params.splitPoint = 1.0;
params.shrinkDict = 0;
params.shrinkDictMaxRegression = kDefaultRegression;
return params;
}
@ -379,6 +408,8 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
params.steps = 4;
params.splitPoint = 0.75; /* different from default splitPoint of cover */
params.accel = DEFAULT_ACCEL;
params.shrinkDict = 0;
params.shrinkDictMaxRegression = kDefaultRegression;
return params;
}
#endif