diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 1fe8d89ee..7ac7eb1c3 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -225,7 +225,7 @@ static COVER_ctx_t *g_ctx = NULL; */ static size_t COVER_sum(const size_t *samplesSizes, unsigned firstSample, unsigned lastSample) { size_t sum = 0; - size_t i; + unsigned i; for (i = firstSample; i < lastSample; ++i) { sum += samplesSizes[i]; } @@ -540,13 +540,12 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, unsigned d, double splitPoint) { const BYTE *const samples = (const BYTE *)samplesBuffer; - const unsigned first = 0; - const size_t totalSamplesSize = COVER_sum(samplesSizes, first, nbSamples); + const unsigned kFirst = 0; + const size_t totalSamplesSize = COVER_sum(samplesSizes, kFirst, nbSamples); /* Split samples into testing and training sets */ - double tmp = (double)nbSamples * splitPoint; - const unsigned nbTrainSamples = (unsigned)tmp; + const unsigned nbTrainSamples = (unsigned)((double)nbSamples * splitPoint); const unsigned nbTestSamples = nbSamples - nbTrainSamples; - const size_t trainingSamplesSize = COVER_sum(samplesSizes, first, nbTrainSamples); + const size_t trainingSamplesSize = COVER_sum(samplesSizes, kFirst, nbTrainSamples); const size_t testSamplesSize = COVER_sum(samplesSizes, nbTrainSamples, nbSamples); /* Checks */ if (totalSamplesSize < MAX(d, sizeof(U64)) || @@ -560,7 +559,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); return 0; } - /* Check if there's testing sample when splitPoint is nonzero */ + /* Check if there's testing sample when splitPoint is not 1.0 */ if (nbTestSamples < 1 && splitPoint < 1.0) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); return 0; diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 
28bed2309..5408d2a51 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -84,6 +84,7 @@ static U32 g_ldmMinMatch = 0; static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT; static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT; +#define DEFAULT_SPLITPOINT 0.8 /*-************************************ * Display Macros @@ -277,21 +278,20 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) */ static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params) { - unsigned splitPercentage = 100; memset(params, 0, sizeof(*params)); for (; ;) { if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "split=")) { - splitPercentage = readU32FromChar(&stringPtr); + unsigned splitPercentage = readU32FromChar(&stringPtr); params->splitPoint = (double)splitPercentage / 100.0; if (stringPtr[0]==',') { stringPtr++; continue; } else break; } return 0; } if (stringPtr[0] != 0) return 0; - DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplitPoint=%d\n", params->k, params->d, params->steps, splitPercentage); + DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100)); return 1; } @@ -316,6 +316,7 @@ static ZDICT_cover_params_t defaultCoverParams(void) memset(&params, 0, sizeof(params)); params.d = 8; params.steps = 4; + params.splitPoint = DEFAULT_SPLITPOINT; return params; } #endif