From 348e5f77a95922f4bf2232df1bd220ce665cc369 Mon Sep 17 00:00:00 2001
From: Jennifer Liu <jenniferliu620@fb.com>
Date: Fri, 29 Jun 2018 17:54:41 -0700
Subject: [PATCH] Add split=# to cli

---
 lib/dictBuilder/cover.c | 8 ++++----
 programs/zstd.1.md      | 5 ++++-
 programs/zstdcli.c      | 8 +++++++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 53f3d79a8..a3195aa77 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -558,15 +558,15 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   /* Check if there's training sample */
   if (nbTrainSamples < 1) {
     DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
-    DISPLAYLEVEL(1, "splitPoint is %i", (int)(splitPoint*100));
-    DISPLAYLEVEL(1, "nbSamples is %u", nbSamples);
     return 0;
   }
   /* Check if there's testing sample when splitPoint is nonzero */
   if (nbTestSamples < 1 && splitPoint < 1.0) {
     DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
-    DISPLAYLEVEL(1, "splitPoint is %i", (int)(splitPoint*100));
-    DISPLAYLEVEL(1, "nbSamples is %u", nbSamples);
+    return 0;
+  }
+  if (nbTrainSamples + nbTestSamples != nbSamples) {
+    DISPLAYLEVEL(1, "nbTrainSamples plus nbTestSamples don't add up to nbSamples");
     return 0;
   }
   /* Zero the context */
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 4b3818141..c45bdb386 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -223,11 +223,12 @@ Compression of small files similar to the sample set will be greatly improved.
     This compares favorably to 4 bytes default.
     However, it's up to the dictionary manager to not assign twice the same ID to
     2 different dictionaries.
-* `--train-cover[=k#,d=#,steps=#]`:
+* `--train-cover[=k=#,d=#,steps=#,split=#]`:
     Select parameters for the default dictionary builder algorithm named cover.
     If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
     If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
     If _steps_ is not specified, then the default value of 40 is used.
+    _split_ is the percentage of samples used for training (the rest are used for testing); if it is not specified, then the default value of 80 is used.
     Requires that _d_ <= _k_.
 
     Selects segments of size _k_ with highest score to put in the dictionary.
@@ -249,6 +250,8 @@ Compression of small files similar to the sample set will be greatly improved.
 
     `zstd --train-cover=k=50 FILEs`
 
+    `zstd --train-cover=k=50,split=60 FILEs`
+
 * `--train-legacy[=selectivity=#]`:
     Use legacy dictionary builder algorithm with the given dictionary
     _selectivity_ (default: 9).
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index ae8c9cba9..68404d660 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -278,14 +278,20 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
 static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
 {
+    unsigned splitPercentage = 100;  /* declared before the first statement (C90 style); only used for the level-4 trace below -- params->splitPoint itself stays 0 from the memset unless "split=" is given */
     memset(params, 0, sizeof(*params));
     for (; ;) {
         if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
         if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "split=")) {
+            splitPercentage = readU32FromChar(&stringPtr);  /* given on the command line as a percentage */
+            params->splitPoint = (double)splitPercentage / 100.0;  /* stored as a fraction */
+            if (stringPtr[0]==',') { stringPtr++; continue; } else break;
+        }
         return 0;
     }
     if (stringPtr[0] != 0) return 0;
-    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
+    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplitPoint=%u\n", params->k, params->d, params->steps, splitPercentage);
     return 1;
 }