From 531a4273c0c8536c9fd0d914e4c4d7f9c1030a1c Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Wed, 15 Jun 2016 19:02:11 +0200
Subject: [PATCH 01/20] stronger dictionary compression tests

---
 programs/playTests.sh | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/programs/playTests.sh b/programs/playTests.sh
index 3be4c7775..6dafb630a 100755
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@@ -139,22 +139,32 @@ $ECHO "\n**** dictionary tests **** "
 ./datagen -g1M | $MD5SUM > tmp1
 ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2
 diff -q tmp1 tmp2
-$ECHO "Create first dictionary"
+$ECHO "- Create first dictionary"
 $ZSTD --train *.c -o tmpDict
 cp zstdcli.c tmp
 $ZSTD -f tmp -D tmpDict
 $ZSTD -d tmp.zst -D tmpDict -of result
 diff zstdcli.c result
-$ECHO "Create second (different) dictionary"
+$ECHO "- Create second (different) dictionary"
 $ZSTD --train *.c *.h -o tmpDictC
 $ZSTD -d tmp.zst -D tmpDictC -of result && die "wrong dictionary not detected!"
-$ECHO "Create dictionary with short dictID"
+$ECHO "- Create dictionary with short dictID"
 $ZSTD --train *.c --dictID 1 -o tmpDict1
 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
-$ECHO "Compress without dictID"
+$ECHO "- Compress without dictID"
 $ZSTD -f tmp -D tmpDict1 --no-dictID
 $ZSTD -d tmp.zst -D tmpDict -of result
 diff zstdcli.c result
+$ECHO "- Compress multiple files with dictionary"
+cat *.c *.h | $MD5SUM > tmp1
+rm -rf dirTestDict
+mkdir dirTestDict
+cp *.c dirTestDict
+cp *.h dirTestDict
+$ZSTD -f dirTestDict/* -D tmpDictC
+$ZSTD -d dirTestDict/*.zst -D tmpDictC -c | $MD5SUM > tmp2
+diff -q tmp1 tmp2
+rm -rf dirTestDict
 rm tmp*
 
 

From 3915545605c679b67b6707726b66e06c29f530af Mon Sep 17 00:00:00 2001
From: Tobias Ibounig <tobijdc@users.noreply.github.com>
Date: Wed, 15 Jun 2016 22:20:46 +0200
Subject: [PATCH 02/20] Fix Max Compression Level in zstd.1

---
 programs/zstd.1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/zstd.1 b/programs/zstd.1
index cc62eb30f..d7760f78f 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -43,7 +43,7 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 .SH OPTIONS
 .TP
 .B \-#
- # compression level [1-21] (default:1)
+ # compression level [1-22] (default:1)
 .TP
 .BR \-d ", " --decompress
  decompression

From 9b998e4d0846ac702153caaa36bfc2fb4654a038 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Wed, 15 Jun 2016 23:11:20 +0200
Subject: [PATCH 03/20] Fixed decompression of literals in dictionary mode

---
 lib/decompress/huf_decompress.c |  2 +-
 programs/playTests.sh           | 46 ++++++++++++++++-----------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 5a998ee2d..1580b3750 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -625,7 +625,7 @@ size_t HUF_decompress1X4_usingDTable(
     const HUF_DTable* DTable)
 {
     DTableDesc dtd = HUF_getDTableDesc(DTable);
-    if (dtd.tableType != 0) return ERROR(GENERIC);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
     return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
diff --git a/programs/playTests.sh b/programs/playTests.sh
index 6dafb630a..60d413c99 100755
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@@ -133,6 +133,29 @@ diff tmpSparse2M tmpSparseRegenerated
 rm tmpSparse*
 
 
+$ECHO "\n**** multiple files tests **** "
+
+./datagen -s1        > tmp1 2> /dev/null
+./datagen -s2 -g100K > tmp2 2> /dev/null
+./datagen -s3 -g1M   > tmp3 2> /dev/null
+$ZSTD -f tmp*
+$ECHO "compress tmp* : "
+ls -ls tmp*
+rm tmp1 tmp2 tmp3
+$ECHO "decompress tmp* : "
+$ZSTD -df *.zst
+ls -ls tmp*
+$ECHO "compress tmp* into stdout > tmpall : "
+$ZSTD -c tmp1 tmp2 tmp3 > tmpall
+ls -ls tmp*
+$ECHO "decompress tmpall* into stdout > tmpdec : "
+cp tmpall tmpall2
+$ZSTD -dc tmpall* > tmpdec
+ls -ls tmp*
+$ECHO "compress multiple files including a missing one (notHere) : "
+$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
+
+
 $ECHO "\n**** dictionary tests **** "
 
 ./datagen > tmpDict
@@ -168,29 +191,6 @@ rm -rf dirTestDict
 rm tmp*
 
 
-$ECHO "\n**** multiple files tests **** "
-
-./datagen -s1        > tmp1 2> /dev/null
-./datagen -s2 -g100K > tmp2 2> /dev/null
-./datagen -s3 -g1M   > tmp3 2> /dev/null
-$ZSTD -f tmp*
-$ECHO "compress tmp* : "
-ls -ls tmp*
-rm tmp1 tmp2 tmp3
-$ECHO "decompress tmp* : "
-$ZSTD -df *.zst
-ls -ls tmp*
-$ECHO "compress tmp* into stdout > tmpall : "
-$ZSTD -c tmp1 tmp2 tmp3 > tmpall
-ls -ls tmp*
-$ECHO "decompress tmpall* into stdout > tmpdec : "
-cp tmpall tmpall2
-$ZSTD -dc tmpall* > tmpdec
-ls -ls tmp*
-$ECHO "compress multiple files including a missing one (notHere) : "
-$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
-
-
 $ECHO "\n**** integrity tests **** "
 
 $ECHO "test one file (tmp1.zst) "

From 1a7b8fbc24e855d33474b783cd453568dc2dc686 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Wed, 15 Jun 2016 23:33:38 +0200
Subject: [PATCH 04/20] fixed dictionary tests

---
 programs/playTests.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/programs/playTests.sh b/programs/playTests.sh
index 60d413c99..a7edbeb94 100755
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@@ -179,14 +179,14 @@ $ZSTD -f tmp -D tmpDict1 --no-dictID
 $ZSTD -d tmp.zst -D tmpDict -of result
 diff zstdcli.c result
 $ECHO "- Compress multiple files with dictionary"
-cat *.c *.h | $MD5SUM > tmp1
 rm -rf dirTestDict
 mkdir dirTestDict
 cp *.c dirTestDict
 cp *.h dirTestDict
+cat dirTestDict/* | $MD5SUM > tmph1  # note : we expect same file order to generate same hash
 $ZSTD -f dirTestDict/* -D tmpDictC
-$ZSTD -d dirTestDict/*.zst -D tmpDictC -c | $MD5SUM > tmp2
-diff -q tmp1 tmp2
+$ZSTD -d dirTestDict/*.zst -D tmpDictC -c | $MD5SUM > tmph2
+diff -q tmph1 tmph2
 rm -rf dirTestDict
 rm tmp*
 
@@ -194,6 +194,8 @@ rm tmp*
 $ECHO "\n**** integrity tests **** "
 
 $ECHO "test one file (tmp1.zst) "
+./datagen > tmp1
+$ZSTD tmp1
 $ZSTD -t tmp1.zst
 $ZSTD --test tmp1.zst
 $ECHO "test multiple files (*.zst) "

From efd0b4993a44ed09e7b7cc3629a2def8cf6eaf8e Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 00:53:56 +0200
Subject: [PATCH 05/20] fixed fuzzer error (inter-block repeated offsets)

---
 lib/compress/zstd_compress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index b8f7b32a5..220fadc65 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1847,7 +1847,7 @@ _storeSequence:
     /* Save reps for next block */
     {   int i;
         for (i=0; i<ZSTD_REP_NUM; i++) {
-            if (!rep[i]) rep[i] = (U32)(iend-base);   /* in case some zero are left */
+            if (!rep[i]) rep[i] = (U32)(iend - ctx->base);   /* in case some zero are left */
             ctx->savedRep[i] = rep[i];
     }   }
 

From 52a0622beb1812dcc39da7c085e4793569fa07e6 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Wed, 15 Jun 2016 13:53:34 +0200
Subject: [PATCH 06/20] RepsCodes are saved into Dict (incomplete : need
 decompression to regenerate them)

---
 lib/common/zstd_internal.h   |   2 +-
 lib/compress/fse_compress.c  |   2 +-
 lib/compress/zstd_compress.c |  28 ++++----
 lib/dictBuilder/zdict.c      | 126 +++++++++++++++++++++++++----------
 lib/dictBuilder/zdict.h      |   2 +-
 5 files changed, 111 insertions(+), 49 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 17ae1a77a..0909955a9 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -64,7 +64,7 @@
 #endif
 
 #define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_DICT_MAGIC  0xEC30A437
+#define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7 */
 
 #define ZSTD_REP_NUM    3
 #define ZSTD_REP_INIT   ZSTD_REP_NUM
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 5c804dcaf..192d55026 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -256,7 +256,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
             bitStream += count << bitCount;
             bitCount  += nbBits;
             bitCount  -= (count<max);
-            previous0 = (count==1);
+            previous0  = (count==1);
             while (remaining<threshold) nbBits--, threshold>>=1;
         }
         if (bitCount>16) {
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 220fadc65..b1edaff3b 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2342,45 +2342,49 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
 static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
 {
     /* note : magic number already checked */
-    size_t const dictSizeStart = dictSize;
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
 
     {   size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
-        dict = (const char*)dict + hufHeaderSize;
-        dictSize -= hufHeaderSize;
+        dictPtr += hufHeaderSize;
     }
 
     {   short offcodeNCount[MaxOff+1];
         unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
-        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
-        dict = (const char*)dict + offcodeHeaderSize;
-        dictSize -= offcodeHeaderSize;
+        dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
         unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
-        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
-        dict = (const char*)dict + matchlengthHeaderSize;
-        dictSize -= matchlengthHeaderSize;
+        dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
         unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
-        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
-        dictSize -= litlengthHeaderSize;
+        dictPtr += litlengthHeaderSize;
     }
 
+    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    zc->rep[0] = MEM_readLE32(dictPtr+0); if (zc->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    zc->rep[1] = MEM_readLE32(dictPtr+4); if (zc->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    zc->rep[2] = MEM_readLE32(dictPtr+8); if (zc->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    dictPtr += 12;
+
     zc->flagStaticTables = 1;
-    return (dictSizeStart-dictSize);
+    return dictPtr - (const BYTE*)dict;
 }
 
 /** ZSTD_compress_insertDictionary() :
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 2e15cbbf8..ace0fc154 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -578,9 +578,10 @@ typedef struct
     void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
 } EStats_ress_t;
 
+#define MAXREPOFFSET 1024
 
 static void ZDICT_countEStats(EStats_ress_t esr,
-                            U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
+                            U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
                             const void* src, size_t srcSize)
 {
     const seqStore_t* seqStorePtr;
@@ -614,6 +615,17 @@ static void ZDICT_countEStats(EStats_ress_t esr,
             size_t u;
             for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
     }   }
+
+    /* rep offsets */
+    {   const U32* const offsetPtr = seqStorePtr->offsetStart;
+        U32 offset1 = offsetPtr[0] - 3;
+        U32 offset2 = offsetPtr[1] - 3;
+        if (offset1 >= MAXREPOFFSET) offset1 = 0;
+        if (offset2 >= MAXREPOFFSET) offset2 = 0;
+        repOffsets[offset1] += 3;
+        repOffsets[offset2] += 1;
+    }
+
 }
 
 /*
@@ -629,12 +641,29 @@ static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
 
 static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
 {
-    size_t total;
+    size_t total=0;
     unsigned u;
-    for (u=0, total=0; u<nbFiles; u++) total += fileSizes[u];
+    for (u=0; u<nbFiles; u++) total += fileSizes[u];
     return total;
 }
 
+typedef struct { U32 offset; U32 count; } offsetCount_t;
+
+static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, U32 count)
+{
+    U32 u;
+    table[ZSTD_REP_NUM].offset = val;
+    table[ZSTD_REP_NUM].count = count;
+    for (u=ZSTD_REP_NUM; u>0; u--) {
+        offsetCount_t tmp;
+        if (table[u-1].count >= table[u].count) break;
+        tmp = table[u-1];
+        table[u-1] = table[u];
+        table[u] = tmp;
+    }
+}
+
+
 #define OFFCODE_MAX 18  /* only applicable to first block */
 static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
                                  unsigned compressionLevel,
@@ -649,6 +678,8 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     short matchLengthNCount[MaxML+1];
     U32 litLengthCount[MaxLL+1];
     short litLengthNCount[MaxLL+1];
+    U32 repOffset[MAXREPOFFSET] = { 0 };
+    offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
     EStats_ress_t esr;
     ZSTD_parameters params;
     U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
@@ -656,12 +687,15 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     size_t eSize = 0;
     size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
     size_t const averageSampleSize = totalSrcSize / nbFiles;
+    BYTE* dstPtr = (BYTE*)dstBuffer;
 
     /* init */
     for (u=0; u<256; u++) countLit[u]=1;   /* any character must be described */
     for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
     for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
     for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+    repOffset[1] = repOffset[4] = repOffset[8] = 1;
+    memset(bestRepOffset, 0, sizeof(bestRepOffset));
     esr.ref = ZSTD_createCCtx();
     esr.zc = ZSTD_createCCtx();
     esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
@@ -679,7 +713,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     /* collect stats on all files */
     for (u=0; u<nbFiles; u++) {
         ZDICT_countEStats(esr,
-                        countLit, offcodeCount, matchLengthCount, litLengthCount,
+                        countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
            (const char*)srcBuffer + pos, fileSizes[u]);
         pos += fileSizes[u];
     }
@@ -720,46 +754,70 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     }
     llLog = (U32)errorCode;
 
+    {   U32 offset;
+        for (offset=1; offset<MAXREPOFFSET; offset++)
+            ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
+    }
+
+
     /* write result to buffer */
-    errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
-    if (HUF_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "HUF_writeCTable error");
-        goto _cleanup;
+    {   size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
+        if (HUF_isError(hhSize)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, "HUF_writeCTable error");
+            goto _cleanup;
+        }
+        dstPtr += hhSize;
+        maxDstSize -= hhSize;
+        eSize += hhSize;
     }
-    dstBuffer = (char*)dstBuffer + errorCode;
-    maxDstSize -= errorCode;
-    eSize += errorCode;
 
-    errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
-    if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
-        goto _cleanup;
+    {   size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
+        if (FSE_isError(ohSize)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
+            goto _cleanup;
+        }
+        dstPtr += ohSize;
+        maxDstSize -= ohSize;
+        eSize += ohSize;
     }
-    dstBuffer = (char*)dstBuffer + errorCode;
-    maxDstSize -= errorCode;
-    eSize += errorCode;
 
-    errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
-    if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
-        goto _cleanup;
+    {   size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
+        if (FSE_isError(mhSize)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
+            goto _cleanup;
+        }
+        dstPtr += mhSize;
+        maxDstSize -= mhSize;
+        eSize += mhSize;
     }
-    dstBuffer = (char*)dstBuffer + errorCode;
-    maxDstSize -= errorCode;
-    eSize += errorCode;
 
-    errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
-    if (FSE_isError(errorCode)) {
+    {   size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
+        if (FSE_isError(lhSize)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+            goto _cleanup;
+        }
+        dstPtr += lhSize;
+        maxDstSize -= lhSize;
+        eSize += lhSize;
+    }
+
+    if (maxDstSize<12) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+        DISPLAYLEVEL(1, "not enough space to write RepOffsets");
         goto _cleanup;
     }
-    dstBuffer = (char*)dstBuffer + errorCode;
-    maxDstSize -= errorCode;
-    eSize += errorCode;
+    MEM_writeLE32(dstPtr+0, bestRepOffset[0].offset);
+    MEM_writeLE32(dstPtr+4, bestRepOffset[1].offset);
+    MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
+    //MEM_writeLE32(dstPtr+0, 1);
+    //MEM_writeLE32(dstPtr+4, 4);
+    //MEM_writeLE32(dstPtr+8, 8);
+    dstPtr += 12;
+    eSize += 12;
 
 _cleanup:
     ZSTD_freeCCtx(esr.ref);
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index 534d29de8..39acdf852 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -79,7 +79,7 @@ const char* ZDICT_getErrorName(size_t errorCode);
 
 /* ====================================================================================
  * The definitions in this section are considered experimental.
- * They should never be used in association with a dynamic library, as they may change in the future.
+ * They should never be used with a dynamic library, as they may change in the future.
  * They are provided for advanced usages.
  * Use them only in association with static linking.
  * ==================================================================================== */

From 8e36a9c16999a2fee3cad4c082035ed497a55035 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Wed, 15 Jun 2016 14:05:07 +0200
Subject: [PATCH 07/20] decoder restores repOffsets from dictionary

---
 lib/decompress/zstd_decompress.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 82d54fb5c..6d0903830 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1186,47 +1186,50 @@ static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSi
     dctx->previousDstEnd = (const char*)dict + dictSize;
 }
 
-static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const dictSizeStart)
+static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const dictSize)
 {
-    size_t dictSize = dictSizeStart;
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dict + dictSize;
 
     {   size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize);
         if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
-        dict = (const char*)dict + hSize;
-        dictSize -= hSize;
+        dictPtr += hSize;
     }
 
     {   short offcodeNCount[MaxOff+1];
         U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog;
-        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSE_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
-        dict = (const char*)dict + offcodeHeaderSize;
-        dictSize -= offcodeHeaderSize;
+        dictPtr += offcodeHeaderSize;
     }
 
     {   short matchlengthNCount[MaxML+1];
         unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
-        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
-        dict = (const char*)dict + matchlengthHeaderSize;
-        dictSize -= matchlengthHeaderSize;
+        dictPtr += matchlengthHeaderSize;
     }
 
     {   short litlengthNCount[MaxLL+1];
         unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
-        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
         { size_t const errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
-        dictSize -= litlengthHeaderSize;
+        dictPtr += litlengthHeaderSize;
     }
 
+    dctx->rep[0] = MEM_readLE32(dictPtr+0);
+    dctx->rep[1] = MEM_readLE32(dictPtr+4);
+    dctx->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
     dctx->litEntropy = dctx->fseEntropy = 1;
-    return dictSizeStart - dictSize;
+    return dictPtr - (const BYTE*)dict;
 }
 
 static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)

From 736d419289591260f51a515b9e46384d8cc99322 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Wed, 15 Jun 2016 18:48:51 +0200
Subject: [PATCH 08/20] strengthened dict loading on decompression side

---
 lib/compress/zstd_compress.c     | 26 +++++++++++++++-----------
 lib/decompress/zstd_decompress.c |  9 +++++----
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index b1edaff3b..1ae321838 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2335,17 +2335,21 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
 /* Dictionary format :
      Magic == ZSTD_DICT_MAGIC (4 bytes)
      HUF_writeCTable(256)
+     FSE_writeNCount(ml)
+     FSE_writeNCount(off)
+     FSE_writeNCount(ll)
+     RepOffsets
      Dictionary content
 */
 /*! ZSTD_loadDictEntropyStats() :
-    @return : size read from dictionary */
-static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
+    @return : size read from dictionary
+    note : magic number supposed already checked */
+static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
 {
-    /* note : magic number already checked */
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dictPtr + dictSize;
 
-    {   size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
+    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
         dictPtr += hufHeaderSize;
     }
@@ -2354,7 +2358,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t
         unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
         size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
-        { size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
+        { size_t const errorCode = FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dictPtr += offcodeHeaderSize;
     }
@@ -2363,7 +2367,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t
         unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
         size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        { size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
+        { size_t const errorCode = FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dictPtr += matchlengthHeaderSize;
     }
@@ -2372,18 +2376,18 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t
         unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
         size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
         if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        { size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
+        { size_t const errorCode = FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
           if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
         dictPtr += litlengthHeaderSize;
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    zc->rep[0] = MEM_readLE32(dictPtr+0); if (zc->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    zc->rep[1] = MEM_readLE32(dictPtr+4); if (zc->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    zc->rep[2] = MEM_readLE32(dictPtr+8); if (zc->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
     dictPtr += 12;
 
-    zc->flagStaticTables = 1;
+    cctx->flagStaticTables = 1;
     return dictPtr - (const BYTE*)dict;
 }
 
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 6d0903830..9bc888961 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1186,7 +1186,7 @@ static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSi
     dctx->previousDstEnd = (const char*)dict + dictSize;
 }
 
-static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const dictSize)
+static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
     const BYTE* const dictEnd = dict + dictSize;
@@ -1223,9 +1223,10 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t const d
         dictPtr += litlengthHeaderSize;
     }
 
-    dctx->rep[0] = MEM_readLE32(dictPtr+0);
-    dctx->rep[1] = MEM_readLE32(dictPtr+4);
-    dctx->rep[2] = MEM_readLE32(dictPtr+8);
+    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
     dictPtr += 12;
 
     dctx->litEntropy = dctx->fseEntropy = 1;

From ad39b7a7189c43a19af63aac11396ac38c1825ae Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 01:14:41 +0200
Subject: [PATCH 09/20] zdict stores standard rep-offset. It can use custom
 ones, but the proper formula and impact on statistics is not done yet.

---
 lib/dictBuilder/zdict.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index ace0fc154..0814581fe 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -727,6 +727,13 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     }
     huffLog = (U32)errorCode;
 
+    /* looking for most common first offsets */
+    {   U32 offset;
+        for (offset=1; offset<MAXREPOFFSET; offset++)
+            ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
+    }
+    /* note : the result of this phase should be used to better appreciate the impact on statistics */
+
     total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
     errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
     if (FSE_isError(errorCode)) {
@@ -754,11 +761,6 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     }
     llLog = (U32)errorCode;
 
-    {   U32 offset;
-        for (offset=1; offset<MAXREPOFFSET; offset++)
-            ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
-    }
-
 
     /* write result to buffer */
     {   size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
@@ -810,12 +812,17 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
         DISPLAYLEVEL(1, "not enough space to write RepOffsets");
         goto _cleanup;
     }
+# if 0
     MEM_writeLE32(dstPtr+0, bestRepOffset[0].offset);
     MEM_writeLE32(dstPtr+4, bestRepOffset[1].offset);
     MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
-    //MEM_writeLE32(dstPtr+0, 1);
-    //MEM_writeLE32(dstPtr+4, 4);
-    //MEM_writeLE32(dstPtr+8, 8);
+#else
+    /* at this stage, we don't use the result of "most common first offset",
+       as its impact on statistics is not properly evaluated */
+    MEM_writeLE32(dstPtr+0, repStartValue[0]);
+    MEM_writeLE32(dstPtr+4, repStartValue[1]);
+    MEM_writeLE32(dstPtr+8, repStartValue[2]);
+#endif
     dstPtr += 12;
     eSize += 12;
 

From 80d033fb43a82da9bffb308eb2240a5cfa8d9df3 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 01:41:50 +0200
Subject: [PATCH 10/20] fixed ptr arithmetic warning

---
 lib/decompress/zstd_decompress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 9bc888961..1763499d1 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1189,7 +1189,7 @@ static void ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSi
 static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize)
 {
     const BYTE* dictPtr = (const BYTE*)dict;
-    const BYTE* const dictEnd = dict + dictSize;
+    const BYTE* const dictEnd = dictPtr + dictSize;
 
     {   size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize);
         if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);

From 803c05ec7e9890d4d4d4dc87e734fc9279825b12 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 11:32:57 +0200
Subject: [PATCH 11/20] fuzzer : tests with high id are run without need to
 change finalTestNb

---
 programs/fuzzer.c           | 2 ++
 tests/test-zstd-versions.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index 480fd3072..42d3640da 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -851,6 +851,8 @@ int main(int argc, const char** argv)
     DISPLAY("Seed = %u\n", seed);
     if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba);
 
+    if (nbTests < testNb) nbTests = testNb;
+
     if (testNb==0)
         result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
     if (!result)
diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py
index 437cd4c01..34b584087 100755
--- a/tests/test-zstd-versions.py
+++ b/tests/test-zstd-versions.py
@@ -130,7 +130,7 @@ if __name__ == '__main__':
     # Build all release zstd
     for tag in tags:
         os.chdir(base_dir)
-        dst_zstd = '{}/zstd.{}'  .format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
+        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
         if not os.path.isfile(dst_zstd) or tag == head:
             if tag != head:
                 r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>

From 23ba41533a9d6592f5b71f8dd3b7655f61f35f9d Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 13:20:46 +0200
Subject: [PATCH 12/20] Fixed zstd_opt encoding error with repeat-offsets

---
 lib/compress/zstd_opt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index 703b568e2..8b15bf6ad 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -574,7 +574,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
            best_mlen = minMatch;
            {   U32 i;
                for (i=0; i<ZSTD_REP_NUM; i++) {
-                   if ((rep[i]<(U32)(inr-prefixStart))
+                   if ((opt[cur].rep[i]<(U32)(inr-prefixStart))
                        && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch))) {  /* check rep */
                        mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch;
                        ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off);

From 3f01c8833f237a17cb44e1f65fa63bd67fabd0d8 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 13:38:10 +0200
Subject: [PATCH 13/20] better seed randomization for systems with poor clock()
 resolution

---
 programs/fuzzer.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index 42d3640da..d1dfe51e8 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -847,7 +847,12 @@ int main(int argc, const char** argv)
     /* Get Seed */
     DISPLAY("Starting zstd tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING);
 
-    if (!seedset) seed = (U32)(clock() % 10000);
+    if (!seedset) {
+        time_t const t = time(NULL);
+        U32 const h = XXH32(&t, sizeof(t), 1);
+        seed = h % 10000;
+    }
+
     DISPLAY("Seed = %u\n", seed);
     if (proba!=FUZ_compressibility_default) DISPLAY("Compressibility : %u%%\n", proba);
 

From ec2031e2a741e3ecb5b6ffd7a035663ea2ceeb17 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 14:08:48 +0200
Subject: [PATCH 14/20] update readme for 0.7

---
 README.md | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 5173c9f9b..7b58e5e72 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
- **Zstd**, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level compression ratio.
+ **Zstd**, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level and better compression ratios.
 
 It is provided as a BSD-license package, hosted on Github.
 
@@ -7,7 +7,7 @@ It is provided as a BSD-license package, hosted on Github.
 |master      | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) |
 |dev         | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) |
 
-As a reference, several fast compression algorithms were tested and compared to [zlib] on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus].
+As a reference, several fast compression algorithms were tested and compared on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, with the [Silesia compression corpus].
 
 [lzbench]: https://github.com/inikep/lzbench
 [Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
@@ -16,7 +16,7 @@ As a reference, several fast compression algorithms were tested and compared to
 |Name             | Ratio | C.speed | D.speed |
 |-----------------|-------|--------:|--------:|
 |                 |       |   MB/s  |  MB/s   |
-|**zstd 0.6.0 -1**|**2.877**|**330**| **915** |
+|**zstd 0.7.0 -1**|**2.877**|**325**| **930** |
 | [zlib] 1.2.8 -1 | 2.730 |    95   |   360   |
 | brotli -0       | 2.708 |   220   |   430   |
 | QuickLZ 1.5     | 2.237 |   510   |   605   |
@@ -28,16 +28,16 @@ As a reference, several fast compression algorithms were tested and compared to
 [zlib]:http://www.zlib.net/
 [LZ4]: http://www.lz4.org/
 
-Zstd can also offer stronger compression ratio at the cost of compression speed. 
-Speed vs Compression trade-off is configurable by small increment. Decompression speed is preserved and remain roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib].
+Zstd can also offer stronger compression ratios at the cost of compression speed.
+Speed vs Compression trade-off is configurable by small increments. Decompression speed is preserved and remains roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib] or lzma.
 
-The following test is run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus].
+The following tests were run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus].
 
 Compression Speed vs Ratio | Decompression Speed
 ---------------------------|--------------------
 ![Compression Speed vs Ratio](images/Cspeed4.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed4.png "Decompression Speed")
 
-Several algorithms can produce higher compression ratio at slower speed, falling outside of the graph.
+Several algorithms can produce higher compression ratios, but at slower speeds, falling outside of the graph.
 For a larger picture including very slow modes, [click on this link](images/DCspeed5.png) .
 
 
@@ -74,8 +74,10 @@ Hence, deploying one dictionary per type of data will provide the greater benefi
 
 ### Status
 
-Zstd is in development. The internal format evolves to reach better performance. "Final Format" is projected H1 2016, and will be tagged `v1.0`. Zstd offers legacy support, meaning any data compressed by any version >= 0.1 (therefore including current one) remain decodable in the future.
-The library is also quite robust, able to withstand hazards situations, including invalid inputs. Library reliability has been tested using [Fuzz Testing](https://en.wikipedia.org/wiki/Fuzz_testing), with both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). Therefore, Zstandard is considered safe for production environments.
+Zstd compression format has reached "Final status". It means it is planned to become the official stable zstd format and be tagged `v1.0`. The reason it's not yet tagged `v1.0` is that it is currently going through its "validation period", making sure the format holds all its promises and nothing was missed.
+Zstd library also offers legacy decoder support. Any data compressed by any version >= `v0.1` (hence including current one) remains decodable now and in the future.
+The library has been validated using strong [fuzzer tests](https://en.wikipedia.org/wiki/Fuzz_testing), including both [internal tools](programs/fuzzer.c) and [external ones](http://lcamtuf.coredump.cx/afl). It's able to withstand hazardous situations, including invalid inputs.
+As a consequence, Zstandard is considered safe for, and is currently used in, production environments.
 
 ### Branch Policy
 

From 4948f270b36e81f753e4f8d9c8dd4388103e8b76 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 15:38:51 +0200
Subject: [PATCH 15/20] make room for reserved "information bit" in frame
 header

---
 NEWS                             | 5 +++--
 lib/decompress/zstd_decompress.c | 8 +++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index c7aeae98d..dc2d66bc1 100644
--- a/NEWS
+++ b/NEWS
@@ -5,8 +5,9 @@ New : Visual build scripts, by Christophe Chevalier
 New : Support for Sparse File-systems (do not use space for zero-filled sectors)
 New : Frame checksum support
 New : Support pass-through mode (when using `-df`)
-New : API : dictionary files from custom content, by Giuseppe Ottaviano
-New : API support for custom malloc/free functions
+API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()`
+API : create dictionary files from custom content, by Giuseppe Ottaviano
+API : support for custom malloc/free functions
 New : controllable Dictionary ID
 New : Support for skippable frames
 
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 1763499d1..b22021ed7 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -223,8 +223,10 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
     // new
    1 byte - FrameHeaderDescription :
    bit 0-1 : dictID (0, 1, 2 or 4 bytes)
-   bit 2-4 : reserved (must be zero)
-   bit 5   : SkippedWindowLog (if 1, WindowLog byte is not present)
+   bit 2   : checksumFlag
+   bit 3   : reserved (must be zero)
+   bit 4   : reserved (unused, can be any value)
+   bit 5   : Single Segment (if 1, WindowLog byte is not present)
    bit 6-7 : FrameContentFieldSize (0, 2, 4, or 8)
              if (SkippedWindowLog && !FrameContentFieldsize) FrameContentFieldsize=1;
 
@@ -365,7 +367,7 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
         U32 windowSize = 0;
         U32 dictID = 0;
         U64 frameContentSize = 0;
-        if ((fhdByte & 0x18) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits */
+        if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported);   /* reserved bits, which must be zero */
         if (!directMode) {
             BYTE const wlByte = ip[pos++];
             U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;

From 61cc4f207eda14582acccdf5d97ed879d7b30e25 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 15:44:30 +0200
Subject: [PATCH 16/20] Added build/README from @KrzysFR (#201)

---
 build/README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 build/README.md

diff --git a/build/README.md b/build/README.md
new file mode 100644
index 000000000..e1658493d
--- /dev/null
+++ b/build/README.md
@@ -0,0 +1,51 @@
+Here are a few command lines for reference :
+
+### Build with Visual Studio 2013 for msvcr120.dll
+
+Running the following command will build both the `Release Win32` and `Release x64` versions:
+```batch
+build\build.VS2013.cmd
+```
+The result of each build will be in the corresponding `build\bin\Release\{ARCH}\` folder.
+
+If you only need one architecture:
+- Win32: `build\build.generic.cmd VS2013 Win32 Release v120`
+- x64: `build\build.generic.cmd VS2013 x64 Release v120`
+
+If you want a Debug build:
+- Win32: `build\build.generic.cmd VS2013 Win32 Debug v120`
+- x64: `build\build.generic.cmd VS2013 x64 Debug v120`
+
+### Build with Visual Studio 2015 for msvcr140.dll
+
+Running the following command will build both the `Release Win32` and `Release x64` versions:
+```batch
+build\build.VS2015.cmd
+```
+The result of each build will be in the corresponding `build\bin\Release\{ARCH}\` folder.
+
+If you only need one architecture:
+- Win32: `build\build.generic.cmd VS2015 Win32 Release v140`
+- x64: `build\build.generic.cmd VS2015 x64 Release v140`
+
+If you want a Debug build:
+- Win32: `build\build.generic.cmd VS2015 Win32 Debug v140`
+- x64: `build\build.generic.cmd VS2015 x64 Debug v140`
+
+### Build with Visual Studio 2015 for msvcr120.dll
+
+You need to invoke `build\build.generic.cmd` with the proper arguments:
+
+**For Win32**
+```batch
+build\build.generic.cmd VS2015 Win32 Release v120
+```
+The result of the build will be in the `build\bin\Release\Win32\` folder.
+
+**For x64**
+```batch
+build\build.generic.cmd VS2015 x64 Release v120
+```
+The result of the build will be in the `build\bin\Release\x64\` folder.
+
+If you want Debug builds, replace `Release` with `Debug`.

From 201d82f5d06278deb43b3fcdefe518866dc5a7e0 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 15:53:02 +0200
Subject: [PATCH 17/20] `.cmd` files use windows-style eol

---
 .gitattributes | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitattributes b/.gitattributes
index da0f7a530..387080198 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -18,3 +18,4 @@
 
 # Windows
 *.bat text eol=crlf
+*.cmd text eol=crlf

From 12d881e810ead2577b795942d9c37f031db27fa4 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 16:08:40 +0200
Subject: [PATCH 18/20] Move `build` into `projects/build`

---
 {build => projects/build}/README.md         | 0
 {build => projects/build}/build.VS2010.cmd  | 0
 {build => projects/build}/build.VS2012.cmd  | 0
 {build => projects/build}/build.VS2013.cmd  | 0
 {build => projects/build}/build.VS2015.cmd  | 0
 {build => projects/build}/build.generic.cmd | 3 +--
 6 files changed, 1 insertion(+), 2 deletions(-)
 rename {build => projects/build}/README.md (100%)
 rename {build => projects/build}/build.VS2010.cmd (100%)
 rename {build => projects/build}/build.VS2012.cmd (100%)
 rename {build => projects/build}/build.VS2013.cmd (100%)
 rename {build => projects/build}/build.VS2015.cmd (100%)
 rename {build => projects/build}/build.generic.cmd (97%)

diff --git a/build/README.md b/projects/build/README.md
similarity index 100%
rename from build/README.md
rename to projects/build/README.md
diff --git a/build/build.VS2010.cmd b/projects/build/build.VS2010.cmd
similarity index 100%
rename from build/build.VS2010.cmd
rename to projects/build/build.VS2010.cmd
diff --git a/build/build.VS2012.cmd b/projects/build/build.VS2012.cmd
similarity index 100%
rename from build/build.VS2012.cmd
rename to projects/build/build.VS2012.cmd
diff --git a/build/build.VS2013.cmd b/projects/build/build.VS2013.cmd
similarity index 100%
rename from build/build.VS2013.cmd
rename to projects/build/build.VS2013.cmd
diff --git a/build/build.VS2015.cmd b/projects/build/build.VS2015.cmd
similarity index 100%
rename from build/build.VS2015.cmd
rename to projects/build/build.VS2015.cmd
diff --git a/build/build.generic.cmd b/projects/build/build.generic.cmd
similarity index 97%
rename from build/build.generic.cmd
rename to projects/build/build.generic.cmd
index ed46c922f..362952340 100644
--- a/build/build.generic.cmd
+++ b/projects/build/build.generic.cmd
@@ -33,7 +33,7 @@ IF %msbuild_version% == VS2013 SET msbuild="%programfiles(x86)%\MSBuild\12.0\Bin
 IF %msbuild_version% == VS2015 SET msbuild="%programfiles(x86)%\MSBuild\14.0\Bin\MSBuild.exe"
 rem TODO: Visual Studio "15" (vNext) will use MSBuild 15.0 ?
 
-SET project="%~p0\..\projects\VS2010\zstd.sln"
+SET project="%~p0\..\VS2010\zstd.sln"
 
 SET msbuild_params=/verbosity:minimal /nologo /t:Clean,Build /p:Platform=%msbuild_platform% /p:Configuration=%msbuild_configuration%
 IF NOT "%msbuild_toolset%" == "" SET msbuild_params=%msbuild_params% /p:PlatformToolset=%msbuild_toolset%
@@ -50,4 +50,3 @@ IF ERRORLEVEL 1 EXIT /B 1
 echo # Success
 echo # OutDir: %output%
 echo #
-

From 510cff3570862eba04a09dd72c23298346902177 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Thu, 16 Jun 2016 16:39:55 +0200
Subject: [PATCH 19/20] minor comment change

---
 lib/common/fse.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/common/fse.h b/lib/common/fse.h
index 6be3e5aa0..e711d0135 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -132,8 +132,8 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src,
 /*! FSE_optimalTableLog():
     dynamically downsize 'tableLog' when conditions are met.
     It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-    @return : recommended tableLog (necessarily <= initial 'tableLog') */
-unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
+    @return : recommended tableLog (necessarily <= 'maxTableLog') */
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 
 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)

From 19cab46f2f2ae3e512c00df98043083101410810 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Fri, 17 Jun 2016 12:54:52 +0200
Subject: [PATCH 20/20] Joined `seqStore` initialization at dispatch point

---
 .gitignore                   |  1 +
 Makefile                     |  1 +
 lib/compress/zstd_compress.c | 11 ++---------
 lib/compress/zstd_opt.h      |  2 --
 programs/.gitignore          |  1 +
 5 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index a06c2afa2..181652401 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@
 *.dylib
 
 # Executables
+zstd
 *.exe
 *.out
 *.app
diff --git a/Makefile b/Makefile
index 18db04245..77a67a231 100644
--- a/Makefile
+++ b/Makefile
@@ -51,6 +51,7 @@ all:
 
 zstdprogram:
 	$(MAKE) -C $(PRGDIR)
+	mv $(PRGDIR)/zstd .
 
 zlibwrapper:
 	$(MAKE) -C $(ZSTDDIR) all
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 1ae321838..b8d1d2c0a 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1128,7 +1128,6 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1];
 
     /* init */
-    ZSTD_resetSeqStore(seqStorePtr);
     ip += (ip==lowest);
     {   U32 const maxRep = (U32)(ip-lowest);
         if (offset_1 > maxRep) offset_1 = 0;
@@ -1239,7 +1238,6 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
 
     /* init */
-    ZSTD_resetSeqStore(seqStorePtr);
     /* skip first position to avoid read overflow during repcode match check */
     hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
     ip++;
@@ -1743,7 +1741,6 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     /* init */
     ip += (ip==base);
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_resetSeqStore(seqStorePtr);
     {   U32 i;
         U32 const maxRep = (U32)(ip-base);
         for (i=0; i<ZSTD_REP_INIT; i++) {
@@ -1913,7 +1910,6 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
     { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
 
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_resetSeqStore(seqStorePtr);
     ip += (ip == prefixStart);
 
     /* Match Loop */
@@ -2097,11 +2093,7 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
     static const ZSTD_blockCompressor blockCompressor[2][6] = {
-#if 1
         { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
-#else
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict },
-#endif
         { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
     };
 
@@ -2111,8 +2103,9 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int
 
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
+    ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;   /* don't even attempt compression below a certain srcSize */
+    ZSTD_resetSeqStore(&(zc->seqStore));
     blockCompressor(zc, src, srcSize);
     return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
 }
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index 8b15bf6ad..97b1623ba 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -465,7 +465,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
 
     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_resetSeqStore(seqStorePtr);
     ZSTD_rescaleFreqs(seqStorePtr);
     ip += (ip==prefixStart);
     { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
@@ -757,7 +756,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
 
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_resetSeqStore(seqStorePtr);
     ZSTD_rescaleFreqs(seqStorePtr);
     ip += (ip==prefixStart);
 
diff --git a/programs/.gitignore b/programs/.gitignore
index 5f50de0dd..cbe39dcdf 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -50,3 +50,4 @@ afl
 # Misc files
 *.bat
 fileTests.sh
+dirTest*