diff --git a/.coverity.yml b/.coverity.yml
new file mode 100644
index 000000000..907f09601
--- /dev/null
+++ b/.coverity.yml
@@ -0,0 +1,5 @@
+configurationVersion: 1
+
+filters:
+    # third-party embedded
+    - filePath: lib/dictBuilder/divsufsort.c
diff --git a/.gitignore b/.gitignore
index 181652401..f8024e024 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,6 @@ _zstdbench/
 
 # CMake
 projects/cmake/
+
+# Test artefacts
+tmp*
diff --git a/Makefile b/Makefile
index 428452880..9f5e1ebfa 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,7 @@ else
 VOID = /dev/null
 endif
 
-.PHONY: default all zlibwrapper zstdprogram clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan
+.PHONY: default all zlibwrapper zstdprogram zstd clean install uninstall travis-install test clangtest gpptest armtest usan asan uasan
 
 default: zstdprogram
 
@@ -53,6 +53,8 @@ zstdprogram:
 	$(MAKE) -C $(PRGDIR)
 	cp $(PRGDIR)/zstd .
 
+zstd: zstdprogram
+
 zlibwrapper:
 	$(MAKE) -C $(ZSTDDIR) all
 	$(MAKE) -C $(ZWRAPDIR) all
@@ -168,6 +170,9 @@ bmix32test: clean
 
 bmi32test: clean
 	CFLAGS="-O3 -mbmi -m32 -Werror" $(MAKE) -C $(PRGDIR) test
+
+staticAnalyze: clean
+	CPPFLAGS=-g scan-build --status-bugs -v $(MAKE) all
 endif
 
 
@@ -187,7 +192,7 @@ gcc5install:
 
 gcc6install:
 	sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
-	sudo apt-get update -y -qq 
+	sudo apt-get update -y -qq
 	sudo apt-get install -y -qq gcc-6-multilib
 
 arminstall: clean
diff --git a/NEWS b/NEWS
index a980e80e0..e3dd19102 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,33 @@
+v0.7.5
+Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
+Modified : minor compression level adaptations
+Update : specification, to v0.1.2 : max huffman depth at 11 bits
+changed : zstd.h moved to /lib directory
+
+v0.7.4
+Added : homebrew for Mac
+Added : more examples
+Fixed : segfault when using small dictionaries, reported by Felix Handte
+Modified : default compression level for CLI is now 3
+Updated : specification, to v0.1.1
+
+v0.7.3
+New : compression format specification
+New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner.
+New : `ZSTD_getDecompressedSize()`
+New : OpenBSD target, by Juan Francisco Cantero Hurtado
+New : `examples` directory
+fixed : dictBuilder using HC levels, reported by Bartosz Taudul
+fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte
+fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski
+modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section)
+modified : legacy functions no longer need magic number
+
+v0.7.2
+fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
+fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
+fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
+
 v0.7.1
 fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
 fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
diff --git a/README.md b/README.md
index 7b58e5e72..b87e35381 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,9 @@
- **Zstd**, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios at zlib-level and better compression ratios.
+ **Zstd**, short for Zstandard, is a fast lossless compression algorithm,
+ targeting real-time compression scenarios at zlib-level and better compression ratios.
 
-It is provided as a BSD-license package, hosted on Github.
+It is provided as an open-source BSD-licensed **C** library.
+For other programming languages,
+you can consult a list of known ports on [Zstandard homepage](http://www.zstd.net/#other-languages).
 
 |Branch      |Status   |
 |------------|---------|
diff --git a/examples/.gitignore b/examples/.gitignore
new file mode 100644
index 000000000..9d241dba6
--- /dev/null
+++ b/examples/.gitignore
@@ -0,0 +1,10 @@
+#build
+simple_compression
+simple_decompression
+dictionary_compression
+dictionary_decompression
+
+#test artefact
+tmp*
+test*
+*.zst
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100644
index 000000000..5e3f0e17f
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,59 @@
+# ##########################################################################
+# ZSTD educational examples - Makefile
+# Copyright (C) Yann Collet 2016
+#
+# GPL v2 License
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# You can contact the author at :
+#  - zstd homepage : http://www.zstd.net/
+# ##########################################################################
+
+# This Makefile presumes libzstd is installed, using `sudo make install`
+
+LDFLAGS+= -lzstd
+
+.PHONY: default all clean test
+
+default: all
+
+all: simple_compression simple_decompression \
+	dictionary_compression dictionary_decompression
+
+simple_compression : simple_compression.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+simple_decompression : simple_decompression.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+dictionary_compression : dictionary_compression.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+dictionary_decompression : dictionary_decompression.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+clean:
+	@rm -f core *.o tmp* result* *.zst \
+        simple_compression simple_decompression \
+		dictionary_compression dictionary_decompression
+	@echo Cleaning completed
+
+test: all
+	cp README.md tmp
+	./simple_compression tmp
+	@echo starting simple_decompression
+	./simple_decompression tmp.zst
+	@echo tests completed
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 000000000..2f4603881
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,18 @@
+Zstandard library : usage examples
+==================================
+
+- [Simple compression](simple_compression.c)
+  Compress a single file.
+  Introduces usage of : `ZSTD_compress()`
+
+- [Simple decompression](simple_decompression.c)
+  Decompress a single file compressed by zstd.
+  Introduces usage of : `ZSTD_decompress()`
+
+- [Dictionary compression](dictionary_compression.c)
+  Compress multiple files using the same dictionary.
+  Introduces usage of : `ZSTD_createCDict()` and `ZSTD_compress_usingCDict()`
+
+- [Dictionary decompression](dictionary_decompression.c)
+  Decompress multiple files using the same dictionary.
+  Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()`
diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c
new file mode 100644
index 000000000..c4dc1b90d
--- /dev/null
+++ b/examples/dictionary_compression.c
@@ -0,0 +1,163 @@
+/*
+  Dictionary compression
+  Educational program using zstd library
+  Copyright (C) Yann Collet 2016
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd homepage : http://www.zstd.net/
+*/
+
+#include <stdlib.h>    // malloc, exit
+#include <stdio.h>     // printf
+#include <string.h>    // strerror
+#include <errno.h>     // errno
+#include <sys/stat.h>  // stat
+#include <zstd.h>      // presumes zstd library is installed
+
+
+static off_t fsize_X(const char *filename)
+{
+    struct stat st;
+    if (stat(filename, &st) == 0) return st.st_size;
+    /* error */
+    perror(filename);
+    exit(1);
+}
+
+static FILE* fopen_X(const char *filename, const char *instruction)
+{
+    FILE* const inFile = fopen(filename, instruction);
+    if (inFile) return inFile;
+    /* error */
+    perror(filename);
+    exit(2);
+}
+
+static void* malloc_X(size_t size)
+{
+    void* const buff = malloc(size);
+    if (buff) return buff;
+    /* error */
+    perror(NULL);
+    exit(3);
+}
+
+static void* loadFile_X(const char* fileName, size_t* size)
+{
+    off_t const buffSize = fsize_X(fileName);
+    FILE* const inFile = fopen_X(fileName, "rb");
+    void* const buffer = malloc_X(buffSize);
+    size_t const readSize = fread(buffer, 1, buffSize, inFile);
+    if (readSize != (size_t)buffSize) {
+        fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno));
+        exit(4);
+    }
+    fclose(inFile);
+    *size = buffSize;
+    return buffer;
+}
+
+static void saveFile_X(const char* fileName, const void* buff, size_t buffSize)
+{
+    FILE* const oFile = fopen_X(fileName, "wb");
+    size_t const wSize = fwrite(buff, 1, buffSize, oFile);
+    if (wSize != (size_t)buffSize) {
+        fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno));
+        exit(5);
+    }
+    if (fclose(oFile)) {
+        perror(fileName);
+        exit(6);
+    }
+}
+
+/* createDict() :
+   `dictFileName` is supposed to have been created using `zstd --train` */
+static const ZSTD_CDict* createDict(const char* dictFileName)
+{
+    size_t dictSize;
+    printf("loading dictionary %s \n", dictFileName);
+    void* const dictBuffer = loadFile_X(dictFileName, &dictSize);
+    const ZSTD_CDict* const ddict = ZSTD_createCDict(dictBuffer, dictSize, 3);
+    free(dictBuffer);
+    return ddict;
+}
+
+
+static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdict)
+{
+    size_t fSize;
+    void* const fBuff = loadFile_X(fname, &fSize);
+    size_t const cBuffSize = ZSTD_compressBound(fSize);
+    void* const cBuff = malloc_X(cBuffSize);
+
+    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+    size_t const cSize = ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict);
+    if (ZSTD_isError(cSize)) {
+        fprintf(stderr, "error compressing %s : %s \n", fname, ZSTD_getErrorName(cSize));
+        exit(7);
+    }
+
+    saveFile_X(oname, cBuff, cSize);
+
+    /* success */
+    printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
+
+    ZSTD_freeCCtx(cctx);
+    free(fBuff);
+    free(cBuff);
+}
+
+
+static char* createOutFilename(const char* filename)
+{
+    size_t const inL = strlen(filename);
+    size_t const outL = inL + 5;
+    void* outSpace = malloc_X(outL);
+    memset(outSpace, 0, outL);
+    strcat(outSpace, filename);
+    strcat(outSpace, ".zst");
+    return (char*)outSpace;
+}
+
+int main(int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+
+    if (argc<3) {
+        fprintf(stderr, "wrong arguments\n");
+        fprintf(stderr, "usage:\n");
+        fprintf(stderr, "%s [FILES] dictionary\n", exeName);
+        return 1;
+    }
+
+    /* load dictionary only once */
+    const char* const dictName = argv[argc-1];
+    const ZSTD_CDict* const dictPtr = createDict(dictName);
+
+    int u;
+    for (u=1; u<argc-1; u++) {
+        const char* inFilename = argv[u];
+        char* const outFilename = createOutFilename(inFilename);
+        compress(inFilename, outFilename, dictPtr);
+        free(outFilename);
+    }
+
+    printf("All %u files compressed. \n", argc-2);
+}
diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c
new file mode 100644
index 000000000..8c5034b75
--- /dev/null
+++ b/examples/dictionary_decompression.c
@@ -0,0 +1,136 @@
+/*
+  Dictionary decompression
+  Educational program using zstd library
+  Copyright (C) Yann Collet 2016
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd homepage : http://www.zstd.net/
+*/
+
+#include <stdlib.h>    // malloc, exit
+#include <stdio.h>     // printf
+#include <string.h>    // strerror
+#include <errno.h>     // errno
+#include <sys/stat.h>  // stat
+#include <zstd.h>      // presumes zstd library is installed
+
+
+static off_t fsize_X(const char *filename)
+{
+    struct stat st;
+    if (stat(filename, &st) == 0) return st.st_size;
+    /* error */
+    printf("stat: %s : %s \n", filename, strerror(errno));
+    exit(1);
+}
+
+static FILE* fopen_X(const char *filename, const char *instruction)
+{
+    FILE* const inFile = fopen(filename, instruction);
+    if (inFile) return inFile;
+    /* error */
+    printf("fopen: %s : %s \n", filename, strerror(errno));
+    exit(2);
+}
+
+static void* malloc_X(size_t size)
+{
+    void* const buff = malloc(size);
+    if (buff) return buff;
+    /* error */
+    printf("malloc: %s \n", strerror(errno));
+    exit(3);
+}
+
+static void* loadFile_X(const char* fileName, size_t* size)
+{
+    off_t const buffSize = fsize_X(fileName);
+    FILE* const inFile = fopen_X(fileName, "rb");
+    void* const buffer = malloc_X(buffSize);
+    size_t const readSize = fread(buffer, 1, buffSize, inFile);
+    if (readSize != (size_t)buffSize) {
+        printf("fread: %s : %s \n", fileName, strerror(errno));
+        exit(4);
+    }
+    fclose(inFile);
+    *size = buffSize;
+    return buffer;
+}
+
+/* createDict() :
+   `dictFileName` is supposed to have been created using `zstd --train` */
+static const ZSTD_DDict* createDict(const char* dictFileName)
+{
+    size_t dictSize;
+    printf("loading dictionary %s \n", dictFileName);
+    void* const dictBuffer = loadFile_X(dictFileName, &dictSize);
+    const ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);
+    free(dictBuffer);
+    return ddict;
+}
+
+
+static void decompress(const char* fname, const ZSTD_DDict* ddict)
+{
+    size_t cSize;
+    void* const cBuff = loadFile_X(fname, &cSize);
+    unsigned long long const rSize = ZSTD_getDecompressedSize(cBuff, cSize);
+    if (rSize==0) {
+        printf("%s : original size unknown \n", fname);
+        exit(5);
+    }
+    void* const rBuff = malloc_X(rSize);
+
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    size_t const dSize = ZSTD_decompress_usingDDict(dctx, rBuff, rSize, cBuff, cSize, ddict);
+
+    if (dSize != rSize) {
+        printf("error decoding %s : %s \n", fname, ZSTD_getErrorName(dSize));
+        exit(7);
+    }
+
+    /* success */
+    printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);
+
+    ZSTD_freeDCtx(dctx);
+    free(rBuff);
+    free(cBuff);
+}
+
+
+int main(int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+
+    if (argc<3) {
+        printf("wrong arguments\n");
+        printf("usage:\n");
+        printf("%s [FILES] dictionary\n", exeName);
+        return 1;
+    }
+
+    /* load dictionary only once */
+    const char* const dictName = argv[argc-1];
+    const ZSTD_DDict* const dictPtr = createDict(dictName);
+
+    int u;
+    for (u=1; u<argc-1; u++) decompress(argv[u], dictPtr);
+
+    printf("All %u files correctly decoded (in memory) \n", argc-2);
+}
diff --git a/examples/simple_compression.c b/examples/simple_compression.c
new file mode 100644
index 000000000..71a40c271
--- /dev/null
+++ b/examples/simple_compression.c
@@ -0,0 +1,142 @@
+/*
+  Simple compression
+  Educational program using zstd library
+  Copyright (C) Yann Collet 2016
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd homepage : http://www.zstd.net/
+*/
+
+#include <stdlib.h>    // malloc, exit
+#include <stdio.h>     // fprintf, perror
+#include <string.h>    // strerror
+#include <errno.h>     // errno
+#include <sys/stat.h>  // stat
+#include <zstd.h>      // presumes zstd library is installed
+
+
+static off_t fsize_X(const char *filename)
+{
+    struct stat st;
+    if (stat(filename, &st) == 0) return st.st_size;
+    /* error */
+    perror(filename);
+    exit(1);
+}
+
+static FILE* fopen_X(const char *filename, const char *instruction)
+{
+    FILE* const inFile = fopen(filename, instruction);
+    if (inFile) return inFile;
+    /* error */
+    perror(filename);
+    exit(2);
+}
+
+static void* malloc_X(size_t size)
+{
+    void* const buff = malloc(size);
+    if (buff) return buff;
+    /* error */
+    perror(NULL);
+    exit(3);
+}
+
+static void* loadFile_X(const char* fileName, size_t* size)
+{
+    off_t const buffSize = fsize_X(fileName);
+    FILE* const inFile = fopen_X(fileName, "rb");
+    void* const buffer = malloc_X(buffSize);
+    size_t const readSize = fread(buffer, 1, buffSize, inFile);
+    if (readSize != (size_t)buffSize) {
+        fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno));
+        exit(4);
+    }
+    fclose(inFile);
+    *size = buffSize;
+    return buffer;
+}
+
+
+static void saveFile_X(const char* fileName, const void* buff, size_t buffSize)
+{
+    FILE* const oFile = fopen_X(fileName, "wb");
+    size_t const wSize = fwrite(buff, 1, buffSize, oFile);
+    if (wSize != (size_t)buffSize) {
+        fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno));
+        exit(5);
+    }
+    if (fclose(oFile)) {
+        perror(fileName);
+        exit(6);
+    }
+}
+
+
+static void compress(const char* fname, const char* oname)
+{
+    size_t fSize;
+    void* const fBuff = loadFile_X(fname, &fSize);
+    size_t const cBuffSize = ZSTD_compressBound(fSize);
+    void* const cBuff = malloc_X(cBuffSize);
+
+    size_t const cSize = ZSTD_compress(cBuff, cBuffSize, fBuff, fSize, 1);
+    if (ZSTD_isError(cSize)) {
+        fprintf(stderr, "error compressing %s : %s \n", fname, ZSTD_getErrorName(cSize));
+        exit(7);
+    }
+
+    saveFile_X(oname, cBuff, cSize);
+
+    /* success */
+    printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
+
+    free(fBuff);
+    free(cBuff);
+}
+
+
+static const char* createOutFilename(const char* filename)
+{
+    size_t const inL = strlen(filename);
+    size_t const outL = inL + 5;
+    void* outSpace = malloc_X(outL);
+    memset(outSpace, 0, outL);
+    strcat(outSpace, filename);
+    strcat(outSpace, ".zst");
+    return (const char*)outSpace;
+}
+
+int main(int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+    const char* const inFilename = argv[1];
+
+    if (argc!=2) {
+        printf("wrong arguments\n");
+        printf("usage:\n");
+        printf("%s FILE\n", exeName);
+        return 1;
+    }
+
+    const char* const outFilename = createOutFilename(inFilename);
+    compress(inFilename, outFilename);
+
+    return 0;
+}
diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c
new file mode 100644
index 000000000..b907afa19
--- /dev/null
+++ b/examples/simple_decompression.c
@@ -0,0 +1,119 @@
+/*
+  Simple decompression
+  Educational program using zstd library
+  Copyright (C) Yann Collet 2016
+
+  GPL v2 License
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License along
+  with this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+  You can contact the author at :
+  - zstd homepage : http://www.zstd.net/
+*/
+
+#include <stdlib.h>    // malloc, exit
+#include <stdio.h>     // printf
+#include <string.h>    // strerror
+#include <errno.h>     // errno
+#include <sys/stat.h>  // stat
+#include <zstd.h>      // presumes zstd library is installed
+
+
+static off_t fsize_X(const char *filename)
+{
+    struct stat st;
+    if (stat(filename, &st) == 0) return st.st_size;
+    /* error */
+    printf("stat: %s : %s \n", filename, strerror(errno));
+    exit(1);
+}
+
+static FILE* fopen_X(const char *filename, const char *instruction)
+{
+    FILE* const inFile = fopen(filename, instruction);
+    if (inFile) return inFile;
+    /* error */
+    printf("fopen: %s : %s \n", filename, strerror(errno));
+    exit(2);
+}
+
+static void* malloc_X(size_t size)
+{
+    void* const buff = malloc(size);
+    if (buff) return buff;
+    /* error */
+    printf("malloc: %s \n", strerror(errno));
+    exit(3);
+}
+
+static void* loadFile_X(const char* fileName, size_t* size)
+{
+    off_t const buffSize = fsize_X(fileName);
+    FILE* const inFile = fopen_X(fileName, "rb");
+    void* const buffer = malloc_X(buffSize);
+    size_t const readSize = fread(buffer, 1, buffSize, inFile);
+    if (readSize != (size_t)buffSize) {
+        printf("fread: %s : %s \n", fileName, strerror(errno));
+        exit(4);
+    }
+    fclose(inFile);
+    *size = buffSize;
+    return buffer;
+}
+
+
+static void decompress(const char* fname)
+{
+    size_t cSize;
+    void* const cBuff = loadFile_X(fname, &cSize);
+    unsigned long long const rSize = ZSTD_getDecompressedSize(cBuff, cSize);
+    if (rSize==0) {
+        printf("%s : original size unknown \n", fname);
+        exit(5);
+    }
+    void* const rBuff = malloc_X(rSize);
+
+    size_t const dSize = ZSTD_decompress(rBuff, rSize, cBuff, cSize);
+
+    if (dSize != rSize) {
+        printf("error decoding %s : %s \n", fname, ZSTD_getErrorName(dSize));
+        exit(7);
+    }
+
+    /* success */
+    printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);
+
+    free(rBuff);
+    free(cBuff);
+}
+
+
+int main(int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+
+    if (argc!=2) {
+        printf("wrong arguments\n");
+        printf("usage:\n");
+        printf("%s FILE\n", exeName);
+        return 1;
+    }
+
+    decompress(argv[1]);
+
+    printf("%s correctly decoded (in memory). \n", argv[1]);
+
+    return 0;
+}
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 000000000..b43a8543a
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1,2 @@
+# make install artefact
+libzstd.pc
diff --git a/lib/Makefile b/lib/Makefile
index 76731abc1..1f4f7eb85 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -31,9 +31,9 @@
 # ################################################################
 
 # Version numbers
-LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
-LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
-LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./common/zstd.h`
+LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
+LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
+LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
 LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
 LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
 LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
@@ -46,7 +46,7 @@ PREFIX ?= /usr/local
 LIBDIR ?= $(PREFIX)/lib
 INCLUDEDIR=$(PREFIX)/include
 
-CPPFLAGS= -I./common -DXXH_NAMESPACE=ZSTD_
+CPPFLAGS= -I. -I./common -DXXH_NAMESPACE=ZSTD_
 CFLAGS ?= -O3
 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
@@ -117,7 +117,7 @@ install: libzstd libzstd.pc
 	@cp -a libzstd.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
 	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
 	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
-	@install -m 644 common/zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
 	@install -m 644 common/zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
 	@install -m 644 dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd static and shared library installed
diff --git a/lib/README.md b/lib/README.md
index 45e8e6fdc..2c24c254b 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -1,63 +1,57 @@
 zstd - library files
 ================================
 
-The __lib__ directory contains several files, but depending on target use case, some of them may not be necessary.
-
-#### Minimal library files
-
-To build the zstd library the following files are required:
-
-- [common/bitstream.h](common/bitstream.h)
-- [common/error_private.h](common/error_private.h)
-- [common/error_public.h](common/error_public.h)
-- common/fse.h
-- common/fse_decompress.c
-- common/huf.h
-- [common/mem.h](common/mem.h)
-- [common/zstd.h]
-- common/zstd_internal.h
-- compress/fse_compress.c
-- compress/huf_compress.c
-- compress/zstd_compress.c
-- compress/zstd_opt.h
-- decompress/huf_decompress.c
-- decompress/zstd_decompress.c
-
-Stable API is exposed in [common/zstd.h].
-Advanced and experimental API can be enabled by defining `ZSTD_STATIC_LINKING_ONLY`.
-Never use them with a dynamic library, as their definition may change in future versions.
-
-[common/zstd.h]: common/zstd.h
+The __lib__ directory contains several directories.
+Depending on target use case, it's enough to include only files from relevant directories.
 
 
-#### Separate compressor and decompressor
+#### API
 
-To build a separate zstd compressor all files from `common/` and `compressor/` directories are required.
-In a similar way to build a separate zstd decompressor all files from `common/` and `decompressor/` directories are needed.
+Zstandard's stable API is exposed within [zstd.h](zstd.h),
+at the root of `lib` directory.
 
 
-#### Buffered streaming
+#### Advanced API
 
-This complementary API makes streaming integration easier.
-It is used by `zstd` command line utility, and [7zip plugin](http://mcmilk.de/projects/7-Zip-ZStd) :
+Some additional API may be useful if you're looking into advanced features :
+- common/error_public.h : transform function result into an `enum`,
+                          for precise error handling.
+- ZSTD_STATIC_LINKING_ONLY : if you define this macro _before_ including `zstd.h`,
+                          it will give access to advanced and experimental API.
+                          These APIs shall ___never be used with dynamic library___ !
+                          They are not "stable", their definition may change in the future.
+                          Only static linking is allowed.
 
-- common/zbuff.h
-- compress/zbuff_compress.c
-- decompress/zbuff_decompress.c
 
-#### Dictionary builder
+#### Modular build
 
-To create dictionaries from training sets :
+Directory `common/` is required in all circumstances.
+You can select to support compression only, by just adding files from the `compress/` directory,
+In a similar way, you can build a decompressor-only library with the `decompress/` directory.
+
+Other optional functionalities provided are :
+
+- `dictBuilder/`  : this directory contains source files required to create dictionaries.
+                    The API can be consulted in `dictBuilder/zdict.h`.
+                    It also depends on `common/` and `compress/` .
+
+- `legacy/` : this directory contains source code to decompress previous versions of Zstd,
+              starting from `v0.1`. The main API can be consulted in `legacy/zstd_legacy.h`.
+              Note that it's required to compile the library with `ZSTD_LEGACY_SUPPORT = 1` .
+              Advanced API from each version can be found in its relevant header file.
+              For example, advanced API for version `v0.4` is in `zstd_v04.h` .
+              It also depends on `common/` and `decompress/` .
+
+
+#### Streaming API
+
+Streaming is currently provided by `common/zbuff.h`.
 
-- dictBuilder/divsufsort.c
-- dictBuilder/divsufsort.h
-- dictBuilder/zdict.c
-- dictBuilder/zdict.h
 
 #### Miscellaneous
 
 The other files are not source code. There are :
 
  - LICENSE : contains the BSD license text
- - Makefile : script to compile or install zstd library (static or dynamic)
- - libzstd.pc.in : for pkg-config (make install)
+ - Makefile : script to compile or install zstd library (static and dynamic)
+ - libzstd.pc.in : for pkg-config (`make install`)
diff --git a/lib/common/error_public.h b/lib/common/error_public.h
index e8cfcc918..29050b3b3 100644
--- a/lib/common/error_public.h
+++ b/lib/common/error_public.h
@@ -63,7 +63,11 @@ typedef enum {
   ZSTD_error_maxCode
 } ZSTD_ErrorCode;
 
-/* note : compare with size_t function results using ZSTD_getError() */
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare directly with enum list published into "error_public.h" */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
 
 
 #if defined (__cplusplus)
diff --git a/lib/common/huf.h b/lib/common/huf.h
index 3b837f101..29bab4b76 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -100,7 +100,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
 /* *** Constants *** */
 #define HUF_TABLELOG_ABSOLUTEMAX  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #define HUF_TABLELOG_MAX  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT  HUF_TABLELOG_MAX   /* tableLog by default, when not specified */
+#define HUF_TABLELOG_DEFAULT  11       /* tableLog by default, when not specified */
 #define HUF_SYMBOLVALUE_MAX 255
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 9156bfda9..f76c52d9f 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -44,16 +44,14 @@ extern "C" {
 ******************************************/
 #include <stddef.h>     /* size_t, ptrdiff_t */
 #include <string.h>     /* memcpy */
-#if defined(_MSC_VER)   /* Visual Studio */
-#   include <stdlib.h>  /* _byteswap_ulong */
-#endif
 
 
 /*-****************************************
 *  Compiler specifics
 ******************************************/
-#if defined(_MSC_VER)
-#   include <intrin.h>   /* _byteswap_ */
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
 #endif
 #if defined(__GNUC__)
 #  define MEM_STATIC static __attribute__((unused))
@@ -65,6 +63,10 @@ extern "C" {
 #  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
 #endif
 
+/* code only tested on 32 and 64 bits systems */
+#define MEM_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
+MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+
 
 /*-**************************************************************
 *  Basic Types
diff --git a/lib/common/zbuff.h b/lib/common/zbuff.h
index e449f6d3b..7820db26d 100644
--- a/lib/common/zbuff.h
+++ b/lib/common/zbuff.h
@@ -44,10 +44,8 @@ extern "C" {
 /* ***************************************************************
 *  Compiler specifics
 *****************************************************************/
-/*!
-*  ZSTD_DLL_EXPORT :
-*  Enable exporting of functions when building a Windows DLL
-*/
+/* ZSTD_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL */
 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
 #  define ZSTDLIB_API __declspec(dllexport)
 #else
@@ -103,8 +101,8 @@ ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCap
 *  @return : nb of bytes still present into internal buffer (0 if it's empty)
 *            or an error code, which can be tested using ZBUFF_isError().
 *
-*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedCInSize / ZBUFF_recommendedCOutSize
-*  input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, it improves latency to use this value (skipped buffering).
+*  Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize()
+*  input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency)
 *  output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
 *  By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
 * **************************************************/
@@ -187,7 +185,7 @@ ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
 /*--- Advanced Streaming function ---*/
 ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
                                                const void* dict, size_t dictSize,
-                                               ZSTD_parameters params, U64 pledgedSrcSize);
+                                               ZSTD_parameters params, unsigned long long pledgedSrcSize);
 
 #endif /* ZBUFF_STATIC_LINKING_ONLY */
 
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 0909955a9..43cbc9a3a 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -51,7 +51,7 @@
 /*-*************************************
 *  Common constants
 ***************************************/
-#define ZSTD_OPT_DEBUG 0     // 3 = compression stats;  5 = check encoded sequences;  9 = full logs
+#define ZSTD_OPT_DEBUG 0     /* 3 = compression stats;  5 = check encoded sequences;  9 = full logs */
 #include <stdio.h>
 #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
     #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
@@ -233,6 +233,6 @@ int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
 /* custom memory allocation functions */
 void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
 void ZSTD_defaultFreeFunction(void* opaque, void* address);
-static ZSTD_customMem const defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
+static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
 
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 3533bb613..b5b0eb440 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -239,7 +239,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 
         /* repay normalized cost */
         {   U32 const noSymbol = 0xF0F0F0F0;
-            U32 rankLast[HUF_TABLELOG_MAX+1];
+            U32 rankLast[HUF_TABLELOG_MAX+2];
             int pos;
 
             /* Get pos of last (smallest) symbol per rank */
@@ -535,6 +535,7 @@ static size_t HUF_compress_internal (
     {   size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
         if (HUF_isError(hSize)) return hSize;
         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
+        //static U64 totalHSize = 0; static U32 nbHSize = 0; totalHSize += hSize; nbHSize++; if ((nbHSize & 63) == 1) printf("average : %6.3f \n", (double)totalHSize / nbHSize);
         op += hSize;
     }
 
diff --git a/lib/compress/zbuff_compress.c b/lib/compress/zbuff_compress.c
index 6ed5e52a4..837d22cf7 100644
--- a/lib/compress/zbuff_compress.c
+++ b/lib/compress/zbuff_compress.c
@@ -137,7 +137,7 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
 
 size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
                                    const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, U64 pledgedSrcSize)
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
     /* allocate buffers */
     {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
@@ -170,9 +170,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
 
 size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
     return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
 }
 
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 42cf648a1..51980d550 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -152,7 +152,7 @@ ZSTD_CCtx* ZSTD_createCCtx(void)
 
 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
 {
-    ZSTD_CCtx* ctx;
+    ZSTD_CCtx* cctx;
 
     if (!customMem.customAlloc && !customMem.customFree)
         customMem = defaultCustomMem;
@@ -160,11 +160,11 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
     if (!customMem.customAlloc || !customMem.customFree)
         return NULL;
 
-    ctx = (ZSTD_CCtx*) customMem.customAlloc(customMem.opaque, sizeof(ZSTD_CCtx));
-    if (!ctx) return NULL;
-    memset(ctx, 0, sizeof(ZSTD_CCtx));
-    memcpy(&ctx->customMem, &customMem, sizeof(ZSTD_customMem));
-    return ctx;
+    cctx = (ZSTD_CCtx*) customMem.customAlloc(customMem.opaque, sizeof(ZSTD_CCtx));
+    if (!cctx) return NULL;
+    memset(cctx, 0, sizeof(ZSTD_CCtx));
+    memcpy(&(cctx->customMem), &customMem, sizeof(ZSTD_customMem));
+    return cctx;
 }
 
 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
@@ -175,6 +175,11 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
     return 0;   /* reserved as a potential error code in the future */
 }
 
+size_t ZSTD_sizeofCCtx(const ZSTD_CCtx* cctx)
+{
+    return sizeof(*cctx) + cctx->workSpaceSize;
+}
+
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
 {
     return &(ctx->seqStore);
@@ -221,7 +226,7 @@ size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSiz
     Both `srcSize` and `dictSize` are optional (use 0 if unknown),
     but if both are 0, no optimization can be done.
     Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U64 srcSize, size_t dictSize)
+ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
 {
     if (srcSize+dictSize == 0) return cPar;   /* no size information available : no adjustment */
 
@@ -244,23 +249,32 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U
 }
 
 
-size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams)   /* hidden interface, for paramagrill */
+size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx* const zc = ZSTD_createCCtx();
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    params.cParams = cParams;
-    params.fParams.contentSizeFlag = 1;
-    ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0);
-    { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize;
-      ZSTD_freeCCtx(zc);
-      return ccsize; }
+    const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+    const U32    divider = (cParams.searchLength==3) ? 3 : 4;
+    const size_t maxNbSeq = blockSize / divider;
+    const size_t tokenSpace = blockSize + 11*maxNbSeq;
+
+    const size_t chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
+    const size_t hSize = ((size_t)1) << cParams.hashLog;
+    const U32 hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
+    const size_t h3Size = ((size_t)1) << hashLog3;
+    const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+
+    size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+                          + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
+    size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
+                             + ((cParams.strategy == ZSTD_btopt) ? optSpace : 0);
+
+    return sizeof(ZSTD_CCtx) + neededSpace;
 }
 
 /*! ZSTD_resetCCtx_advanced() :
     note : 'params' is expected to be validated */
 static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
-                                       ZSTD_parameters params, U64 frameContentSize, U32 reset)
+                                       ZSTD_parameters params, U64 frameContentSize,
+                                       U32 reset)
 {   /* note : params considered validated here */
     const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
     const U32    divider = (params.cParams.searchLength==3) ? 3 : 4;
@@ -268,9 +282,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
     const size_t tokenSpace = blockSize + 11*maxNbSeq;
     const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
     const size_t hSize = ((size_t)1) << params.cParams.hashLog;
-    const U32 hashLog3 = (params.cParams.searchLength>3) ? 0 :
-                        ( (!frameContentSize || frameContentSize >= 8192) ? ZSTD_HASHLOG3_MAX :
-                          ((frameContentSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN) );
+    const U32 hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
     const size_t h3Size = ((size_t)1) << hashLog3;
     const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
 
@@ -427,21 +439,8 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 */
 
 
-/* Frame descriptor
+/* Frame header :
 
-    // old
-   1 byte - Alloc :
-   bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN   (see zstd_internal.h)
-   bit 4   : reserved for windowLog (must be zero)
-   bit 5   : reserved (must be zero)
-   bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
-
-   1 byte - checker :
-   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
-   bit 2-7 : reserved (must be zero)
-
-
-    // new
    1 byte - FrameHeaderDescription :
    bit 0-1 : dictID (0, 1, 2 or 4 bytes)
    bit 2-4 : reserved (must be zero)
@@ -453,24 +452,24 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
    bit 0-2 : octal Fractional (1/8th)
    bit 3-7 : Power of 2, with 0 = 1 KB (up to 2 TB)
 
+   Optional : content size (0, 1, 2, 4 or 8 bytes)
+   0 : unknown
+   1 : 0-255 bytes
+   2 : 256 - 65535+256
+   8 : up to 16 exa
+
    Optional : dictID (0, 1, 2 or 4 bytes)
    Automatic adaptation
    0 : no dictID
    1 : 1 - 255
    2 : 256 - 65535
    4 : all other values
-
-   Optional : content size (0, 1, 2, 4 or 8 bytes)
-   0 : unknown
-   1 : 0-255 bytes
-   2 : 256 - 65535+256
-   8 : up to 16 exa
 */
 
 
 /* Block format description
 
-   Block = Literal Section - Sequences Section
+   Block = Literals Section - Sequences Section
    Prerequisite : size of (compressed) block, maximum size of regenerated data
 
    1) Literal Section
@@ -478,7 +477,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
    1.1) Header : 1-5 bytes
         flags: 2 bits
             00 compressed by Huff0
-            01 unused
+            01 repeat
             10 is Raw (uncompressed)
             11 is Rle
             Note : using 01 => Huff0 with precomputed table ?
@@ -514,7 +513,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
             else           => 5 bytes (2-2-18-18)
             big endian convention
 
-        1- CTable available (stored into workspace ?)
+        1- CTable available (stored into workspace)
         2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
 
 
@@ -654,11 +653,11 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
         singleStream = 1;
         cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
     } else {
-        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12)
-                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12);
+        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
+                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
     }
 
-    if ((cLitSize==0) || (cLitSize >= srcSize - minGain))
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     if (cLitSize==1)
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
@@ -936,7 +935,7 @@ _check_compressibility:
     `offsetCode` : distance to match, or 0 == repCode.
     `matchCode` : matchLength - MINMATCH
 */
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, size_t offsetCode, size_t matchCode)
+MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 {
 #if 0  /* for debug */
     static const BYTE* g_start = NULL;
@@ -957,7 +956,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
     *seqStorePtr->litLength++ = (U16)litLength;
 
     /* match offset */
-    *(seqStorePtr->offset++) = (U32)offsetCode + 1;
+    *(seqStorePtr->offset++) = offsetCode + 1;
 
     /* match Length */
     if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); }
@@ -1063,7 +1062,7 @@ static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE
 ***************************************/
 static const U32 prime3bytes = 506832829U;
 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
-static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }   /* only in zstd_opt.h */
 
 static const U32 prime4bytes = 2654435761U;
 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
@@ -1081,6 +1080,11 @@ static const U64 prime7bytes = 58295818150454627ULL;
 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
 
+//static const U64 prime8bytes = 58295818150454627ULL;
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+
 static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 {
     switch(mls)
@@ -1090,6 +1094,7 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
     case 5: return ZSTD_hash5Ptr(p, hBits);
     case 6: return ZSTD_hash6Ptr(p, hBits);
     case 7: return ZSTD_hash7Ptr(p, hBits);
+    case 8: return ZSTD_hash8Ptr(p, hBits);
     }
 }
 
@@ -1129,13 +1134,14 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     const BYTE* const lowest = base + lowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offsetSaved = 0;
 
     /* init */
     ip += (ip==lowest);
     {   U32 const maxRep = (U32)(ip-lowest);
-        if (offset_1 > maxRep) offset_1 = 0;
-        if (offset_2 > maxRep) offset_2 = 0;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
     }
 
     /* Main Search Loop */
@@ -1148,17 +1154,17 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
         hashTable[h] = current;   /* update hash table */
 
         if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
-            mLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
+            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
         } else {
-            size_t offset;
+            U32 offset;
             if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
                 ip += ((ip-anchor) >> g_searchStrength) + 1;
                 continue;
             }
-            mLength = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32;
-            offset = ip-match;
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            offset = (U32)(ip-match);
             while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
@@ -1179,8 +1185,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                  && ( (offset_2>0)
                  & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                 /* store sequence */
-                size_t const rLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
-                { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; }  /* swap offset_2 <=> offset_1 */
                 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
                 ip += rLength;
@@ -1189,8 +1195,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     }   }   }
 
     /* save reps for next block */
-    cctx->savedRep[0] = offset_1 ? (U32)offset_1 : (U32)(iend - base) + 1;
-    cctx->savedRep[1] = offset_2 ? (U32)offset_2 : (U32)(iend - base) + 1;
+    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1333,6 +1339,283 @@ static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
 }
 
 
+/*-*************************************
+*  Double Fast
+***************************************/
+static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
+{
+    U32* const hashLarge = cctx->hashTable;
+    const U32 hBitsL = cctx->params.cParams.hashLog;
+    U32* const hashSmall = cctx->chainTable;
+    const U32 hBitsS = cctx->params.cParams.chainLog;
+    const BYTE* const base = cctx->base;
+    const BYTE* ip = base + cctx->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - 8;
+    const size_t fastHashFillStep = 3;
+
+    while(ip <= iend) {
+        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
+        hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
+        ip += fastHashFillStep;
+    }
+}
+
+
+FORCE_INLINE
+void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
+                                 const void* src, size_t srcSize,
+                                 const U32 mls)
+{
+    U32* const hashLong = cctx->hashTable;
+    const U32 hBitsL = cctx->params.cParams.hashLog;
+    U32* const hashSmall = cctx->chainTable;
+    const U32 hBitsS = cctx->params.cParams.chainLog;
+    seqStore_t* seqStorePtr = &(cctx->seqStore);
+    const BYTE* const base = cctx->base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32 lowestIndex = cctx->dictLimit;
+    const BYTE* const lowest = base + lowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offsetSaved = 0;
+
+    /* init */
+    ip += (ip==lowest);
+    {   U32 const maxRep = (U32)(ip-lowest);
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+        U32 const current = (U32)(ip-base);
+        U32 const matchIndexL = hashLong[h2];
+        U32 const matchIndexS = hashSmall[h];
+        const BYTE* matchLong = base + matchIndexL;
+        const BYTE* match = base + matchIndexS;
+        hashLong[h2] = hashSmall[h] = current;   /* update hash tables */
+
+        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
+            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+        } else {
+            U32 offset;
+            if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
+                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
+                offset = (U32)(ip-matchLong);
+                while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+            } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
+                mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+                offset = (U32)(ip-match);
+                while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+            } else {
+                ip += ((ip-anchor) >> g_searchStrength) + 1;
+                continue;
+            }
+
+            offset_2 = offset_1;
+            offset_1 = offset;
+
+            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }
+
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
+                hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;  /* here because current+2 could be > iend-8 */
+            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
+                hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
+
+            /* check immediate repcode */
+            while ( (ip <= ilimit)
+                 && ( (offset_2>0)
+                 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+                ip += rLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+    }   }   }
+
+    /* save reps for next block */
+    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
+    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Last Literals */
+    {   size_t const lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+}
+
+
+static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+    const U32 mls = ctx->params.cParams.searchLength;
+    switch(mls)
+    {
+    default:
+    case 4 :
+        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
+    case 5 :
+        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
+    case 6 :
+        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
+    case 7 :
+        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
+    }
+}
+
+
+static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
+                                 const void* src, size_t srcSize,
+                                 const U32 mls)
+{
+    U32* const hashLong = ctx->hashTable;
+    const U32 hBitsL = ctx->params.cParams.hashLog;
+    U32* const hashSmall = ctx->chainTable;
+    const U32 hBitsS = ctx->params.cParams.chainLog;
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const base = ctx->base;
+    const BYTE* const dictBase = ctx->dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   lowestIndex = ctx->lowLimit;
+    const BYTE* const dictStart = dictBase + lowestIndex;
+    const U32   dictLimit = ctx->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
+        const U32 matchIndex = hashSmall[hSmall];
+        const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
+        const BYTE* match = matchBase + matchIndex;
+
+        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
+        const U32 matchLongIndex = hashLong[hLong];
+        const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
+        const BYTE* matchLong = matchLongBase + matchLongIndex;
+
+        const U32 current = (U32)(ip-base);
+        const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
+        const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
+        const BYTE* repMatch = repBase + repIndex;
+        size_t mLength;
+        hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */
+
+        if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
+           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+        } else {
+            if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
+                const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
+                const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
+                U32 offset;
+                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
+                offset = current - matchLongIndex;
+                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
+                const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
+                const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
+                U32 offset;
+                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
+                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                offset = current - matchIndex;
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            } else {
+                ip += ((ip-anchor) >> g_searchStrength) + 1;
+                continue;
+        }   }
+
+        /* found a match : store it */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+			hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
+			hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
+            hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
+            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
+
+    /* Last Literals */
+    {   size_t const lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+}
+
+
+static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
+                         const void* src, size_t srcSize)
+{
+    const U32 mls = ctx->params.cParams.searchLength;
+    switch(mls)
+    {
+    default:
+    case 4 :
+        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
+    case 5 :
+        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
+    case 6 :
+        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
+    case 7 :
+        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
+    }
+}
+
+
 /*-*************************************
 *  Binary Tree search
 ***************************************/
@@ -1364,17 +1647,19 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     const U32 windowLow = zc->lowLimit;
     U32 matchEndIdx = current+8;
     size_t bestLength = 8;
+#ifdef ZSTD_C_PREDICT
     U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
     U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
     predictedSmall += (predictedSmall>0);
     predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
 
     hashTable[h] = current;   /* Update Hash Table */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
         U32* nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-#if 0   /* note : can create issues when hlog small <= 11 */
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         if (matchIndex == predictedSmall) {
             /* no need to check length, result known */
@@ -1731,17 +2016,15 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                         size_t* offsetPtr,
                         U32 maxNbAttempts, U32 matchLengthSearch);
     searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-    U32 rep[ZSTD_REP_INIT];
+    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
 
     /* init */
     ip += (ip==base);
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    {   U32 i;
-        U32 const maxRep = (U32)(ip-base);
-        for (i=0; i<ZSTD_REP_INIT; i++) {
-            rep[i]=ctx->rep[i];
-            if (rep[i]>maxRep) rep[i]=0;
-    }   }
+    {   U32 const maxRep = (U32)(ip-base);
+        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
+    }
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -1750,9 +2033,9 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         const BYTE* start=ip+1;
 
         /* check repCode */
-        if ((rep[0]>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0]))) {
+        if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
             /* repcode : we take it */
-            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
             if (depth==0) goto _storeSequence;
         }
 
@@ -1772,8 +2055,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         if (depth>=1)
         while (ip<ilimit) {
             ip ++;
-            if ((offset) && ((rep[0]>0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) {
-                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+            if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
                 int const gain2 = (int)(mlRep * 3);
                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                 if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
@@ -1791,8 +2074,8 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
             /* let's find an even better one */
             if ((depth==2) && (ip<ilimit)) {
                 ip ++;
-                if ((offset) && ((rep[0]>0) & (MEM_read32(ip) == MEM_read32(ip - rep[0])))) {
-                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32;
+                if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
                     int const gain2 = (int)(ml2 * 4);
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                     if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
@@ -1813,23 +2096,23 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
         if (offset) {
             while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]))   /* only search for offset within prefix */
                 { start--; matchLength++; }
-            rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
         }
 
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
             anchor = ip = start + matchLength;
         }
 
         /* check immediate repcode */
         while ( (ip <= ilimit)
-             && ((rep[1]>0)
-             & (MEM_read32(ip) == MEM_read32(ip - rep[1])) )) {
+             && ((offset_2>0)
+             & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
             /* store sequence */
-            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32;
-            offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */
+            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
+            offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
             ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
             ip += matchLength;
             anchor = ip;
@@ -1837,11 +2120,8 @@ _storeSequence:
     }   }
 
     /* Save reps for next block */
-    {   int i;
-        for (i=0; i<ZSTD_REP_NUM; i++) {
-            if (!rep[i]) rep[i] = (U32)(iend - ctx->base) + 1;   /* in case some zero are left */
-            ctx->savedRep[i] = rep[i];
-    }   }
+    ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
+    ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1900,10 +2180,9 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                         U32 maxNbAttempts, U32 matchLengthSearch);
     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
 
-    /* init */
-    U32 rep[ZSTD_REP_INIT];
-    { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=ctx->rep[i]; }
+    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
 
+    /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
     ip += (ip == prefixStart);
 
@@ -1915,7 +2194,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
         U32 current = (U32)(ip-base);
 
         /* check repCode */
-        {   const U32 repIndex = (U32)(current+1 - rep[0]);
+        {   const U32 repIndex = (U32)(current+1 - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
@@ -1945,7 +2224,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
             current++;
             /* check repCode */
             if (offset) {
-                const U32 repIndex = (U32)(current - rep[0]);
+                const U32 repIndex = (U32)(current - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
                 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
@@ -1975,7 +2254,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                 current++;
                 /* check repCode */
                 if (offset) {
-                    const U32 repIndex = (U32)(current - rep[0]);
+                    const U32 repIndex = (U32)(current - offset_1);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
                     if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
@@ -2007,19 +2286,19 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
         }
 
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
             anchor = ip = start + matchLength;
         }
 
         /* check immediate repcode */
         while (ip <= ilimit) {
-            const U32 repIndex = (U32)((ip-base) - rep[1]);
+            const U32 repIndex = (U32)((ip-base) - offset_2);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
@@ -2027,7 +2306,7 @@ _storeSequence:
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-                offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset;   /* swap offset history */
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                 ip += matchLength;
                 anchor = ip;
@@ -2037,7 +2316,7 @@ _storeSequence:
     }   }
 
     /* Save reps for next block */
-    ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2];
+    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
 
     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2068,18 +2347,27 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
 }
 
 
-
 /* The optimal parser */
 #include "zstd_opt.h"
 
 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
+#ifdef ZSTD_OPT_H_91842398743
     ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
 }
 
 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
+#ifdef ZSTD_OPT_H_91842398743
     ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
+#else
+    (void)ctx; (void)src; (void)srcSize;
+    return;
+#endif
 }
 
 
@@ -2087,9 +2375,9 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr
 
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][6] = {
-        { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
+    static const ZSTD_blockCompressor blockCompressor[2][7] = {
+        { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
+        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
     };
 
     return blockCompressor[extDict][(U32)strat];
@@ -2119,7 +2407,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
     BYTE* op = ostart;
     const U32 maxDist = 1 << cctx->params.cParams.windowLog;
     ZSTD_stats_t* stats = &cctx->seqStore.stats;
-    ZSTD_statsInit(stats);
+    ZSTD_statsInit(stats);   /* debug only */
 
     if (cctx->params.fParams.checksumFlag)
         XXH64_update(&cctx->xxhState, src, srcSize);
@@ -2272,7 +2560,8 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
 
 size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << zc->params.cParams.windowLog);
+    if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
     ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength);
     return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0);
 }
@@ -2300,6 +2589,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
         ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
         break;
 
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
+        break;
+
     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
@@ -2414,7 +2707,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
 *   @return : 0, or an error code */
 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                              const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, U64 pledgedSrcSize)
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
 {
     /* compression parameters verification and optimization */
     { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
@@ -2426,9 +2719,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
 
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
     ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", cctx->base, compressionLevel);
     return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
 }
@@ -2538,11 +2829,9 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
 
 size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params;
-    memset(&params, 0, sizeof(params));
-    ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
-    params.cParams =  ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
     params.fParams.contentSizeFlag = 1;
+    ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
     return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
 }
 
@@ -2577,7 +2866,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_pa
     if (!customMem.customAlloc && !customMem.customFree)
         customMem = defaultCustomMem;
 
-    if (!customMem.customAlloc || !customMem.customFree)
+    if (!customMem.customAlloc || !customMem.customFree)  /* can't have 1/2 custom alloc/free as NULL */
         return NULL;
 
     {   ZSTD_CDict* const cdict = (ZSTD_CDict*) customMem.customAlloc(customMem.opaque, sizeof(*cdict));
@@ -2648,24 +2937,24 @@ unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" */
     /* W,  C,  H,  S,  L, TL, strat */
-    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
-    { 19, 13, 14,  1,  7,  4, ZSTD_fast    },  /* level  1 */
-    { 19, 15, 16,  1,  6,  4, ZSTD_fast    },  /* level  2 */
-    { 20, 18, 20,  1,  6,  4, ZSTD_fast    },  /* level  3 */
-    { 20, 13, 17,  2,  5,  4, ZSTD_greedy  },  /* level  4.*/
-    { 20, 15, 18,  3,  5,  4, ZSTD_greedy  },  /* level  5 */
-    { 21, 16, 19,  2,  5,  4, ZSTD_lazy    },  /* level  6 */
-    { 21, 17, 20,  3,  5,  4, ZSTD_lazy    },  /* level  7 */
-    { 21, 18, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  8.*/
-    { 21, 20, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  9 */
-    { 21, 19, 21,  4,  5,  4, ZSTD_lazy2   },  /* level 10 */
-    { 22, 20, 22,  4,  5,  4, ZSTD_lazy2   },  /* level 11 */
-    { 22, 20, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 12 */
-    { 22, 21, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 13 */
-    { 22, 21, 22,  6,  5,  4, ZSTD_lazy2   },  /* level 14 */
-    { 22, 21, 21,  5,  5,  4, ZSTD_btlazy2 },  /* level 15 */
-    { 23, 22, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 16 */
-    { 23, 23, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 17.*/
+    { 18, 12, 12,  1,  7, 16, ZSTD_fast    },  /* level  0 - not used */
+    { 19, 13, 14,  1,  7, 16, ZSTD_fast    },  /* level  1 */
+    { 19, 15, 16,  1,  6, 16, ZSTD_fast    },  /* level  2 */
+    { 20, 16, 18,  1,  5, 16, ZSTD_dfast   },  /* level  3 */
+    { 20, 13, 17,  2,  5, 16, ZSTD_greedy  },  /* level  4.*/
+    { 20, 15, 18,  3,  5, 16, ZSTD_greedy  },  /* level  5 */
+    { 21, 16, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
+    { 21, 17, 20,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
+    { 21, 18, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  8.*/
+    { 21, 20, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 21, 19, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 20, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 13 */
+    { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 14 */
+    { 22, 21, 21,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
+    { 23, 22, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 16 */
+    { 23, 23, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 17.*/
     { 23, 23, 22,  6,  5, 24, ZSTD_btopt   },  /* level 18.*/
     { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19.*/
     { 25, 26, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20.*/
@@ -2674,7 +2963,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 },
 {   /* for srcSize <= 256 KB */
     /* W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 */
+    { 18, 12, 12,  1,  7,  4, ZSTD_fast    },  /* level  0 - not used */
     { 18, 13, 14,  1,  6,  4, ZSTD_fast    },  /* level  1 */
     { 18, 15, 17,  1,  5,  4, ZSTD_fast    },  /* level  2 */
     { 18, 13, 15,  1,  5,  4, ZSTD_greedy  },  /* level  3.*/
@@ -2700,7 +2989,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 },
 {   /* for srcSize <= 128 KB */
     /* W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
+    { 17, 12, 12,  1,  7,  4, ZSTD_fast    },  /* level  0 - not used */
     { 17, 12, 13,  1,  6,  4, ZSTD_fast    },  /* level  1 */
     { 17, 13, 16,  1,  5,  4, ZSTD_fast    },  /* level  2 */
     { 17, 13, 14,  2,  5,  4, ZSTD_greedy  },  /* level  3 */
@@ -2726,16 +3015,16 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 },
 {   /* for srcSize <= 16 KB */
     /* W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 -- never used */
-    { 14, 14, 14,  1,  4,  4, ZSTD_fast    },  /* level  1 */
-    { 14, 14, 15,  1,  4,  4, ZSTD_fast    },  /* level  2 */
-    { 14, 14, 14,  4,  4,  4, ZSTD_greedy  },  /* level  3.*/
-    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  4.*/
-    { 14, 14, 14,  4,  4,  4, ZSTD_lazy2   },  /* level  5 */
-    { 14, 14, 14,  5,  4,  4, ZSTD_lazy2   },  /* level  6 */
-    { 14, 14, 14,  6,  4,  4, ZSTD_lazy2   },  /* level  7.*/
-    { 14, 14, 14,  7,  4,  4, ZSTD_lazy2   },  /* level  8.*/
-    { 14, 15, 14,  6,  4,  4, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 12, 12,  1,  7,  6, ZSTD_fast    },  /* level  0 - not used */
+    { 14, 14, 14,  1,  6,  6, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 14,  1,  4,  6, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 14,  1,  4,  6, ZSTD_dfast   },  /* level  3.*/
+    { 14, 14, 14,  4,  4,  6, ZSTD_greedy  },  /* level  4.*/
+    { 14, 14, 14,  3,  4,  6, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  6, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  5,  4,  6, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  6,  4,  6, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  6,  4,  6, ZSTD_btlazy2 },  /* level  9.*/
     { 14, 15, 14,  3,  3,  6, ZSTD_btopt   },  /* level 10.*/
     { 14, 15, 14,  6,  3,  8, ZSTD_btopt   },  /* level 11.*/
     { 14, 15, 14,  6,  3, 16, ZSTD_btopt   },  /* level 12.*/
@@ -2755,7 +3044,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 /*! ZSTD_getCParams() :
 *   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
 *   Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize)
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
 {
     ZSTD_compressionParameters cp;
     size_t const addedSize = srcSize ? 0 : 500;
@@ -2772,3 +3061,14 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, si
     cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
     return cp;
 }
+
+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
+*   All fields of `ZSTD_frameParameters` are set to default (0) */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    memset(&params, 0, sizeof(params));
+    params.cParams = cParams;
+    return params;
+}
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index 97b1623ba..ef394f198 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -34,6 +34,10 @@
 /* Note : this file is intended to be included within zstd_compress.c */
 
 
+#ifndef ZSTD_OPT_H_91842398743
+#define ZSTD_OPT_H_91842398743
+
+
 #define ZSTD_FREQ_DIV   5
 
 /*-*************************************
@@ -110,7 +114,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 
     /* literals */
     if (ssPtr->cachedLiterals == literals) {
-        U32 additional = litLength - ssPtr->cachedLitLength;
+        U32 const additional = litLength - ssPtr->cachedLitLength;
         const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
         price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
         for (u=0; u < additional; u++)
@@ -150,7 +154,7 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
     /* offset */
-    BYTE offCode = (BYTE)ZSTD_highbit32(offset+1);
+    BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
     U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
 
     /* match Length */
@@ -196,7 +200,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
     }
 
     /* match offset */
-	{   BYTE offCode = (BYTE)ZSTD_highbit32(offset+1);
+	{   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
 		seqStorePtr->offCodeSum++;
 		seqStorePtr->offCodeFreq[offCode]++;
 	}
@@ -232,7 +236,6 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 
 
-
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (ie. not within extDict) */
 FORCE_INLINE
@@ -1039,3 +1042,5 @@ _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
         seqStorePtr->lit += lastLLSize;
     }
 }
+
+#endif  /* ZSTD_OPT_H_91842398743 */
diff --git a/lib/decompress/zbuff_decompress.c b/lib/decompress/zbuff_decompress.c
index b6e1806e3..e74fb5d1f 100644
--- a/lib/decompress/zbuff_decompress.c
+++ b/lib/decompress/zbuff_decompress.c
@@ -173,7 +173,7 @@ size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
                     if (ZSTD_isError(hSize)) return hSize;
                     if (toLoad > (size_t)(iend-ip)) {   /* not enough input to load full header */
                         memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
-                        zbd->lhSize += iend-ip; ip = iend; notDone = 0;
+                        zbd->lhSize += iend-ip;
                         *dstCapacityPtr = 0;
                         return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
                     }
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 37aa403ff..7ccfb963d 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -135,7 +135,9 @@ struct ZSTD_DCtx_s
     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 };  /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
 
-size_t ZSTD_sizeofDCtx (void) { return sizeof(ZSTD_DCtx); }   /* non published interface */
+size_t ZSTD_sizeofDCtx (const ZSTD_DCtx* dctx) { return sizeof(*dctx); }
+
+size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
 
 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
 {
@@ -195,7 +197,7 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 /* Frame format description
    Frame Header -  [ Block Header - Block ] - Frame End
    1) Frame Header
-      - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h)
+      - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd.h)
       - 1 byte  - Frame Descriptor
    2) Block Header
       - 3 bytes, starting with a 2-bits descriptor
@@ -207,20 +209,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 */
 
 
-/* Frame descriptor
+/* Frame Header :
 
-    // old
-   1 byte - Alloc :
-   bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN   (see zstd_internal.h)
-   bit 4   : reserved for windowLog (must be zero)
-   bit 5   : reserved (must be zero)
-   bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes
-
-   1 byte - checker :
-   bit 0-1 : dictID (0, 1, 2 or 4 bytes)
-   bit 2-7 : reserved (must be zero)
-
-    // new
    1 byte - FrameHeaderDescription :
    bit 0-1 : dictID (0, 1, 2 or 4 bytes)
    bit 2   : checksumFlag
@@ -403,6 +393,26 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
 }
 
 
+/** ZSTD_getDecompressedSize() :
+*   compatible with legacy mode
+*   @return : decompressed size if known, 0 otherwise
+              note : 0 can mean any of the following :
+                   - decompressed size is not provided within frame header
+                   - frame header unknown / not supported
+                   - frame header not completely provided (`srcSize` too small) */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
+{
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getDecompressedSize_legacy(src, srcSize);
+#endif
+    {   ZSTD_frameParams fparams;
+        size_t const frResult = ZSTD_getFrameParams(&fparams, src, srcSize);
+        if (frResult!=0) return 0;
+        return fparams.frameContentSize;
+    }
+}
+
+
 /** ZSTD_decodeFrameHeader() :
 *   `srcSize` must be the size provided by ZSTD_frameHeaderSize().
 *   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
@@ -454,16 +464,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
     const BYTE* const istart = (const BYTE*) src;
-    litBlockType_t lbt;
 
     if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
-    lbt = (litBlockType_t)(istart[0]>> 6);
 
-    switch(lbt)
+    switch((litBlockType_t)(istart[0]>> 6))
     {
     case lbt_huffman:
         {   size_t litSize, litCSize, singleStream=0;
-            U32 lhSize = ((istart[0]) >> 4) & 3;
+            U32 lhSize = (istart[0] >> 4) & 3;
             if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for lhSize, + cSize (+nbSeq) */
             switch(lhSize)
             {
@@ -643,7 +651,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
 
     /* FSE table descriptors */
     {   U32 const LLtype  = *ip >> 6;
-        U32 const Offtype = (*ip >> 4) & 3;
+        U32 const OFtype = (*ip >> 4) & 3;
         U32 const MLtype  = (*ip >> 2) & 3;
         ip++;
 
@@ -651,17 +659,17 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
         if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
 
         /* Build DTables */
-        {   size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
-            if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
-            ip += bhSize;
+        {   size_t const llhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
+            if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
+            ip += llhSize;
         }
-        {   size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
-            if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
-            ip += bhSize;
+        {   size_t const ofhSize = ZSTD_buildSeqTable(DTableOffb, OFtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable);
+            if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
+            ip += ofhSize;
         }
-        {   size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
-            if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
-            ip += bhSize;
+        {   size_t const mlhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable);
+            if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
+            ip += mlhSize;
     }   }
 
     return ip-istart;
@@ -702,42 +710,37 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
                             0x2000, 0x4000, 0x8000, 0x10000 };
 
     static const U32 ML_base[MaxML+1] = {
-                             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,   11,    12,    13,    14,    15,
-                            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,   27,    28,    29,    30,    31,
-                            32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
-                            0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
+                             3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,   14,    15,    16,    17,    18,
+                            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,   30,    31,    32,    33,    34,
+                            35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                            0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
 
     static const U32 OF_base[MaxOff+1] = {
-                 0,        1,       3,       7,     0xF,     0x1F,     0x3F,     0x7F,
-                 0xFF,   0x1FF,   0x3FF,   0x7FF,   0xFFF,   0x1FFF,   0x3FFF,   0x7FFF,
-                 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
-                 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 };
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
 
     /* sequence */
     {   size_t offset;
         if (!ofCode)
             offset = 0;
         else {
-            offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits);   /* <=  26 bits */
+            offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
             if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
         }
 
-        if (offset < ZSTD_REP_NUM) {
-            if (llCode == 0 && offset <= 1) offset = 1-offset;
-
-            if (offset != 0) {
-                size_t temp = seqState->prevOffset[offset];
-                if (offset != 1) {
-                    seqState->prevOffset[2] = seqState->prevOffset[1];
-                }
+        if (ofCode <= 1) {
+            if ((llCode == 0) & (offset <= 1)) offset = 1-offset;
+            if (offset) {
+                size_t const temp = seqState->prevOffset[offset];
+                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                 seqState->prevOffset[1] = seqState->prevOffset[0];
                 seqState->prevOffset[0] = offset = temp;
-
             } else {
                 offset = seqState->prevOffset[0];
             }
         } else {
-            offset -= ZSTD_REP_MOVE;
             seqState->prevOffset[2] = seqState->prevOffset[1];
             seqState->prevOffset[1] = seqState->prevOffset[0];
             seqState->prevOffset[0] = offset;
@@ -745,11 +748,11 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
         seq.offset = offset;
     }
 
-    seq.matchLength = ML_base[mlCode] + MINMATCH + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0);   /* <=  16 bits */
+    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0);   /* <=  16 bits */
     if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream));
 
     seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0);   /* <=  16 bits */
-    if (MEM_32bits() |
+    if (MEM_32bits() ||
        (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream));
 
     /* ANS state update */
@@ -930,12 +933,25 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                             void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
 {
+    size_t dSize;
     ZSTD_checkContinuity(dctx, dst);
-    return ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
 }
 
 
-size_t ZSTD_generateNxByte(void* dst, size_t dstCapacity, BYTE byte, size_t length)
+/** ZSTD_insertBlock() :
+    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    ZSTD_checkContinuity(dctx, blockStart);
+    dctx->previousDstEnd = (const char*)blockStart + blockSize;
+    return blockSize;
+}
+
+
+size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
 {
     if (length > dstCapacity) return ERROR(dstSize_tooSmall);
     memset(dst, byte, length);
@@ -987,7 +1003,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
             decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
             break;
         case bt_rle :
-            decodedSize = ZSTD_generateNxByte(op, oend-op, *ip, blockProperties.origSize);
+            decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
             break;
         case bt_end :
             /* end of frame */
@@ -997,7 +1013,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         default:
             return ERROR(GENERIC);   /* impossible */
         }
-        if (cBlockSize == 0) break;   /* bt_end */
+        if (blockProperties.blockType == bt_end) break;   /* bt_end */
 
         if (ZSTD_isError(decodedSize)) return decodedSize;
         if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);
@@ -1031,10 +1047,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                  const void* dict, size_t dictSize)
 {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    {   U32 const magicNumber = MEM_readLE32(src);
-        if (ZSTD_isLegacy(magicNumber))
-            return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize, magicNumber);
-    }
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
 #endif
     ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
     ZSTD_checkContinuity(dctx, dst);
@@ -1273,8 +1286,8 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t
 
 
 struct ZSTD_DDict_s {
-    void* dictContent;
-    size_t dictContentSize;
+    void* dict;
+    size_t dictSize;
     ZSTD_DCtx* refContext;
 };  /* typedef'd tp ZSTD_CDict within zstd.h */
 
@@ -1306,8 +1319,8 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu
                 return NULL;
         }   }
 
-        ddict->dictContent = dictContent;
-        ddict->dictContentSize = dictSize;
+        ddict->dict = dictContent;
+        ddict->dictSize = dictSize;
         ddict->refContext = dctx;
         return ddict;
     }
@@ -1327,7 +1340,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
     ZSTD_freeFunction const cFree = ddict->refContext->customMem.customFree;
     void* const opaque = ddict->refContext->customMem.opaque;
     ZSTD_freeDCtx(ddict->refContext);
-    cFree(opaque, ddict->dictContent);
+    cFree(opaque, ddict->dict);
     cFree(opaque, ddict);
     return 0;
 }
@@ -1340,6 +1353,9 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                      const void* src, size_t srcSize,
                                      const ZSTD_DDict* ddict)
 {
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
+#endif
     return ZSTD_decompress_usingPreparedDCtx(dctx, ddict->refContext,
                                            dst, dstCapacity,
                                            src, srcSize);
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 031509453..d3d4bec7e 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -31,14 +31,15 @@
     - Zstd homepage : https://www.zstd.net
 */
 
+/*-**************************************
+*  Tuning parameters
+****************************************/
+#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
+
+
 /*-**************************************
 *  Compiler Options
 ****************************************/
-/* Disable some Visual warning messages */
-#ifdef _MSC_VER
-#  pragma warning(disable : 4127)                /* disable: C4127: conditional expression is constant */
-#endif
-
 /* Unix Large Files support (>4GB) */
 #define _FILE_OFFSET_BITS 64
 #if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
@@ -58,13 +59,15 @@
 
 #include "mem.h"           /* read */
 #include "error_private.h"
-#include "fse.h"
+#include "fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "zstd_internal.h" /* includes zstd.h */
 #include "xxhash.h"
 #include "divsufsort.h"
-#define ZDICT_STATIC_LINKING_ONLY
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
 #include "zdict.h"
 
 
@@ -91,17 +94,19 @@ static const size_t g_min_fast_dictContent = 192;
 /*-*************************************
 *  Console display
 ***************************************/
-#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 static unsigned g_displayLevel = 0;   /* 0 : no display;   1: errors;   2: default;  4: full information */
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if (ZDICT_GetMilliSpan(g_time) > refreshRate)  \
+            if (ZDICT_clockSpan(g_time) > refreshRate)  \
             { g_time = clock(); DISPLAY(__VA_ARGS__); \
             if (g_displayLevel>=4) fflush(stdout); } }
-static const unsigned refreshRate = 300;
+static const clock_t refreshRate = CLOCKS_PER_SEC * 3 / 10;
 static clock_t g_time = 0;
 
+static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
+
 static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
 {
     const BYTE* const b = (const BYTE*)ptr;
@@ -117,13 +122,6 @@ static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
 /*-********************************************************
 *  Helper functions
 **********************************************************/
-static unsigned ZDICT_GetMilliSpan(clock_t nPrevious)
-{
-    clock_t nCurrent = clock();
-    unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
-    return nSpan;
-}
-
 unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
 
 const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
@@ -286,7 +284,7 @@ static dictItem ZDICT_analyzePos(
         U32 refinedEnd = end;
 
         DISPLAYLEVEL(4, "\n");
-        DISPLAYLEVEL(4, "found %3u matches of length >= %u at pos %7u  ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
+        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u  ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
         DISPLAYLEVEL(4, "\n");
 
         for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
@@ -489,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 
 
 static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
-                            const void* const buffer, const size_t bufferSize,   /* buffer must end with noisy guard band */
+                            const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
                             U32 shiftRatio, unsigned maxDictSize)
 {
@@ -499,7 +497,6 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
     BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));   /* +16 for overflow security */
     U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
     U32 minRatio = nbFiles >> shiftRatio;
-    int divSuftSortResult;
     size_t result = 0;
 
     /* init */
@@ -511,15 +508,19 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
     if (minRatio < MINRATIO) minRatio = MINRATIO;
     memset(doneMarks, 0, bufferSize+16);
 
+    /* limit sample set size (divsufsort limitation)*/
+    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
+    while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
+
     /* sort */
     DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
-    divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
-    if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
+    {   int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
+        if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
+    }
     suffix[bufferSize] = (int)bufferSize;   /* leads into noise */
     suffix0[0] = (int)bufferSize;           /* leads into noise */
-    {
-        /* build reverse suffix sort */
-        size_t pos;
+    /* build reverse suffix sort */
+    {   size_t pos;
         for (pos=0; pos < bufferSize; pos++)
             reverseSuffix[suffix[pos]] = (U32)pos;
         /* build file pos */
@@ -580,14 +581,17 @@ typedef struct
 
 #define MAXREPOFFSET 1024
 
-static void ZDICT_countEStats(EStats_ress_t esr,
+static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
                             U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
                             const void* src, size_t srcSize)
 {
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
     size_t cSize;
 
-    if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX;   /* protection vs large samples */
-    ZSTD_copyCCtx(esr.zc, esr.ref);
+    if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
+	{	size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref);
+		if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
+	}
     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
     if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
 
@@ -685,7 +689,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
     EStats_ress_t esr;
     ZSTD_parameters params;
-    U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
+    U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
     size_t pos = 0, errorCode;
     size_t eSize = 0;
     size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
@@ -708,14 +712,17 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
             goto _cleanup;
     }
     if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
-    params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
-    params.cParams.strategy = ZSTD_greedy;
-    params.fParams.contentSizeFlag = 0;
-    ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
+    params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
+	{	size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
+		if (ZSTD_isError(beginResult)) {
+			eSize = ERROR(GENERIC);
+			DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed ");
+			goto _cleanup;
+	}	}
 
     /* collect stats on all files */
     for (u=0; u<nbFiles; u++) {
-        ZDICT_countEStats(esr,
+        ZDICT_countEStats(esr, params,
                         countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
            (const char*)srcBuffer + pos, fileSizes[u]);
         pos += fileSizes[u];
@@ -887,7 +894,8 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
     /* dictionary header */
     MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
     {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
-        U32 const dictID = params.dictID ? params.dictID : (U32)(randomID>>11);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
         MEM_writeLE32((char*)dictBuffer+4, dictID);
     }
     hSize = 8;
@@ -905,6 +913,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
     return MIN(dictBufferCapacity, hSize+dictContentSize);
 }
 
+
 #define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
 /*! ZDICT_trainFromBuffer_unsafe() :
 *   `samplesBuffer` must be followed by noisy guard band.
@@ -923,12 +932,12 @@ size_t ZDICT_trainFromBuffer_unsafe(
     size_t dictSize = 0;
 
     /* checks */
-    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
     if (!dictList) return ERROR(memory_allocation);
+    if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
 
     /* init */
     { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
-    if (sBuffSize < DIB_MINSAMPLESSIZE) return 0;   /* not enough source to create dictionary */
+    if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */
     ZDICT_initDictItem(dictList);
     g_displayLevel = params.notificationLevel;
     if (selectivity==0) selectivity = g_selectivity_default;
@@ -966,7 +975,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
             for (u=1; u<dictList->pos; u++) {
                 U32 l = dictList[u].length;
                 ptr -= l;
-                if (ptr<(BYTE*)dictBuffer) return ERROR(GENERIC);   /* should not happen */
+                if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); }   /* should not happen */
                 memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
         }   }
 
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index 39acdf852..b96b828f7 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -84,10 +84,6 @@ const char* ZDICT_getErrorName(size_t errorCode);
  * Use them only in association with static linking.
  * ==================================================================================== */
 
-
-/*-*************************************
-*  Public type
-***************************************/
 typedef struct {
     unsigned selectivityLevel;   /* 0 means default; larger => bigger selection => larger dictionary */
     unsigned compressionLevel;   /* 0 means default; target a specific zstd compression level */
@@ -97,9 +93,6 @@ typedef struct {
 } ZDICT_params_t;
 
 
-/*-*************************************
-*  Public functions
-***************************************/
 /*! ZDICT_trainFromBuffer_advanced() :
     Same as ZDICT_trainFromBuffer() with control over more parameters.
     `parameters` is optional and can be provided with values set to 0 to mean "default".
@@ -117,4 +110,4 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
 }
 #endif
 
-#endif
+#endif   /* DICTBUILDER_H_001 */
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index 22921beca..ab9634b32 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -54,8 +54,11 @@ extern "C" {
     @return : > 0 if supported by legacy decoder. 0 otherwise.
               return value is the version.
 */
-MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
+MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
 {
+    U32 magicNumberLE;
+    if (srcSize<4) return 0;
+    magicNumberLE = MEM_readLE32(src);
     switch(magicNumberLE)
     {
         case ZSTDv01_magicNumberLE:return 1;
@@ -69,23 +72,45 @@ MEM_STATIC unsigned ZSTD_isLegacy (U32 magicNumberLE)
 }
 
 
+MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize)
+{
+    if (srcSize < 4) return 0;
+
+    {   U32 const version = ZSTD_isLegacy(src, srcSize);
+        if (version < 5) return 0;  /* no decompressed size in frame header, or not a legacy format */
+        if (version==5) {
+            ZSTDv05_parameters fParams;
+            size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize);
+            if (frResult != 0) return 0;
+            return fParams.srcSize;
+        }
+        if (version==6) {
+            ZSTDv06_frameParams fParams;
+            size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize);
+            if (frResult != 0) return 0;
+            return fParams.frameContentSize;
+        }
+        return 0;   /* should not be possible */
+    }
+}
+
 MEM_STATIC size_t ZSTD_decompressLegacy(
                      void* dst, size_t dstCapacity,
                const void* src, size_t compressedSize,
-               const void* dict,size_t dictSize,
-                     U32 magicNumberLE)
+               const void* dict,size_t dictSize)
 {
-    switch(magicNumberLE)
+    U32 const version = ZSTD_isLegacy(src, compressedSize);
+    switch(version)
     {
-        case ZSTDv01_magicNumberLE :
+        case 1 :
             return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
-        case ZSTDv02_magicNumber :
+        case 2 :
             return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
-        case ZSTDv03_magicNumber :
+        case 3 :
             return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
-        case ZSTDv04_magicNumber :
+        case 4 :
             return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
-        case ZSTDv05_MAGICNUMBER :
+        case 5 :
             {   size_t result;
                 ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx();
                 if (zd==NULL) return ERROR(memory_allocation);
@@ -93,7 +118,7 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
                 ZSTDv05_freeDCtx(zd);
                 return result;
             }
-        case ZSTDv06_MAGICNUMBER :
+        case 6 :
             {   size_t result;
                 ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx();
                 if (zd==NULL) return ERROR(memory_allocation);
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 35469048a..eeb7cc1c3 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -3620,36 +3620,26 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
     switch (ctx->stage)
     {
     case ZSTDds_getFrameHeaderSize :
-        {
-            /* get frame header size */
-            if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
-            ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
-            if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
-            memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
-            if (ctx->headerSize > ZSTD_frameHeaderSize_min)
-            {
-                ctx->expected = ctx->headerSize - ZSTD_frameHeaderSize_min;
-                ctx->stage = ZSTDds_decodeFrameHeader;
-                return 0;
-            }
-            ctx->expected = 0;   /* not necessary to copy more */
-        }
+        /* get frame header size */
+        if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        ctx->headerSize = ZSTD_decodeFrameHeader_Part1(ctx, src, ZSTD_frameHeaderSize_min);
+        if (ZSTD_isError(ctx->headerSize)) return ctx->headerSize;
+        memcpy(ctx->headerBuffer, src, ZSTD_frameHeaderSize_min);
+        if (ctx->headerSize > ZSTD_frameHeaderSize_min) return ERROR(GENERIC);   /* impossible */
+        ctx->expected = 0;   /* not necessary to copy more */
+        /* fallthrough */
     case ZSTDds_decodeFrameHeader:
-        {
-            /* get frame header */
-            size_t result;
-            memcpy(ctx->headerBuffer + ZSTD_frameHeaderSize_min, src, ctx->expected);
-            result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
+        /* get frame header */
+        {   size_t const result = ZSTD_decodeFrameHeader_Part2(ctx, ctx->headerBuffer, ctx->headerSize);
             if (ZSTD_isError(result)) return result;
             ctx->expected = ZSTD_blockHeaderSize;
             ctx->stage = ZSTDds_decodeBlockHeader;
             return 0;
         }
     case ZSTDds_decodeBlockHeader:
-        {
-            /* Decode block header */
-            blockProperties_t bp;
-            size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        /* Decode block header */
+        {   blockProperties_t bp;
+            size_t const blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
             if (ZSTD_isError(blockSize)) return blockSize;
             if (bp.blockType == bt_end)
             {
@@ -3864,11 +3854,9 @@ static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDs
 
         case ZBUFFds_readHeader :
             /* read header from src */
-            {
-                size_t headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
+            {   size_t const headerSize = ZSTD_getFrameParams(&(zbc->params), src, *srcSizePtr);
                 if (ZSTD_isError(headerSize)) return headerSize;
-                if (headerSize)
-                {
+                if (headerSize) {
                     /* not enough input to decode header : tell how many bytes would be necessary */
                     memcpy(zbc->headerBuffer+zbc->hPos, src, *srcSizePtr);
                     zbc->hPos += *srcSizePtr;
@@ -3882,8 +3870,7 @@ static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDs
 
         case ZBUFFds_loadHeader:
             /* complete header from src */
-            {
-                size_t headerSize = ZBUFF_limitCopy(
+            {   size_t headerSize = ZBUFF_limitCopy(
                     zbc->headerBuffer + zbc->hPos, ZSTD_frameHeaderSize_max - zbc->hPos,
                     src, *srcSizePtr);
                 zbc->hPos += headerSize;
@@ -3895,12 +3882,12 @@ static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDs
                     *maxDstSizePtr = 0;
                     return headerSize - zbc->hPos;
             }   }
+            /* intentional fallthrough */
 
         case ZBUFFds_decodeHeader:
                 /* apply header to create / resize buffers */
-                {
-                    size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
-                    size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
+                {   size_t const neededOutSize = (size_t)1 << zbc->params.windowLog;
+                    size_t const neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
                     if (zbc->inBuffSize < neededInSize) {
                         free(zbc->inBuff);
                         zbc->inBuffSize = neededInSize;
@@ -4067,3 +4054,11 @@ size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDs
 {
     return ZBUFF_decompressContinue(dctx, dst, maxDstSizePtr, src, srcSizePtr);
 }
+
+ZSTD_DCtx* ZSTDv04_createDCtx(void) { return ZSTD_createDCtx(); }
+size_t ZSTDv04_freeDCtx(ZSTD_DCtx* dctx) { return ZSTD_freeDCtx(dctx); }
+
+size_t ZSTDv04_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameParams(params, src, srcSize);
+}
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 9c57d18fc..f3c720fd2 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -3872,25 +3872,17 @@ size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t maxDstSi
     switch (dctx->stage)
     {
     case ZSTDv05ds_getFrameHeaderSize :
-        {
-            /* get frame header size */
-            if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
-            dctx->headerSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
-            if (ZSTDv05_isError(dctx->headerSize)) return dctx->headerSize;
-            memcpy(dctx->headerBuffer, src, ZSTDv05_frameHeaderSize_min);
-            if (dctx->headerSize > ZSTDv05_frameHeaderSize_min) {
-                dctx->expected = dctx->headerSize - ZSTDv05_frameHeaderSize_min;
-                dctx->stage = ZSTDv05ds_decodeFrameHeader;
-                return 0;
-            }
-            dctx->expected = 0;   /* not necessary to copy more */
-        }
+        /* get frame header size */
+        if (srcSize != ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);   /* impossible */
+        dctx->headerSize = ZSTDv05_decodeFrameHeader_Part1(dctx, src, ZSTDv05_frameHeaderSize_min);
+        if (ZSTDv05_isError(dctx->headerSize)) return dctx->headerSize;
+        memcpy(dctx->headerBuffer, src, ZSTDv05_frameHeaderSize_min);
+        if (dctx->headerSize > ZSTDv05_frameHeaderSize_min) return ERROR(GENERIC); /* should never happen */
+        dctx->expected = 0;   /* not necessary to copy more */
+        /* fallthrough */
     case ZSTDv05ds_decodeFrameHeader:
-        {
-            /* get frame header */
-            size_t result;
-            memcpy(dctx->headerBuffer + ZSTDv05_frameHeaderSize_min, src, dctx->expected);
-            result = ZSTDv05_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
+        /* get frame header */
+        {   size_t const result = ZSTDv05_decodeFrameHeader_Part2(dctx, dctx->headerBuffer, dctx->headerSize);
             if (ZSTDv05_isError(result)) return result;
             dctx->expected = ZSTDv05_blockHeaderSize;
             dctx->stage = ZSTDv05ds_decodeBlockHeader;
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index 2640c86b3..ce6967eb8 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -36,7 +36,7 @@
 #include "zstd_v06.h"
 #include <stddef.h>    /* size_t, ptrdiff_t */
 #include <string.h>    /* memcpy */
-#include <stdlib.h>    /* malloc, free, qsort */ 
+#include <stdlib.h>    /* malloc, free, qsort */
 
 
 
@@ -535,8 +535,6 @@ ZSTDLIB_API size_t ZSTDv06_decompress_usingPreparedDCtx(
 
 
 
-struct ZSTDv06_frameParams_s { U64 frameContentSize; U32 windowLog; };
-
 #define ZSTDv06_FRAMEHEADERSIZE_MAX 13    /* for static allocation */
 static const size_t ZSTDv06_frameHeaderSize_min = 5;
 static const size_t ZSTDv06_frameHeaderSize_max = ZSTDv06_FRAMEHEADERSIZE_MAX;
diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h
index 55619bef2..177f14834 100644
--- a/lib/legacy/zstd_v06.h
+++ b/lib/legacy/zstd_v06.h
@@ -107,7 +107,7 @@ ZSTDLIB_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx,
 /*-************************
 *  Advanced Streaming API
 ***************************/
-
+struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; };
 typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams;
 
 ZSTDLIB_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
diff --git a/lib/common/zstd.h b/lib/zstd.h
similarity index 80%
rename from lib/common/zstd.h
rename to lib/zstd.h
index d6a1cceea..a4b94ec5b 100644
--- a/lib/common/zstd.h
+++ b/lib/zstd.h
@@ -61,7 +61,7 @@ extern "C" {
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0
 #define ZSTD_VERSION_MINOR    7
-#define ZSTD_VERSION_RELEASE  1
+#define ZSTD_VERSION_RELEASE  5
 
 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
 #define ZSTD_QUOTE(str) #str
@@ -85,9 +85,14 @@ ZSTDLIB_API size_t ZSTD_compress(   void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                      int  compressionLevel);
 
+/** ZSTD_getDecompressedSize() :
+*   @return : decompressed size if known, 0 otherwise.
+        note : to know precise reason why result is `0`, follow up with ZSTD_getFrameParams() */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+
 /*! ZSTD_decompress() :
-    `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
-    `dstCapacity` must be large enough, equal or larger than originalSize.
+    `compressedSize` : is the _exact_ size of compressed input, otherwise decompression will fail.
+    `dstCapacity` must be equal or larger than originalSize.
     @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
               or an errorCode if it fails (which can be tested using ZSTD_isError()) */
 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
@@ -197,22 +202,20 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
  * Use them only in association with static linking.
  * ==================================================================================== */
 
-/*--- Dependency ---*/
-#include "mem.h"   /* U32 */
-
-
 /*--- Constants ---*/
 #define ZSTD_MAGICNUMBER            0xFD2FB527   /* v0.7 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
 
-#define ZSTD_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? 25 : 27))
+#define ZSTD_WINDOWLOG_MAX_32  25
+#define ZSTD_WINDOWLOG_MAX_64  27
+#define ZSTD_WINDOWLOG_MAX    ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
 #define ZSTD_WINDOWLOG_MIN     18
 #define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
 #define ZSTD_CHAINLOG_MIN       4
 #define ZSTD_HASHLOG_MAX       ZSTD_WINDOWLOG_MAX
 #define ZSTD_HASHLOG_MIN       12
 #define ZSTD_HASHLOG3_MAX      17
-#define ZSTD_HASHLOG3_MIN      15
+//#define ZSTD_HASHLOG3_MIN      15
 #define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
 #define ZSTD_SEARCHLOG_MIN      1
 #define ZSTD_SEARCHLENGTH_MAX   7
@@ -227,22 +230,22 @@ static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable f
 
 
 /*--- Types ---*/
-typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /*< from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /*< from faster to stronger */
 
 typedef struct {
-    U32 windowLog;     /*< largest match distance : larger == more compression, more memory needed during decompression */
-    U32 chainLog;      /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
-    U32 hashLog;       /*< dispatch table : larger == faster, more memory */
-    U32 searchLog;     /*< nb of searches : larger == more compression, slower */
-    U32 searchLength;  /*< match length searched : larger == faster decompression, sometimes less compression */
-    U32 targetLength;  /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    unsigned windowLog;      /*< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;       /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;        /*< dispatch table : larger == faster, more memory */
+    unsigned searchLog;      /*< nb of searches : larger == more compression, slower */
+    unsigned searchLength;   /*< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;   /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
     ZSTD_strategy strategy;
 } ZSTD_compressionParameters;
 
 typedef struct {
-    U32 contentSizeFlag;  /*< 1: content size will be in frame header (if known). */
-    U32 checksumFlag;     /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
-    U32 noDictIDFlag;     /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
+    unsigned contentSizeFlag; /*< 1: content size will be in frame header (if known). */
+    unsigned checksumFlag;    /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
+    unsigned noDictIDFlag;    /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
 } ZSTD_frameParameters;
 
 typedef struct {
@@ -259,6 +262,11 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
 /*-*************************************
 *  Advanced compression functions
 ***************************************/
+/*! ZSTD_estimateCCtxSize() :
+ *  Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters.
+ *  `frameContentSize` is an optional parameter, provide `0` if unknown */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
+
 /*! ZSTD_createCCtx_advanced() :
  *  Create a ZSTD compression context using external alloc and free functions */
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
@@ -268,21 +276,30 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
                                                   ZSTD_parameters params, ZSTD_customMem customMem);
 
+/*! ZSTD_sizeofCCtx() :
+ *  Gives the amount of memory used by a given ZSTD_CCtx */
+ZSTDLIB_API size_t ZSTD_sizeofCCtx(const ZSTD_CCtx* cctx);
+
 ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
 
+/*! ZSTD_getParams() :
+*   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
+*   All fields of `ZSTD_frameParameters` are set to default (0) */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+
 /*! ZSTD_getCParams() :
 *   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
 *   `srcSize` value is optional, select 0 if not known */
-ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize);
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
 
-/*! ZSTD_checkParams() :
+/*! ZSTD_checkCParams() :
 *   Ensure param values remain within authorized range */
 ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 
-/*! ZSTD_adjustParams() :
+/*! ZSTD_adjustCParams() :
 *   optimize params for a given `srcSize` and `dictSize`.
 *   both values are optional, select `0` if unknown. */
-ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U64 srcSize, size_t dictSize);
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
 
 /*! ZSTD_compress_advanced() :
 *   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
@@ -295,17 +312,25 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
 
 /*--- Advanced Decompression functions ---*/
 
+/*! ZSTD_estimateDCtxSize() :
+ *  Gives the potential amount of memory allocated to create a ZSTD_DCtx */
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
 /*! ZSTD_createDCtx_advanced() :
  *  Create a ZSTD decompression context using external alloc and free functions */
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
 
+/*! ZSTD_sizeofDCtx() :
+ *  Gives the amount of memory used by a given ZSTD_DCtx */
+ZSTDLIB_API size_t ZSTD_sizeofDCtx(const ZSTD_DCtx* dctx);
 
-/* ****************************************************************
+
+/* ******************************************************************
 *  Streaming functions (direct mode - synchronous and buffer-less)
-******************************************************************/
+********************************************************************/
 ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);
 
 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -324,7 +349,7 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
   Then, consume your input using ZSTD_compressContinue().
   There are some important considerations to keep in mind when using this advanced function :
   - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only.
-  - Interface is synchronous : input will be entirely consumed and produce 1+ compressed blocks.
+  - Interface is synchronous : input is consumed entirely and produce 1 (or more) compressed blocks.
   - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
     Worst case evaluation is provided by ZSTD_compressBound().
     ZSTD_compressContinue() doesn't guarantee recover after a failed compression.
@@ -341,10 +366,10 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
 */
 
 typedef struct {
-    U64 frameContentSize;
-    U32 windowSize;
-    U32 dictID;
-    U32 checksumFlag;
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
 } ZSTD_frameParams;
 
 ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
@@ -380,15 +405,23 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
   Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
   ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
   ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
-  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
-  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.
 
   @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
   It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
 
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
+  They should preferably be located contiguously, prior to current block.
+  Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+    or that previous contiguous segment is large enough to properly handle maximum back-reference.
+
   A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
   Context can then be reset to start a new decompression.
 
+
+  == Special case : skippable frames ==
+
   Skippable frames allow the integration of user-defined data into a flow of concatenated frames.
   Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frame is following:
   a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
@@ -404,11 +437,14 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
 *  Block functions
 ****************************************/
 /*! Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~18 bytes, which is non-negligible on very small blocks.
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
 
     A few rules to respect :
-    - Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
-    - Compressing or decompressing requires a context structure
+    - Uncompressed block size must be <= MIN (128 KB, 1 << windowLog)
+      + If you need to compress more, cut data into multiple blocks
+      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+    - Compressing and decompressing require a context structure
       + Use ZSTD_createCCtx() and ZSTD_createDCtx()
     - It is necessary to init context before starting
       + compression : ZSTD_compressBegin()
@@ -418,23 +454,16 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
     - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
       In which case, nothing is produced into `dst`.
       + User must test for such outcome and deal directly with uncompressed data
-      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!
+      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
+        Use ZSTD_insertBlock() in such a case.
+        Insert block once it's copied into its final position.
 */
 
 #define ZSTD_BLOCKSIZE_MAX (128 * 1024)   /* define, for static allocation */
 ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-
-/*-*************************************
-*  Error management
-***************************************/
-#include "error_public.h"
-/*! ZSTD_getErrorCode() :
-    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
-    which can be used to compare directly with enum list published into "error_public.h" */
-ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
-ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
+ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert block into `dctx` history. Useful to track uncompressed blocks */
 
 
 #endif   /* ZSTD_STATIC_LINKING_ONLY */
diff --git a/programs/.gitignore b/programs/.gitignore
index cbe39dcdf..8d6e993e7 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -43,6 +43,7 @@ _*
 tmp*
 *.zst
 result
+out
 
 # fuzzer
 afl
diff --git a/programs/Makefile b/programs/Makefile
index 52a7ca076..796742693 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -38,7 +38,7 @@ MANDIR  = $(PREFIX)/share/man/man1
 
 ZSTDDIR = ../lib
 
-CPPFLAGS= -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -DXXH_NAMESPACE=ZSTD_
+CPPFLAGS= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -DXXH_NAMESPACE=ZSTD_
 CFLAGS ?= -O3  # -falign-loops=32   # not always beneficial
 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
 FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
@@ -154,10 +154,10 @@ clean:
 	@echo Cleaning completed
 
 
-#------------------------------------------------------------------------
-#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
-#------------------------------------------------------------------------
-ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+#---------------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD, Hurd and OpenBSD targets
+#---------------------------------------------------------------------------------
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD))
 HOST_OS = POSIX
 install: zstd
 	@echo Installing binaries
diff --git a/programs/bench.c b/programs/bench.c
index a8fc7408a..a463576b7 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -142,22 +142,20 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         const size_t* fileSizes, U32 nbFiles,
                         const void* dictBuffer, size_t dictBufferSize, benchResult_t *result)
 {
-    size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
+    size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
     U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
     blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
     size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
     void* const compressedBuffer = malloc(maxCompressedSize);
     void* const resultBuffer = malloc(srcSize);
-    ZSTD_CCtx* refCtx = ZSTD_createCCtx();
-    ZSTD_CCtx* ctx = ZSTD_createCCtx();
-    ZSTD_DCtx* refDCtx = ZSTD_createDCtx();
-    ZSTD_DCtx* dctx = ZSTD_createDCtx();
+    ZSTD_CCtx* const ctx = ZSTD_createCCtx();
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
     U32 nbBlocks;
     UTIL_time_t ticksPerSecond;
 
     /* checks */
-    if (!compressedBuffer || !resultBuffer || !blockTable || !refCtx || !ctx || !refDCtx || !dctx)
-        EXM_THROW(31, "not enough memory");
+    if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
+        EXM_THROW(31, "allocation error : not enough memory");
 
     /* init */
     if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
@@ -213,13 +211,17 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             DISPLAYLEVEL(2, "%2i-%-17.17s :%10u ->\r", testNb, displayName, (U32)srcSize);
             memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
 
-            UTIL_sleepMilli(1); /* give processor time to other processes */
+            UTIL_sleepMilli(1);  /* give processor time to other processes */
             UTIL_waitForNextTick(ticksPerSecond);
             UTIL_getTime(&clockStart);
 
-            {   U32 nbLoops = 0;
-                ZSTD_CDict* cdict = ZSTD_createCDict(dictBuffer, dictBufferSize, cLevel);
-                if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict() allocation failure");
+            {   //size_t const refSrcSize = (nbBlocks == 1) ? srcSize : 0;
+                //ZSTD_parameters const zparams = ZSTD_getParams(cLevel, refSrcSize, dictBufferSize);
+                ZSTD_parameters const zparams = ZSTD_getParams(cLevel, blockSize, dictBufferSize);
+                ZSTD_customMem const cmem = { NULL, NULL, NULL };
+                U32 nbLoops = 0;
+                ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, zparams, cmem);
+                if (cdict==NULL) EXM_THROW(1, "ZSTD_createCDict_advanced() allocation failure");
                 do {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
@@ -227,7 +229,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                                             blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
                                             blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize,
                                             cdict);
-                        if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingPreparedCCtx() failed : %s", ZSTD_getErrorName(rSize));
+                        if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingCDict() failed : %s", ZSTD_getErrorName(rSize));
                         blockTable[blockNb].cSize = rSize;
                     }
                     nbLoops++;
@@ -264,7 +266,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                             blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
                             ddict);
                         if (ZSTD_isError(regenSize)) {
-                            DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s  \n",
+                            DISPLAY("ZSTD_decompress_usingDDict() failed on block %u : %s  \n",
                                       blockNb, ZSTD_getErrorName(regenSize));
                             clockLoop = 0;   /* force immediate test end */
                             break;
@@ -321,9 +323,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
     free(blockTable);
     free(compressedBuffer);
     free(resultBuffer);
-    ZSTD_freeCCtx(refCtx);
     ZSTD_freeCCtx(ctx);
-    ZSTD_freeDCtx(refDCtx);
     ZSTD_freeDCtx(dctx);
     return 0;
 }
diff --git a/programs/datagen.c b/programs/datagen.c
index ec118f5d1..6cb5111ff 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -23,12 +23,19 @@
     - source repository : https://github.com/Cyan4973/zstd
 */
 
+/* *************************************
+*  Compiler Options
+***************************************/
+#define _CRT_SECURE_NO_WARNINGS  /* removes Visual warning on strerror() */
+
+
 /*-************************************
-*  Includes
+*  Dependencies
 **************************************/
 #include <stdlib.h>    /* malloc */
 #include <stdio.h>     /* FILE, fwrite, fprintf */
 #include <string.h>    /* memcpy */
+#include <errno.h>     /* errno */
 #include "mem.h"       /* U32 */
 
 
@@ -87,12 +94,10 @@ static void RDG_fillLiteralDistrib(BYTE* ldt, double ld)
     U32 u;
 
     if (ld<=0.0) ld = 0.0;
-    //TRACE(" percent:%5.2f%% \n", ld*100.);
-    //TRACE(" start:(%c)[%02X] ", character, character);
     for (u=0; u<LTSIZE; ) {
         U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1;
         U32 const end = MIN ( u + weight , LTSIZE);
-        while (u < end) ldt[u++] = character;   // TRACE(" %u(%c)[%02X] ", u, character, character);
+        while (u < end) ldt[u++] = character;
         character++;
         if (character > lastChar) character = firstChar;
     }
@@ -102,9 +107,7 @@ static void RDG_fillLiteralDistrib(BYTE* ldt, double ld)
 static BYTE RDG_genChar(U32* seed, const BYTE* ldt)
 {
     U32 const id = RDG_rand(seed) & LTMASK;
-    //TRACE(" %u : \n", id);
-    //TRACE(" %4u [%4u] ; val : %4u \n", id, id&255, ldt[id]);
-    return (ldt[id]);  /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with 0.0. Checked : table is fully initialized */
+    return ldt[id];  /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
 }
 
 
@@ -115,8 +118,7 @@ static U32 RDG_rand15Bits (unsigned* seedPtr)
 
 static U32 RDG_randLength(unsigned* seedPtr)
 {
-    if (RDG_rand(seedPtr) & 7)
-        return (RDG_rand(seedPtr) & 0xF);
+    if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF);   /* small length */
     return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
 }
 
@@ -156,7 +158,6 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
             U32 const randOffset = RDG_rand15Bits(seedPtr) + 1;
             U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos);
             size_t match = pos - offset;
-            //TRACE("pos : %u; offset: %u ; length : %u \n", (U32)pos, offset, length);
             while (pos < d) buffPtr[pos++] = buffPtr[match++];   /* correctly manages overlaps */
             prevOffset = offset;
         } else {
@@ -171,9 +172,8 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
 void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
 {
     BYTE ldt[LTSIZE];
-    memset(ldt, '0', sizeof(ldt));
+    memset(ldt, '0', sizeof(ldt));  /* yes, character '0', this is intentional */
     if (litProba<=0.0) litProba = matchProba / 4.5;
-    //TRACE(" percent:%5.2f%% \n", litProba*100.);
     RDG_fillLiteralDistrib(ldt, litProba);
     RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed);
 }
@@ -185,12 +185,12 @@ void RDG_genStdout(unsigned long long size, double matchProba, double litProba,
     size_t const stdDictSize = 32 KB;
     BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize);
     U64 total = 0;
-    BYTE ldt[LTSIZE];
+    BYTE ldt[LTSIZE];   /* literals distribution table */
 
     /* init */
-    if (buff==NULL) { fprintf(stdout, "not enough memory\n"); exit(1); }
+    if (buff==NULL) { fprintf(stderr, "datagen: error: %s \n", strerror(errno)); exit(1); }
     if (litProba<=0.0) litProba = matchProba / 4.5;
-    memset(ldt, '0', sizeof(ldt));
+    memset(ldt, '0', sizeof(ldt));   /* yes, character '0', this is intentional */
     RDG_fillLiteralDistrib(ldt, litProba);
     SET_BINARY_MODE(stdout);
 
diff --git a/programs/dibio.c b/programs/dibio.c
index d23476e31..a61ea9cc6 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -30,6 +30,7 @@
 #include <string.h>         /* memset */
 #include <stdio.h>          /* fprintf, fopen, ftello64 */
 #include <time.h>           /* clock_t, clock, CLOCKS_PER_SEC */
+#include <errno.h>          /* errno */
 
 #include "mem.h"            /* read */
 #include "error_private.h"
@@ -43,13 +44,10 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
-#define DICTLISTSIZE 10000
 #define MEMMULT 11
 static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
 
 #define NOISELENGTH 32
-#define PRIME1   2654435761U
-#define PRIME2   2246822519U
 
 
 /*-*************************************
@@ -60,17 +58,13 @@ static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_
 static unsigned g_displayLevel = 0;   /* 0 : no display;   1: errors;   2: default;  4: full information */
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((DIB_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+            if ((DIB_clockSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
             { g_time = clock(); DISPLAY(__VA_ARGS__); \
             if (g_displayLevel>=4) fflush(stdout); } }
-static const unsigned refreshRate = 150;
+static const clock_t refreshRate = CLOCKS_PER_SEC * 2 / 10;
 static clock_t g_time = 0;
 
-static unsigned DIB_GetMilliSpan(clock_t nPrevious)
-{
-    clock_t const nCurrent = clock();
-    return (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
-}
+static clock_t DIB_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
 
 
 /*-*************************************
@@ -97,13 +91,15 @@ unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }
 
 const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
 
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
+
 
 /* ********************************************************
 *  File related operations
 **********************************************************/
 /** DiB_loadFiles() :
 *   @return : nb of files effectively loaded into `buffer` */
-static unsigned DiB_loadFiles(void* buffer, size_t bufferSize,
+static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
                               size_t* fileSizes,
                               const char** fileNamesTable, unsigned nbFiles)
 {
@@ -112,18 +108,20 @@ static unsigned DiB_loadFiles(void* buffer, size_t bufferSize,
     unsigned n;
 
     for (n=0; n<nbFiles; n++) {
-        unsigned long long const fs64 = UTIL_getFileSize(fileNamesTable[n]);
-        size_t const fileSize = (size_t)(fs64 > bufferSize-pos ? 0 : fs64);
-        FILE* const f = fopen(fileNamesTable[n], "rb");
-        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
-        DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
-        { size_t const readSize = fread(buff+pos, 1, fileSize, f);
-          if (readSize != fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
-          pos += readSize; }
-        fileSizes[n] = fileSize;
-        fclose(f);
-        if (fileSize == 0) break;  /* stop there, not enough memory to load all files */
-    }
+        const char* const fileName = fileNamesTable[n];
+        unsigned long long const fs64 = UTIL_getFileSize(fileName);
+        size_t const fileSize = (size_t) MIN(fs64, 128 KB);
+        if (fileSize > *bufferSizePtr-pos) break;
+        {   FILE* const f = fopen(fileName, "rb");
+            if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
+            DISPLAYUPDATE(2, "Loading %s...       \r", fileName);
+            { size_t const readSize = fread(buff+pos, 1, fileSize, f);
+              if (readSize != fileSize) EXM_THROW(11, "Pb reading %s", fileName);
+              pos += readSize; }
+            fileSizes[n] = fileSize;
+            fclose(f);
+    }   }
+    *bufferSizePtr = pos;
     return n;
 }
 
@@ -137,26 +135,28 @@ static size_t DiB_findMaxMem(unsigned long long requiredMem)
     void* testmem = NULL;
 
     requiredMem = (((requiredMem >> 23) + 1) << 23);
-    requiredMem += 2 * step;
+    requiredMem += step;
     if (requiredMem > maxMemory) requiredMem = maxMemory;
 
     while (!testmem) {
-        requiredMem -= step;
         testmem = malloc((size_t)requiredMem);
+        requiredMem -= step;
     }
 
     free(testmem);
-    return (size_t)(requiredMem - step);
+    return (size_t)requiredMem;
 }
 
 
 static void DiB_fillNoise(void* buffer, size_t length)
 {
-    unsigned acc = PRIME1;
+    unsigned const prime1 = 2654435761U;
+    unsigned const prime2 = 2246822519U;
+    unsigned acc = prime1;
     size_t p=0;;
 
     for (p=0; p<length; p++) {
-        acc *= PRIME2;
+        acc *= prime2;
         ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
     }
 }
@@ -188,7 +188,6 @@ size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
                               ZDICT_params_t parameters);
 
 
-#define MIN(a,b)  ((a)<(b)?(a):(b))
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                        const char** fileNamesTable, unsigned nbFiles,
                        ZDICT_params_t params)
@@ -197,7 +196,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
     size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
     unsigned long long const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
     size_t const maxMem =  DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
-    size_t const benchedSize = MIN (maxMem, (size_t)totalSizeToLoad);
+    size_t benchedSize = MIN (maxMem, (size_t)totalSizeToLoad);
     void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
     int result = 0;
 
@@ -210,7 +209,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
         DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
 
     /* Load input buffer */
-    nbFiles = DiB_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
+    nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
     DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
 
     {   size_t const dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
diff --git a/programs/fileio.c b/programs/fileio.c
index 5e7b26dc6..855385bef 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -41,13 +41,14 @@
 /* *************************************
 *  Compiler Options
 ***************************************/
-#define _POSIX_SOURCE 1        /* enable %llu on Windows */
+#define _POSIX_SOURCE 1          /* enable %llu on Windows */
+#define _CRT_SECURE_NO_WARNINGS  /* removes Visual warning on strerror() */
 
 
 /*-*************************************
 *  Includes
 ***************************************/
-#include "util.h"       /* Compiler options, UTIL_GetFileSize */
+#include "util.h"       /* Compiler options, UTIL_GetFileSize, _LARGEFILE64_SOURCE */
 #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
 #include <stdlib.h>     /* malloc, free */
 #include <string.h>     /* strcmp, strlen */
@@ -58,7 +59,6 @@
 #include "fileio.h"
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
 #include "zstd.h"
-#include "zstd_internal.h" /* MIN, KB, MB */
 #define ZBUFF_STATIC_LINKING_ONLY
 #include "zbuff.h"
 
@@ -84,6 +84,10 @@
 /*-*************************************
 *  Constants
 ***************************************/
+#define KB *(1<<10)
+#define MB *(1<<20)
+#define GB *(1U<<30)
+
 #define _1BIT  0x01
 #define _2BITS 0x03
 #define _3BITS 0x07
@@ -113,21 +117,17 @@ static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result
 void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((FIO_GetMilliSpan(g_time) > refreshRate) || (g_displayLevel>=4)) \
+            if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
             { g_time = clock(); DISPLAY(__VA_ARGS__); \
             if (g_displayLevel>=4) fflush(stdout); } }
-static const unsigned refreshRate = 150;
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
 static clock_t g_time = 0;
 
-static unsigned FIO_GetMilliSpan(clock_t nPrevious)
-{
-    clock_t const nCurrent = clock();
-    return (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
-}
+#define MIN(a,b)    ((a) < (b) ? (a) : (b))
 
 
 /*-*************************************
-*  Local Parameters
+*  Local Parameters - Not thread safe
 ***************************************/
 static U32 g_overwrite = 0;
 void FIO_overwriteMode(void) { g_overwrite=1; }
@@ -175,7 +175,7 @@ static FILE* FIO_openSrcFile(const char* srcFileName)
         f = fopen(srcFileName, "rb");
     }
 
-    if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: No such file\n", srcFileName);
+    if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
 
     return f;
 }
@@ -201,18 +201,20 @@ static FILE* FIO_openDstFile(const char* dstFileName)
                 if (g_displayLevel <= 1) {
                     /* No interaction possible */
                     DISPLAY("zstd: %s already exists; not overwritten  \n", dstFileName);
-                    return 0;
+                    return NULL;
                 }
                 DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
                 {   int ch = getchar();
                     if ((ch!='Y') && (ch!='y')) {
                         DISPLAY("    not overwritten  \n");
-                        return 0;
+                        return NULL;
                     }
                     while ((ch!=EOF) && (ch!='\n')) ch = getchar();  /* flush rest of input line */
         }   }   }
         f = fopen( dstFileName, "wb" );
     }
+
+    if (f==NULL) DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
     return f;
 }
 
@@ -233,18 +235,18 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName)
 
     DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
     fileHandle = fopen(fileName, "rb");
-    if (fileHandle==0) EXM_THROW(31, "Error opening file %s", fileName);
+    if (fileHandle==0) EXM_THROW(31, "zstd: %s: %s", fileName, strerror(errno));
     fileSize = UTIL_getFileSize(fileName);
     if (fileSize > MAX_DICT_SIZE) {
         int seekResult;
         if (fileSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", fileName);   /* avoid extreme cases */
         DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", fileName, MAX_DICT_SIZE);
         seekResult = fseek(fileHandle, (long int)(fileSize-MAX_DICT_SIZE), SEEK_SET);   /* use end of file */
-        if (seekResult != 0) EXM_THROW(33, "Error seeking into file %s", fileName);
+        if (seekResult != 0) EXM_THROW(33, "zstd: %s: %s", fileName, strerror(errno));
         fileSize = MAX_DICT_SIZE;
     }
-    *bufferPtr = (BYTE*)malloc((size_t)fileSize);
-    if (*bufferPtr==NULL) EXM_THROW(34, "Allocation error : not enough memory for dictBuffer");
+    *bufferPtr = malloc((size_t)fileSize);
+    if (*bufferPtr==NULL) EXM_THROW(34, "zstd: %s", strerror(errno));
     { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
       if (readSize!=fileSize) EXM_THROW(35, "Error reading dictionary file %s", fileName); }
     fclose(fileHandle);
@@ -271,16 +273,15 @@ typedef struct {
 static cRess_t FIO_createCResources(const char* dictFileName)
 {
     cRess_t ress;
+    memset(&ress, 0, sizeof(ress));
 
     ress.ctx = ZBUFF_createCCtx();
-    if (ress.ctx == NULL) EXM_THROW(30, "Allocation error : can't create ZBUFF context");
-
-    /* Allocate Memory */
+    if (ress.ctx == NULL) EXM_THROW(30, "zstd: allocation error : can't create ZBUFF context");
     ress.srcBufferSize = ZBUFF_recommendedCInSize();
     ress.srcBuffer = malloc(ress.srcBufferSize);
     ress.dstBufferSize = ZBUFF_recommendedCOutSize();
     ress.dstBuffer = malloc(ress.dstBufferSize);
-    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory");
+    if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "zstd: allocation error : not enough memory");
 
     /* dictionary */
     ress.dictBufferSize = FIO_loadFile(&(ress.dictBuffer), dictFileName);
@@ -295,7 +296,7 @@ static void FIO_freeCResources(cRess_t ress)
     free(ress.dstBuffer);
     free(ress.dictBuffer);
     errorCode = ZBUFF_freeCCtx(ress.ctx);
-    if (ZBUFF_isError(errorCode)) EXM_THROW(38, "Error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode));
+    if (ZBUFF_isError(errorCode)) EXM_THROW(38, "zstd: error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode));
 }
 
 
@@ -315,9 +316,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
     U64 const fileSize = UTIL_getFileSize(srcFileName);
 
     /* init */
-    {   ZSTD_parameters params;
-        memset(&params, 0, sizeof(params));
-        params.cParams = ZSTD_getCParams(cLevel, fileSize, ress.dictBufferSize);
+    {   ZSTD_parameters params = ZSTD_getParams(cLevel, fileSize, ress.dictBufferSize);
         params.fParams.contentSizeFlag = 1;
         params.fParams.checksumFlag = g_checksumFlag;
         params.fParams.noDictIDFlag = !g_dictIDFlag;
@@ -330,7 +329,6 @@ static int FIO_compressFilename_internal(cRess_t ress,
     }   }
 
     /* Main compression loop */
-    readsize = 0;
     while (1) {
         /* Fill input Buffer */
         size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
@@ -338,8 +336,8 @@ static int FIO_compressFilename_internal(cRess_t ress,
         readsize += inSize;
         DISPLAYUPDATE(2, "\rRead : %u MB  ", (U32)(readsize>>20));
 
-        {   /* Compress using buffered streaming */
-            size_t usedInSize = inSize;
+        /* Compress using buffered streaming */
+        {   size_t usedInSize = inSize;
             size_t cSize = ress.dstBufferSize;
             { size_t const result = ZBUFF_compressContinue(ress.ctx, ress.dstBuffer, &cSize, ress.srcBuffer, &usedInSize);
               if (ZBUFF_isError(result)) EXM_THROW(23, "Compression error : %s ", ZBUFF_getErrorName(result)); }
@@ -366,17 +364,19 @@ static int FIO_compressFilename_internal(cRess_t ress,
     }
 
     /* Status */
+    if (strlen(srcFileName) > 20) srcFileName += strlen(srcFileName)-20; /* display last 20 characters */
     DISPLAYLEVEL(2, "\r%79s\r", "");
-    DISPLAYLEVEL(2,"%-20.20s :%6.2f%%   (%6llu =>%6llu bytes, %s) \n", srcFileName,
-        (double)compressedfilesize/readsize*100, (unsigned long long)readsize, (unsigned long long) compressedfilesize,
-                 dstFileName);
+    DISPLAYLEVEL(2,"%-20.20s :%6.2f%%   (%6llu => %6llu bytes, %s) \n", srcFileName,
+        (double)compressedfilesize/(readsize+(!readsize) /* avoid div by zero */ )*100,
+        (unsigned long long)readsize, (unsigned long long) compressedfilesize,
+         dstFileName);
 
     return 0;
 }
 
 
-/*! FIO_compressFilename_internal() :
- *  same as FIO_compressFilename_extRess(), with ress.destFile already opened (typically stdout)
+/*! FIO_compressFilename_srcFile() :
+ *  note : ress.destFile already opened
  *  @return : 0 : compression completed correctly,
  *            1 : missing or pb opening srcFileName
  */
@@ -397,7 +397,7 @@ static int FIO_compressFilename_srcFile(cRess_t ress,
     result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);
 
     fclose(ress.srcFile);
-    if ((g_removeSrcFile) && (!result)) remove(srcFileName);
+    if ((g_removeSrcFile) && (!result)) { if (remove(srcFileName)) EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); }
     return result;
 }
 
@@ -417,8 +417,8 @@ static int FIO_compressFilename_dstFile(cRess_t ress,
 
     result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, cLevel);
 
-    if (fclose(ress.dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
-    if (result!=0) remove(dstFileName);   /* remove operation artefact */
+    if (fclose(ress.dstFile)) { DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; }
+    if (result!=0) { if (remove(dstFileName)) EXM_THROW(1, "zstd: %s: %s", dstFileName, strerror(errno)); }  /* remove operation artefact */
     return result;
 }
 
@@ -429,13 +429,13 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
     clock_t const start = clock();
 
     cRess_t const ress = FIO_createCResources(dictFileName);
-    int const issueWithSrcFile = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
-    FIO_freeCResources(ress);
+    int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
 
-    {   double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
-        DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
-    }
-    return issueWithSrcFile;
+    double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
+    DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
+
+    FIO_freeCResources(ress);
+    return result;
 }
 
 
@@ -444,13 +444,14 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
                                   const char* dictFileName, int compressionLevel)
 {
     int missed_files = 0;
-    char*  dstFileName = (char*)malloc(FNSPACE);
     size_t dfnSize = FNSPACE;
+    char*  dstFileName = (char*)malloc(FNSPACE);
     size_t const suffixSize = suffix ? strlen(suffix) : 0;
-    cRess_t ress;
+    cRess_t ress = FIO_createCResources(dictFileName);
 
     /* init */
-    ress = FIO_createCResources(dictFileName);
+    if (dstFileName==NULL) EXM_THROW(27, "FIO_compressMultipleFilenames : allocation error for dstFileName");
+    if (suffix == NULL) EXM_THROW(28, "FIO_compressMultipleFilenames : dst unknown");  /* should never happen */
 
     /* loop on each file */
     if (!strcmp(suffix, stdoutmark)) {
@@ -502,12 +503,11 @@ typedef struct {
 static dRess_t FIO_createDResources(const char* dictFileName)
 {
     dRess_t ress;
+    memset(&ress, 0, sizeof(ress));
 
-    /* init */
+    /* Allocation */
     ress.dctx = ZBUFF_createDCtx();
     if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZBUFF decompression context");
-
-    /* Allocate Memory */
     ress.srcBufferSize = ZBUFF_recommendedDInSize();
     ress.srcBuffer = malloc(ress.srcBufferSize);
     ress.dstBufferSize = ZBUFF_recommendedDOutSize();
@@ -700,16 +700,19 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
         if (sizeCheck != toRead) EXM_THROW(31, "zstd: %s read error : cannot read header", srcFileName);
         {   U32 const magic = MEM_readLE32(ress.srcBuffer);
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-            if (ZSTD_isLegacy(magic)) {
+            if (ZSTD_isLegacy(ress.srcBuffer, 4)) {
                 filesize += FIO_decompressLegacyFrame(dstFile, srcFile, ress.dictBuffer, ress.dictBufferSize, magic);
                 continue;
             }
 #endif
             if (((magic & 0xFFFFFFF0U) != ZSTD_MAGIC_SKIPPABLE_START) && (magic != ZSTD_MAGICNUMBER)) {
-                if (g_overwrite)   /* -df : pass-through mode */
-                    return FIO_passThrough(dstFile, srcFile, ress.srcBuffer, ress.srcBufferSize);
-                else {
+                if (g_overwrite) {  /* -df : pass-through mode */
+                    unsigned const result = FIO_passThrough(dstFile, srcFile, ress.srcBuffer, ress.srcBufferSize);
+                    if (fclose(srcFile)) EXM_THROW(32, "zstd: %s close error", srcFileName);  /* error should never happen */
+                    return result;
+                } else {
                     DISPLAYLEVEL(1, "zstd: %s: not in zstd format \n", srcFileName);
+                    fclose(srcFile);
                     return 1;
         }   }   }
         filesize += FIO_decompressFrame(ress, dstFile, srcFile, toRead);
@@ -720,8 +723,8 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
     DISPLAYLEVEL(2, "%-20.20s: %llu bytes \n", srcFileName, filesize);
 
     /* Close */
-    fclose(srcFile);
-    if (g_removeSrcFile) remove(srcFileName);
+    if (fclose(srcFile)) EXM_THROW(33, "zstd: %s close error", srcFileName);  /* error should never happen */
+    if (g_removeSrcFile) { if (remove(srcFileName)) EXM_THROW(34, "zstd: %s: %s", srcFileName, strerror(errno)); };
     return 0;
 }
 
@@ -741,7 +744,7 @@ static int FIO_decompressDstFile(dRess_t ress,
     result = FIO_decompressSrcFile(ress, srcFileName);
 
     if (fclose(ress.dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
-    if (result != 0) remove(dstFileName);
+    if (result != 0) if (remove(dstFileName)) EXM_THROW(39, "remove %s error : %s", dstFileName, strerror(errno));
     return result;
 }
 
@@ -768,19 +771,21 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
     int missingFiles = 0;
     dRess_t ress = FIO_createDResources(dictFileName);
 
+    if (suffix==NULL) EXM_THROW(70, "zstd: decompression: unknown dst");   /* should never happen */
+
     if (!strcmp(suffix, stdoutmark) || !strcmp(suffix, nulmark)) {
         unsigned u;
         ress.dstFile = FIO_openDstFile(suffix);
         if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", suffix);
         for (u=0; u<nbFiles; u++)
             missingFiles += FIO_decompressSrcFile(ress, srcNamesTable[u]);
-        if (fclose(ress.dstFile)) EXM_THROW(39, "Write error : cannot properly close %s", stdoutmark);
+        if (fclose(ress.dstFile)) EXM_THROW(72, "Write error : cannot properly close %s", stdoutmark);
     } else {
-        size_t const suffixSize = suffix ? strlen(suffix) : 0;
+        size_t const suffixSize = strlen(suffix);
         size_t dfnSize = FNSPACE;
         unsigned u;
         char* dstFileName = (char*)malloc(FNSPACE);
-        if (dstFileName==NULL) EXM_THROW(70, "not enough memory for dstFileName");
+        if (dstFileName==NULL) EXM_THROW(73, "not enough memory for dstFileName");
         for (u=0; u<nbFiles; u++) {   /* create dstFileName */
             const char* const srcFileName = srcNamesTable[u];
             size_t const sfnSize = strlen(srcFileName);
@@ -789,7 +794,7 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
                 free(dstFileName);
                 dfnSize = sfnSize + 20;
                 dstFileName = (char*)malloc(dfnSize);
-                if (dstFileName==NULL) EXM_THROW(71, "not enough memory for dstFileName");
+                if (dstFileName==NULL) EXM_THROW(74, "not enough memory for dstFileName");
             }
             if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
                 DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%4s expected) -- ignored \n", srcFileName, suffix);
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index d1dfe51e8..77a711869 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -40,8 +40,9 @@
 #include <sys/timeb.h>   /* timeb */
 #include <string.h>      /* strcmp */
 #include <time.h>        /* clock_t */
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue */
-#include "zstd.h"        /* ZSTD_VERSION_STRING, ZSTD_getErrorCode */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue, ZSTD_compressBlock */
+#include "zstd.h"        /* ZSTD_VERSION_STRING */
+#include "error_public.h" /* ZSTD_getErrorCode */
 #include "zdict.h"       /* ZDICT_trainFromBuffer */
 #include "datagen.h"     /* RDG_genBuffer */
 #include "mem.h"
@@ -109,9 +110,9 @@ static unsigned FUZ_highbit32(U32 v32)
 }
 
 
-#define CHECKTEST(var, fn)  size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
-#define CHECK(fn)  { CHECKTEST(err, fn); }
-#define CHECKPLUS(var, fn, more)  { CHECKTEST(var, fn); more; }
+#define CHECK_V(var, fn)  size_t const var = fn; if (ZSTD_isError(var)) goto _output_error
+#define CHECK(fn)  { CHECK_V(err, fn); }
+#define CHECKPLUS(var, fn, more)  { CHECK_V(var, fn); more; }
 static int basicUnitTests(U32 seed, double compressibility)
 {
     size_t const CNBuffSize = 5 MB;
@@ -137,6 +138,12 @@ static int basicUnitTests(U32 seed, double compressibility)
               cSize=r );
     DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
 
+    DISPLAYLEVEL(4, "test%3i : decompressed size test : ", testNb++);
+    {   unsigned long long const rSize = ZSTD_getDecompressedSize(compressedBuffer, cSize);
+        if (rSize != CNBuffSize) goto _output_error;
+    }
+    DISPLAYLEVEL(4, "OK \n");
+
     DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize);
     CHECKPLUS( r , ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize),
                if (r != CNBuffSize) goto _output_error);
@@ -202,7 +209,7 @@ static int basicUnitTests(U32 seed, double compressibility)
                       cSize += r);
             CHECKPLUS(r, ZSTD_compressEnd(ctxDuplicated, (char*)compressedBuffer+cSize, ZSTD_compressBound(CNBuffSize)-cSize),
                       cSize += r);
-            if (cSize != cSizeOrig) goto _output_error;   /* should be identical ==> have same size */
+            if (cSize != cSizeOrig) goto _output_error;   /* should be identical ==> same size */
         }
         DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBuffSize*100);
 
@@ -216,10 +223,8 @@ static int basicUnitTests(U32 seed, double compressibility)
 
         DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
         {   size_t const testSize = CNBuffSize / 3;
-            {   ZSTD_compressionParameters const cPar = ZSTD_getCParams(2, testSize, dictSize);
-                ZSTD_frameParameters const fPar = { 1 , 0 , 0 };
-                ZSTD_parameters p;
-                p.cParams = cPar; p.fParams = fPar;
+            {   ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);
+                p.fParams.contentSizeFlag = 1;
                 CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
             }
             CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig) );
@@ -277,10 +282,8 @@ static int basicUnitTests(U32 seed, double compressibility)
         DISPLAYLEVEL(4, "OK \n");
 
         DISPLAYLEVEL(4, "test%3i : compress without dictID : ", testNb++);
-        {   ZSTD_frameParameters const fParams = { 0 /*contentSize*/, 0 /*checksum*/, 1 /*NoDictID*/ };
-            ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, CNBuffSize, dictSize);
-            ZSTD_parameters p;
-            p.cParams = cParams; p.fParams = fParams;
+        {   ZSTD_parameters p = ZSTD_getParams(3, CNBuffSize, dictSize);
+            p.fParams.noDictIDFlag = 1;
             cSize = ZSTD_compress_advanced(cctx, compressedBuffer, ZSTD_compressBound(CNBuffSize),
                                            CNBuffer, CNBuffSize,
                                            dictBuffer, dictSize, p);
@@ -318,8 +321,9 @@ static int basicUnitTests(U32 seed, double compressibility)
     /* block API tests */
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         ZSTD_DCtx* const dctx = ZSTD_createDCtx();
-        static const size_t blockSize = 100 KB;
-        static const size_t dictSize = 16 KB;
+        static const size_t dictSize = 65 KB;
+        static const size_t blockSize = 100 KB;   /* won't cause pb with small dict size */
+        size_t cSize2;
 
         /* basic block compression */
         DISPLAYLEVEL(4, "test%3i : Block compression test : ", testNb++);
@@ -330,7 +334,7 @@ static int basicUnitTests(U32 seed, double compressibility)
 
         DISPLAYLEVEL(4, "test%3i : Block decompression test : ", testNb++);
         CHECK( ZSTD_decompressBegin(dctx) );
-        { CHECKTEST(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        { CHECK_V(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
           if (r != blockSize) goto _output_error; }
         DISPLAYLEVEL(4, "OK \n");
 
@@ -339,11 +343,20 @@ static int basicUnitTests(U32 seed, double compressibility)
         CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
         cSize = ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize);
         if (ZSTD_isError(cSize)) goto _output_error;
+        cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize);
+        if (ZSTD_isError(cSize2)) goto _output_error;
+        memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize);   /* fake non-compressed block */
+        cSize2 = ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize),
+                                          (char*)CNBuffer+dictSize+2*blockSize, blockSize);
+        if (ZSTD_isError(cSize2)) goto _output_error;
         DISPLAYLEVEL(4, "OK \n");
 
         DISPLAYLEVEL(4, "test%3i : Dictionary Block decompression test : ", testNb++);
         CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
-        { CHECKTEST( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+        { CHECK_V( r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+          if (r != blockSize) goto _output_error; }
+        ZSTD_insertBlock(dctx, (char*)decodedBuffer+blockSize, blockSize);   /* insert non-compressed block into dctx history */
+        { CHECK_V( r, ZSTD_decompressBlock(dctx, (char*)decodedBuffer+2*blockSize, CNBuffSize, (char*)compressedBuffer+cSize+blockSize, cSize2) );
           if (r != blockSize) goto _output_error; }
         DISPLAYLEVEL(4, "OK \n");
 
@@ -361,7 +374,7 @@ static int basicUnitTests(U32 seed, double compressibility)
         sampleSize += 96 KB;
         cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
         if (ZSTD_isError(cSize)) goto _output_error;
-        { CHECKTEST(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
+        { CHECK_V(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
           if (regenSize!=sampleSize) goto _output_error; }
         DISPLAYLEVEL(4, "OK \n");
     }
@@ -370,12 +383,12 @@ static int basicUnitTests(U32 seed, double compressibility)
     #define ZEROESLENGTH 100
     DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);
     memset(CNBuffer, 0, ZEROESLENGTH);
-    { CHECKTEST(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
+    { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1) );
       cSize = r; }
     DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100);
 
     DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH);
-    { CHECKTEST(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
+    { CHECK_V(r, ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize) );
       if (r != ZEROESLENGTH) goto _output_error; }
     DISPLAYLEVEL(4, "OK \n");
 
@@ -389,27 +402,29 @@ static int basicUnitTests(U32 seed, double compressibility)
         U32 rSeed = 1;
 
         /* create batch of 3-bytes sequences */
-        { int i; for (i=0; i < NB3BYTESSEQ; i++) {
-            _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255);
-            _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255);
-            _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255);
-        }}
+        {   int i;
+            for (i=0; i < NB3BYTESSEQ; i++) {
+                _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&rSeed) & 255);
+                _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&rSeed) & 255);
+                _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&rSeed) & 255);
+        }   }
 
         /* randomly fills CNBuffer with prepared 3-bytes sequences */
-        { int i; for (i=0; i < _3BYTESTESTLENGTH; i += 3) {   /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */
-            U32 const id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK;
-            ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0];
-            ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1];
-            ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
-    }   }}
+        {   int i;
+            for (i=0; i < _3BYTESTESTLENGTH; i += 3) {   /* note : CNBuffer size > _3BYTESTESTLENGTH+3 */
+                U32 const id = FUZ_rand(&rSeed) & NB3BYTESSEQMASK;
+                ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0];
+                ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1];
+                ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
+    }   }   }
     DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++);
-    { CHECKTEST(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
+    { CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
                                  CNBuffer, _3BYTESTESTLENGTH, 19) );
       cSize = r; }
     DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100);
 
     DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++);
-    { CHECKTEST(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
+    { CHECK_V(r, ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize) );
       if (r != _3BYTESTESTLENGTH) goto _output_error; }
     DISPLAYLEVEL(4, "OK \n");
 
@@ -555,6 +570,11 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
                   CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); }
         }   }
 
+        /* Decompressed size test */
+        {   unsigned long long const rSize = ZSTD_getDecompressedSize(cBuffer, cSize);
+            CHECK(rSize != sampleSize, "decompressed size incorrect");
+        }
+
         /* frame header decompression test */
         {   ZSTD_frameParams dParams;
             size_t const check = ZSTD_getFrameParams(&dParams, cBuffer, cSize);
@@ -691,7 +711,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
         while (totalCSize < cSize) {
             size_t const inSize = ZSTD_nextSrcSizeToDecompress(dctx);
             size_t const genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize);
-            CHECK (ZSTD_isError(genSize), "streaming decompression error : %s", ZSTD_getErrorName(genSize));
+            CHECK (ZSTD_isError(genSize), "ZSTD_decompressContinue error : %s", ZSTD_getErrorName(genSize));
             totalGenSize += genSize;
             totalCSize += inSize;
         }
diff --git a/programs/paramgrill.c b/programs/paramgrill.c
index 6cf4ccd89..04a55c876 100644
--- a/programs/paramgrill.c
+++ b/programs/paramgrill.c
@@ -22,33 +22,19 @@
     - zstd homepage : http://www.zstd.net/
 */
 
-/*-************************************
-*  Compiler Options
-**************************************/
-/* gettimeofday() are not supported by MSVC */
-#if defined(_MSC_VER) || defined(_WIN32)
-#  define BMK_LEGACY_TIMER 1
-#endif
-
 
 /*-************************************
 *  Dependencies
 **************************************/
-#include "util.h"         /* Compiler options, UTIL_GetFileSize */
-#include <stdlib.h>       /* malloc */
-#include <stdio.h>        /* fprintf, fopen, ftello64 */
-#include <string.h>       /* strcmp */
-#include <math.h>         /* log */
-
-/* Use ftime() if gettimeofday() is not available on your target */
-#if defined(BMK_LEGACY_TIMER)
-#  include <sys/timeb.h>  /* timeb, ftime */
-#else
-#  include <sys/time.h>   /* gettimeofday */
-#endif
+#include "util.h"      /* Compiler options, UTIL_GetFileSize */
+#include <stdlib.h>    /* malloc */
+#include <stdio.h>     /* fprintf, fopen, ftello64 */
+#include <string.h>    /* strcmp */
+#include <math.h>      /* log */
+#include <time.h>      /* clock_t */
 
 #include "mem.h"
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters, ZSTD_estimateCCtxSize */
 #include "zstd.h"
 #include "datagen.h"
 #include "xxhash.h"
@@ -67,7 +53,7 @@
 #define GB *(1ULL<<30)
 
 #define NBLOOPS    2
-#define TIMELOOP   2000
+#define TIMELOOP   (2 * CLOCKS_PER_SEC)
 
 #define NB_LEVELS_TRACKED 30
 
@@ -76,9 +62,9 @@ static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t
 #define COMPRESSIBILITY_DEFAULT 0.50
 static const size_t sampleSize = 10000000;
 
-static const int g_grillDuration = 50000000;   /* about 13 hours */
-static const int g_maxParamTime = 15000;   /* 15 sec */
-static const int g_maxVariationTime = 60000;   /* 60 sec */
+static const U32 g_grillDuration_s = 60000;   /* about 16 hours */
+static const clock_t g_maxParamTime = 15 * CLOCKS_PER_SEC;
+static const clock_t g_maxVariationTime = 60 * CLOCKS_PER_SEC;
 static const int g_maxNbVariations = 64;
 
 
@@ -111,49 +97,15 @@ void BMK_SetNbIterations(int nbLoops)
 *  Private functions
 *********************************************************/
 
-#if defined(BMK_LEGACY_TIMER)
+static clock_t BMK_clockSpan(clock_t cStart) { return clock() - cStart; }  /* works even if overflow ; max span ~ 30 mn */
 
-static int BMK_GetMilliStart(void)
-{
-  /* Based on Legacy ftime()
-  *  Rolls over every ~ 12.1 days (0x100000/24/60/60)
-  *  Use GetMilliSpan to correct for rollover */
-  struct timeb tb;
-  int nCount;
-  ftime( &tb );
-  nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000);
-  return nCount;
-}
-
-#else
-
-static int BMK_GetMilliStart(void)
-{
-  /* Based on newer gettimeofday()
-  *  Use GetMilliSpan to correct for rollover */
-  struct timeval tv;
-  int nCount;
-  gettimeofday(&tv, NULL);
-  nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000);
-  return nCount;
-}
-
-#endif
-
-
-static int BMK_GetMilliSpan( int nTimeStart )
-{
-  int nSpan = BMK_GetMilliStart() - nTimeStart;
-  if ( nSpan < 0 )
-    nSpan += 0x100000 * 1000;
-  return nSpan;
-}
+static U32 BMK_timeSpan(time_t tStart) { return (U32)difftime(time(NULL), tStart); }  /* accuracy in seconds only, span can be multiple years */
 
 
 static size_t BMK_findMaxMem(U64 requiredMem)
 {
-    size_t step = 64 MB;
-    BYTE* testmem=NULL;
+    size_t const step = 64 MB;
+    void* testmem = NULL;
 
     requiredMem = (((requiredMem >> 26) + 1) << 26);
     if (requiredMem > maxMemory) requiredMem = maxMemory;
@@ -161,7 +113,7 @@ static size_t BMK_findMaxMem(U64 requiredMem)
     requiredMem += 2*step;
     while (!testmem) {
         requiredMem -= step;
-        testmem = (BYTE*) malloc ((size_t)requiredMem);
+        testmem = malloc ((size_t)requiredMem);
     }
 
     free (testmem);
@@ -188,8 +140,8 @@ U32 FUZ_rand(U32* src)
 *********************************************************/
 typedef struct {
     size_t cSize;
-    U32 cSpeed;
-    U32 dSpeed;
+    double cSpeed;
+    double dSpeed;
 } BMK_result_t;
 
 typedef struct
@@ -265,35 +217,33 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
     RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.10, 1);
 
     /* Bench */
-    {
-        U32 loopNb;
+    {   U32 loopNb;
         size_t cSize = 0;
         double fastestC = 100000000., fastestD = 100000000.;
         double ratio = 0.;
         U64 crcCheck = 0;
-        const int startTime =BMK_GetMilliStart();
+        clock_t const benchStart = clock();
 
         DISPLAY("\r%79s\r", "");
+        memset(&params, 0, sizeof(params));
         params.cParams = cParams;
-        params.fParams.contentSizeFlag = 0;
         for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
             int nbLoops;
-            int milliTime;
             U32 blockNb;
-            const int totalTime = BMK_GetMilliSpan(startTime);
+            clock_t roundStart, roundClock;
 
-            /* early break (slow params) */
-            if (totalTime > g_maxParamTime) break;
+            { clock_t const benchTime = BMK_clockSpan(benchStart);
+              if (benchTime > g_maxParamTime) break; }
 
             /* Compression */
             DISPLAY("\r%1u-%s : %9u ->", loopNb, name, (U32)srcSize);
             memset(compressedBuffer, 0xE5, maxCompressedSize);
 
             nbLoops = 0;
-            milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliStart() == milliTime);
-            milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliSpan(milliTime) < TIMELOOP) {
+            roundStart = clock();
+            while (clock() == roundStart);
+            roundStart = clock();
+            while (BMK_clockSpan(roundStart) < TIMELOOP) {
                 for (blockNb=0; blockNb<nbBlocks; blockNb++)
                     blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx,
                                                     blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
@@ -302,40 +252,40 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
                                                     params);
                 nbLoops++;
             }
-            milliTime = BMK_GetMilliSpan(milliTime);
+            roundClock = BMK_clockSpan(roundStart);
 
             cSize = 0;
             for (blockNb=0; blockNb<nbBlocks; blockNb++)
                 cSize += blockTable[blockNb].cSize;
-            if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops;
+            if ((double)roundClock < fastestC * CLOCKS_PER_SEC * nbLoops) fastestC = ((double)roundClock / CLOCKS_PER_SEC) / nbLoops;
             ratio = (double)srcSize / (double)cSize;
             DISPLAY("\r");
             DISPLAY("%1u-%s : %9u ->", loopNb, name, (U32)srcSize);
-            DISPLAY(" %9u (%4.3f),%7.1f MB/s", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.);
+            DISPLAY(" %9u (%4.3f),%7.1f MB/s", (U32)cSize, ratio, (double)srcSize / fastestC / 1000000.);
             resultPtr->cSize = cSize;
-            resultPtr->cSpeed = (U32)((double)srcSize / fastestC);
+            resultPtr->cSpeed = (double)srcSize / fastestC;
 
 #if 1
             /* Decompression */
             memset(resultBuffer, 0xD6, srcSize);
 
             nbLoops = 0;
-            milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliStart() == milliTime);
-            milliTime = BMK_GetMilliStart();
-            for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++) {
+            roundStart = clock();
+            while (clock() == roundStart);
+            roundStart = clock();
+            for ( ; BMK_clockSpan(roundStart) < TIMELOOP; nbLoops++) {
                 for (blockNb=0; blockNb<nbBlocks; blockNb++)
                     blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
                                                                   blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
             }
-            milliTime = BMK_GetMilliSpan(milliTime);
+            roundClock = BMK_clockSpan(roundStart);
 
-            if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops;
+            if ((double)roundClock < fastestD * CLOCKS_PER_SEC * nbLoops) fastestD = ((double)roundClock / CLOCKS_PER_SEC) / nbLoops;
             DISPLAY("\r");
             DISPLAY("%1u-%s : %9u -> ", loopNb, name, (U32)srcSize);
-            DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.);
-            DISPLAY("%7.1f MB/s", (double)srcSize / fastestD / 1000.);
-            resultPtr->dSpeed = (U32)((double)srcSize / fastestD);
+            DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000000.);
+            DISPLAY("%7.1f MB/s", (double)srcSize / fastestD / 1000000.);
+            resultPtr->dSpeed = (double)srcSize / fastestD;
 
             /* CRC Checking */
             crcCheck = XXH64(resultBuffer, srcSize, 0);
@@ -362,6 +312,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
 
 
 const char* g_stratName[] = { "ZSTD_fast   ",
+                              "ZSTD_dfast  ",
                               "ZSTD_greedy ",
                               "ZSTD_lazy   ",
                               "ZSTD_lazy2  ",
@@ -376,11 +327,11 @@ static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compr
             params.targetLength, g_stratName[(U32)(params.strategy)]);
     fprintf(f,
             "/* level %2u */   /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
-            cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.);
+            cLevel, (double)srcSize / result.cSize, result.cSpeed / 1000000., result.dSpeed / 1000000.);
 }
 
 
-static U32 g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0 };   /* NB_LEVELS_TRACKED : checked at main() */
+static double g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0. };   /* NB_LEVELS_TRACKED : checked at main() */
 
 typedef struct {
     BMK_result_t result;
@@ -407,8 +358,6 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz
     BMK_printWinners2(stdout, winners, srcSize);
 }
 
-size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters params);   /* hidden interface, declared here */
-
 static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters params,
               const void* srcBuffer, size_t srcSize,
                     ZSTD_CCtx* ctx)
@@ -442,17 +391,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para
             double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
             double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
 
-            size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_sizeofCCtx(params);
-            size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_sizeofCCtx(winners[cLevel].params);
+            size_t W_CMemUsed = (1 << params.windowLog) + ZSTD_estimateCCtxSize(params);
+            size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + ZSTD_estimateCCtxSize(winners[cLevel].params);
             double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
             double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
 
-            double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log((double)testResult.cSpeed);
-            double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed);
-
-            double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log((double)testResult.dSpeed);
-            double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
+            double W_CSpeed_note = W_ratioNote * ( 30 + 10*cLevel) + log(testResult.cSpeed);
+            double O_CSpeed_note = O_ratioNote * ( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed);
 
+            double W_DSpeed_note = W_ratioNote * ( 20 + 2*cLevel) + log(testResult.dSpeed);
+            double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed);
 
             if (W_DMemUsed_note < O_DMemUsed_note) {
                 /* uses too much Decompression memory for too little benefit */
@@ -474,16 +422,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para
                 /* too large compression speed difference for the compression benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
-                         W_ratio, (double)(testResult.cSpeed) / 1000.,
-                         O_ratio, (double)(winners[cLevel].result.cSpeed) / 1000.,   cLevel);
+                         W_ratio, testResult.cSpeed / 1000000,
+                         O_ratio, winners[cLevel].result.cSpeed / 1000000.,   cLevel);
                 continue;
             }
             if (W_DSpeed_note   < O_DSpeed_note  ) {
                 /* too large decompression speed difference for the compression benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
-                         W_ratio, (double)(testResult.dSpeed) / 1000.,
-                         O_ratio, (double)(winners[cLevel].result.dSpeed) / 1000.,   cLevel);
+                         W_ratio, testResult.dSpeed / 1000000.,
+                         O_ratio, winners[cLevel].result.dSpeed / 1000000.,   cLevel);
                 continue;
             }
 
@@ -507,6 +455,8 @@ static ZSTD_compressionParameters* sanitizeParams(ZSTD_compressionParameters par
     g_params = params;
     if (params.strategy == ZSTD_fast)
         g_params.chainLog = 0, g_params.searchLog = 0;
+    if (params.strategy == ZSTD_dfast)
+        g_params.searchLog = 0;
     if (params.strategy != ZSTD_btopt )
         g_params.targetLength = 0;
     return &g_params;
@@ -577,9 +527,9 @@ static void playAround(FILE* f, winnerInfo_t* winners,
                        ZSTD_CCtx* ctx)
 {
     int nbVariations = 0;
-    const int startTime = BMK_GetMilliStart();
+    clock_t const clockStart = clock();
 
-    while (BMK_GetMilliSpan(startTime) < g_maxVariationTime) {
+    while (BMK_clockSpan(clockStart) < g_maxVariationTime) {
         ZSTD_compressionParameters p = params;
 
         if (nbVariations++ > g_maxNbVariations) break;
@@ -637,15 +587,18 @@ static void BMK_selectRandomStart(
 
 static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 {
-    ZSTD_CCtx* ctx = ZSTD_createCCtx();
+    ZSTD_CCtx* const ctx = ZSTD_createCCtx();
     ZSTD_compressionParameters params;
     winnerInfo_t winners[NB_LEVELS_TRACKED];
-    int i;
-    unsigned u;
-    const char* rfName = "grillResults.txt";
-    FILE* f;
+    const char* const rfName = "grillResults.txt";
+    FILE* const f = fopen(rfName, "w");
     const size_t blockSize = g_blockSize ? g_blockSize : srcSize;
 
+    /* init */
+    if (ctx==NULL) { DISPLAY("ZSTD_createCCtx() failed \n"); exit(1); }
+    memset(winners, 0, sizeof(winners));
+    if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
+
     if (g_singleRun) {
         BMK_result_t testResult;
         g_params = ZSTD_adjustCParams(g_params, srcSize, 0);
@@ -654,41 +607,36 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
         return;
     }
 
-    /* init */
-    memset(winners, 0, sizeof(winners));
-    f = fopen(rfName, "w");
-    if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
-
     if (g_target)
-        g_cSpeedTarget[1] = g_target * 1000;
+        g_cSpeedTarget[1] = g_target * 1000000;
     else {
         /* baseline config for level 1 */
         BMK_result_t testResult;
         params = ZSTD_getCParams(1, blockSize, 0);
         BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);
-        g_cSpeedTarget[1] = (testResult.cSpeed * 31) >> 5;
+        g_cSpeedTarget[1] = (testResult.cSpeed * 31) / 32;
     }
 
     /* establish speed objectives (relative to level 1) */
-    for (u=2; u<=ZSTD_maxCLevel(); u++)
-        g_cSpeedTarget[u] = (g_cSpeedTarget[u-1] * 25) >> 5;
+    {   unsigned u;
+        for (u=2; u<=ZSTD_maxCLevel(); u++)
+            g_cSpeedTarget[u] = (g_cSpeedTarget[u-1] * 25) / 32;
+    }
 
     /* populate initial solution */
-    {
-        const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
-        for (i=1; i<=maxSeeds; i++) {
+    {   const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
+        int i;
+        for (i=0; i<=maxSeeds; i++) {
             params = ZSTD_getCParams(i, blockSize, 0);
             BMK_seed(winners, params, srcBuffer, srcSize, ctx);
-        }
-    }
+    }   }
     BMK_printWinners(f, winners, srcSize);
 
     /* start tests */
-    {
-        const int milliStart = BMK_GetMilliStart();
+    {   const time_t grillStart = time(NULL);
         do {
             BMK_selectRandomStart(f, winners, srcBuffer, srcSize, ctx);
-        } while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
+        } while (BMK_timeSpan(grillStart) < g_grillDuration_s);
     }
 
     /* end summary */
@@ -704,8 +652,8 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 static int benchSample(void)
 {
     void* origBuff;
-    size_t benchedSize = sampleSize;
-    const char* name = "Sample 10MiB";
+    size_t const benchedSize = sampleSize;
+    const char* const name = "Sample 10MiB";
 
     /* Allocation */
     origBuff = malloc(benchedSize);
@@ -724,37 +672,31 @@ static int benchSample(void)
 }
 
 
-int benchFiles(char** fileNamesTable, int nbFiles)
+int benchFiles(const char** fileNamesTable, int nbFiles)
 {
     int fileIdx=0;
 
     /* Loop for each file */
     while (fileIdx<nbFiles) {
-        FILE* inFile;
-        char* inFileName;
-        U64   inFileSize;
+        const char* const inFileName = fileNamesTable[fileIdx++];
+        FILE* const inFile = fopen( inFileName, "rb" );
+        U64 const inFileSize = UTIL_getFileSize(inFileName);
         size_t benchedSize;
-        size_t readSize;
-        char* origBuff;
+        void* origBuff;
 
         /* Check file existence */
-        inFileName = fileNamesTable[fileIdx++];
-        inFile = fopen( inFileName, "rb" );
         if (inFile==NULL) {
             DISPLAY( "Pb opening %s\n", inFileName);
             return 11;
         }
 
-        /* Memory allocation & restrictions */
-        inFileSize = UTIL_getFileSize(inFileName);
+        /* Memory allocation */
         benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
         if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
         if (benchedSize < inFileSize)
             DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
-
-        /* Alloc */
-        origBuff = (char*) malloc((size_t)benchedSize);
-        if(!origBuff) {
+        origBuff = malloc(benchedSize);
+        if (origBuff==NULL) {
             DISPLAY("\nError: not enough memory!\n");
             fclose(inFile);
             return 12;
@@ -762,49 +704,44 @@ int benchFiles(char** fileNamesTable, int nbFiles)
 
         /* Fill input buffer */
         DISPLAY("Loading %s...       \r", inFileName);
-        readSize = fread(origBuff, 1, benchedSize, inFile);
-        fclose(inFile);
-
-        if(readSize != benchedSize) {
-            DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
-            free(origBuff);
-            return 13;
-        }
+        {   size_t const readSize = fread(origBuff, 1, benchedSize, inFile);
+            fclose(inFile);
+            if(readSize != benchedSize) {
+                DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+                free(origBuff);
+                return 13;
+        }   }
 
         /* bench */
         DISPLAY("\r%79s\r", "");
         DISPLAY("using %s : \n", inFileName);
         BMK_benchMem(origBuff, benchedSize);
+
+        /* clean */
+        free(origBuff);
     }
 
     return 0;
 }
 
 
-int optimizeForSize(char* inFileName)
+int optimizeForSize(const char* inFileName, U32 targetSpeed)
 {
-    FILE* inFile;
-    U64   inFileSize;
-    size_t benchedSize;
-    size_t readSize;
-    char* origBuff;
+    FILE* const inFile = fopen( inFileName, "rb" );
+    U64 const inFileSize = UTIL_getFileSize(inFileName);
+    size_t benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
+    void* origBuff;
 
-    /* Check file existence */
-    inFile = fopen( inFileName, "rb" );
-    if (inFile==NULL) {
-        DISPLAY( "Pb opening %s\n", inFileName);
-        return 11;
-    }
+    /* Init */
+    if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
 
     /* Memory allocation & restrictions */
-    inFileSize = UTIL_getFileSize(inFileName);
-    benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
     if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
     if (benchedSize < inFileSize)
         DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
 
     /* Alloc */
-    origBuff = (char*) malloc((size_t)benchedSize);
+    origBuff = malloc(benchedSize);
     if(!origBuff) {
         DISPLAY("\nError: not enough memory!\n");
         fclose(inFile);
@@ -813,39 +750,40 @@ int optimizeForSize(char* inFileName)
 
     /* Fill input buffer */
     DISPLAY("Loading %s...       \r", inFileName);
-    readSize = fread(origBuff, 1, benchedSize, inFile);
-    fclose(inFile);
-
-    if(readSize != benchedSize) {
-        DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
-        free(origBuff);
-        return 13;
-    }
+    {   size_t const readSize = fread(origBuff, 1, benchedSize, inFile);
+        fclose(inFile);
+        if(readSize != benchedSize) {
+            DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
+            free(origBuff);
+            return 13;
+    }   }
 
     /* bench */
     DISPLAY("\r%79s\r", "");
-    DISPLAY("optimizing for %s : \n", inFileName);
+    DISPLAY("optimizing for %s - limit speed %u MB/s \n", inFileName, targetSpeed);
+    targetSpeed *= 1000;
 
-    {
-        ZSTD_CCtx* ctx = ZSTD_createCCtx();
+    {   ZSTD_CCtx* const ctx = ZSTD_createCCtx();
         ZSTD_compressionParameters params;
         winnerInfo_t winner;
         BMK_result_t candidate;
         const size_t blockSize = g_blockSize ? g_blockSize : benchedSize;
-        int i;
 
         /* init */
+        if (ctx==NULL) { DISPLAY("\n ZSTD_createCCtx error \n"); free(origBuff); return 14;}
         memset(&winner, 0, sizeof(winner));
         winner.result.cSize = (size_t)(-1);
 
         /* find best solution from default params */
-        {
-            const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
+        {   const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
+            int i;
             for (i=1; i<=maxSeeds; i++) {
                 params = ZSTD_getCParams(i, blockSize, 0);
                 BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
+                if (candidate.cSpeed < targetSpeed)
+                    break;
                 if ( (candidate.cSize < winner.result.cSize)
-                   ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) )
+                   | ((candidate.cSize == winner.result.cSize) & (candidate.cSpeed > winner.result.cSpeed)) )
                 {
                     winner.params = params;
                     winner.result = candidate;
@@ -855,12 +793,11 @@ int optimizeForSize(char* inFileName)
         BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
 
         /* start tests */
-        {
-            const int milliStart = BMK_GetMilliStart();
+        {   time_t const grillStart = time(NULL);
             do {
                 params = winner.params;
                 paramVariation(&params);
-                if ((FUZ_rand(&g_rand) & 15) == 1) params = randomParams();
+                if ((FUZ_rand(&g_rand) & 15) == 3) params = randomParams();
 
                 /* exclude faster if already played set of params */
                 if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(params))-1)) continue;
@@ -870,13 +807,15 @@ int optimizeForSize(char* inFileName)
                 BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
 
                 /* improvement found => new winner */
-                if ( (candidate.cSize < winner.result.cSize)
-                   ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) ) {
+                if ( (candidate.cSpeed > targetSpeed)
+                   & ( (candidate.cSize < winner.result.cSize)
+                     | ((candidate.cSize == winner.result.cSize) & (candidate.cSpeed > winner.result.cSpeed)) )  )
+                {
                     winner.params = params;
                     winner.result = candidate;
                     BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
                 }
-            } while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
+            } while (BMK_timeSpan(grillStart) < g_grillDuration_s);
         }
 
         /* end summary */
@@ -887,11 +826,12 @@ int optimizeForSize(char* inFileName)
         ZSTD_freeCCtx(ctx);
     }
 
+    free(origBuff);
     return 0;
 }
 
 
-static int usage(char* exename)
+static int usage(const char* exename)
 {
     DISPLAY( "Usage :\n");
     DISPLAY( "      %s [arg] file\n", exename);
@@ -904,29 +844,32 @@ static int usage(char* exename)
 static int usage_advanced(void)
 {
     DISPLAY( "\nAdvanced options :\n");
-    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
-    DISPLAY( " -B#    : cut input into blocks of size # (default : single block)\n");
-    DISPLAY( " -P#    : generated sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
-    DISPLAY( " -S     : Single run\n");
+    DISPLAY( " -T#    : set level 1 speed objective \n");
+    DISPLAY( " -B#    : cut input into blocks of size # (default : single block) \n");
+    DISPLAY( " -i#    : iteration loops [1-9](default : %i) \n", NBLOOPS);
+    DISPLAY( " -O#    : find Optimized parameters for # target speed (default : 0) \n");
+    DISPLAY( " -S     : Single run \n");
+    DISPLAY( " -P#    : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100);
     return 0;
 }
 
-static int badusage(char* exename)
+static int badusage(const char* exename)
 {
     DISPLAY("Wrong parameters\n");
     usage(exename);
     return 1;
 }
 
-int main(int argc, char** argv)
+int main(int argc, const char** argv)
 {
     int i,
         filenamesStart=0,
         result;
-    char* exename=argv[0];
-    char* input_filename=0;
+    const char* exename=argv[0];
+    const char* input_filename=0;
     U32 optimizer = 0;
     U32 main_pause = 0;
+    U32 targetSpeed = 0;
 
     /* checks */
     if (NB_LEVELS_TRACKED <= ZSTD_maxCLevel()) {
@@ -940,7 +883,7 @@ int main(int argc, char** argv)
     if (argc<1) { badusage(exename); return 1; }
 
     for(i=1; i<argc; i++) {
-        char* argument = argv[i];
+        const char* argument = argv[i];
 
         if(!argument) continue;   /* Protection if argument empty */
 
@@ -964,7 +907,7 @@ int main(int argc, char** argv)
                     /* Modify Nb Iterations */
                 case 'i':
                     argument++;
-                    if ((argument[0] >='0') && (argument[0] <='9'))
+                    if ((argument[0] >='0') & (argument[0] <='9'))
                         g_nbIterations = *argument++ - '0';
                     break;
 
@@ -972,7 +915,7 @@ int main(int argc, char** argv)
                 case 'P':
                     argument++;
                     {   U32 proba32 = 0;
-                        while ((argument[0]>= '0') && (argument[0]<= '9'))
+                        while ((argument[0]>= '0') & (argument[0]<= '9'))
                             proba32 = (proba32*10) + (*argument++ - '0');
                         g_compressibility = (double)proba32 / 100.;
                     }
@@ -981,6 +924,9 @@ int main(int argc, char** argv)
                 case 'O':
                     argument++;
                     optimizer=1;
+                    targetSpeed = 0;
+                    while ((*argument >= '0') & (*argument <= '9'))
+                        targetSpeed = (targetSpeed*10) + (*argument++ - '0');
                     break;
 
                     /* Run Single conf */
@@ -1058,7 +1004,7 @@ int main(int argc, char** argv)
                 case 'B':
                     g_blockSize = 0;
                     argument++;
-                    while ((*argument >='0') && (*argument <='9'))
+                    while ((*argument >='0') & (*argument <='9'))
                         g_blockSize = (g_blockSize*10) + (*argument++ - '0');
                     if (*argument=='K') g_blockSize<<=10, argument++;  /* allows using KB notation */
                     if (*argument=='M') g_blockSize<<=20, argument++;
@@ -1081,7 +1027,7 @@ int main(int argc, char** argv)
         result = benchSample();
     else {
         if (optimizer)
-            result = optimizeForSize(input_filename);
+            result = optimizeForSize(input_filename, targetSpeed);
         else
             result = benchFiles(argv+filenamesStart, argc-filenamesStart);
     }
diff --git a/programs/zbufftest.c b/programs/zbufftest.c
index 41dfa3384..3e36d015f 100644
--- a/programs/zbufftest.c
+++ b/programs/zbufftest.c
@@ -381,13 +381,9 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
             {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
                 dict = srcBuffer + dictStart;
             }
-            {   ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
-                U32 const checksum = FUZ_rand(&lseed) & 1;
-                U32 const noDictIDFlag = FUZ_rand(&lseed) & 1;
-                ZSTD_frameParameters const fPar = { 0, checksum, noDictIDFlag };
-                ZSTD_parameters params;
-                params.cParams = cPar;
-                params.fParams = fPar;
+            {   ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize);
+                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
+                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
                 {   size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, 0);
                     CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError));
         }   }   }
diff --git a/programs/zstd.1 b/programs/zstd.1
index d7760f78f..7201f76c3 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -33,12 +33,12 @@ It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages.
 It also features a very fast decoder, with speed > 500 MB/s per core.
 
 \fBzstd\fR command line is generally similar to gzip, but features the following differences :
- - Original files are preserved
+ - Source files are preserved by default
+   It's possible to remove them automatically by using \fB--rm\fR command
  - By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary.
      Use \fB-q\fR to turn them off
 
 
-\fBzstd\fR supports the following options :
 
 .SH OPTIONS
 .TP
@@ -57,6 +57,19 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 .BR \-f ", " --force
  overwrite output without prompting
 .TP
+.BR \-c ", " --stdout
+ force write to standard output, even if it is the console
+.TP
+.BR \--rm
+ remove source file(s) after successful compression or decompression
+.TP
+.BR \-k ", " --keep
+ keep source file(s) after successful compression or decompression.
+ This is the default behavior.
+.TP
+.BR \-r
+ operate recursively on directories
+.TP
 .BR \-h/\-H ", " --help
  display help/long help and exit
 .TP
@@ -69,14 +82,11 @@ It also features a very fast decoder, with speed > 500 MB/s per core.
 .BR \-q ", " --quiet
  suppress warnings and notifications; specify twice to suppress errors too
 .TP
-.BR \-c ", " --stdout
- force write to standard output, even if it is the console
-.TP
 .BR \-C ", " --check
  add integrity check computed from uncompressed data
 .TP
 .BR \-t ", " --test
- Test the integrity of compressed files.  This option is equivalent to \fB--decompress --stdout > /dev/null\fR.
+ Test the integrity of compressed files. This option is equivalent to \fB--decompress --stdout > /dev/null\fR.
  No files are created or removed.
 
 .SH DICTIONARY
@@ -121,9 +131,6 @@ Typical gains range from ~10% (at 64KB) to x5 better (at <1KB).
 .TP
 .B \-B#
  cut file into independent blocks of size # (default: no block)
-.TP
-.B \-r#
- test all compression levels from 1 to # (default: disabled)
 
 
 .SH BUGS
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index bf40dadf9..4fa802698 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -28,12 +28,21 @@
 */
 
 
+/*-************************************
+*  Tuning parameters
+**************************************/
+#ifndef ZSTDCLI_CLEVEL_DEFAULT
+#  define ZSTDCLI_CLEVEL_DEFAULT 3
+#endif
+
+
 /*-************************************
 *  Includes
 **************************************/
 #include "util.h"     /* Compiler options, UTIL_HAS_CREATEFILELIST */
 #include <string.h>   /* strcmp, strlen */
 #include <ctype.h>    /* toupper */
+#include <errno.h>    /* errno */
 #include "fileio.h"
 #ifndef ZSTD_NOBENCH
 #  include "bench.h"  /* BMK_benchFiles, BMK_SetNbIterations */
@@ -45,7 +54,6 @@
 #include "zstd.h"     /* ZSTD_VERSION_STRING */
 
 
-
 /*-************************************
 *  OS-specific Includes
 **************************************/
@@ -53,12 +61,12 @@
 #  include <io.h>       /* _isatty */
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
 #else
-#if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE)
-#  include <unistd.h>   /* isatty */
-#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
-#else
-#  define IS_CONSOLE(stdStream) 0
-#endif
+#  if defined(_POSIX_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE)
+#    include <unistd.h>   /* isatty */
+#    define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#  else
+#    define IS_CONSOLE(stdStream) 0
+#  endif
 #endif
 
 
@@ -115,6 +123,8 @@ static int usage(const char* programName)
     DISPLAY( " -D file: use `file` as Dictionary \n");
     DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
     DISPLAY( " -f     : overwrite output without prompting \n");
+    DISPLAY( "--rm    : remove source file(s) after successful de/compression \n");
+    DISPLAY( " -k     : preserve source file(s) (default) \n");
     DISPLAY( " -h/-H  : display help/long help and exit\n");
     return 0;
 }
@@ -132,7 +142,6 @@ static int usage_advanced(const char* programName)
 #ifdef UTIL_HAS_CREATEFILELIST
     DISPLAY( " -r     : operate recursively on directories\n");
 #endif
-    DISPLAY( "--rm    : remove source files after successful de/compression \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n");
     DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n");
@@ -169,7 +178,6 @@ static int badusage(const char* programName)
     return 1;
 }
 
-
 static void waitEnter(void)
 {
     int unused;
@@ -181,7 +189,7 @@ static void waitEnter(void)
 /*! readU32FromChar() :
     @return : unsigned integer value reach from input in `char` format
     Will also modify `*stringPtr`, advancing it to position where it stopped reading.
-    Note : this function can overflow if result > MAX_UNIT */
+    Note : this function can overflow if result > MAX_UINT */
 static unsigned readU32FromChar(const char** stringPtr)
 {
     unsigned result = 0;
@@ -205,8 +213,9 @@ int main(int argCount, const char** argv)
         dictBuild=0,
         nextArgumentIsOutFileName=0,
         nextArgumentIsMaxDict=0,
-        nextArgumentIsDictID=0;
-    unsigned cLevel = 1;
+        nextArgumentIsDictID=0,
+        nextArgumentIsFile=0;
+    unsigned cLevel = ZSTDCLI_CLEVEL_DEFAULT;
     unsigned cLevelLast = 1;
     unsigned recursive = 0;
     const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
@@ -229,7 +238,7 @@ int main(int argCount, const char** argv)
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
     (void)dictCLevel; (void)dictSelect; (void)dictID;  /* not used when ZSTD_NODICT set */
     (void)decode; (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */
-    if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); }
+    if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
     filenameTable[0] = stdinmark;
     displayOut = stderr;
     /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
@@ -247,142 +256,165 @@ int main(int argCount, const char** argv)
         const char* argument = argv[argNb];
         if(!argument) continue;   /* Protection if argument empty */
 
-        /* long commands (--long-word) */
-        if (!strcmp(argument, "--decompress")) { decode=1; continue; }
-        if (!strcmp(argument, "--force")) {  FIO_overwriteMode(); continue; }
-        if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
-        if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
-        if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
-        if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
-        if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
-        if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; }
-        if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
-        if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
-        if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
-        if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
-        if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
-        if (!strcmp(argument, "--test")) { decode=1; outFileName=nulmark; FIO_overwriteMode(); continue; }
-        if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
-        if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
-        if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; continue; }
-        if (!strcmp(argument, "--keep")) { continue; }   /* does nothing, since preserving input is default; for gzip/xz compatibility */
-        if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
+        if (nextArgumentIsFile==0) {
 
-        /* '-' means stdin/stdout */
-        if (!strcmp(argument, "-")){
-            if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; outFileName=stdoutmark; continue; }
-        }
+            /* long commands (--long-word) */
+            if (!strcmp(argument, "--")) { nextArgumentIsFile=1; continue; }
+            if (!strcmp(argument, "--decompress")) { decode=1; continue; }
+            if (!strcmp(argument, "--force")) {  FIO_overwriteMode(); continue; }
+            if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
+            if (!strcmp(argument, "--help")) { displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
+            if (!strcmp(argument, "--verbose")) { displayLevel++; continue; }
+            if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
+            if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); continue; }
+            if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; }
+            if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; }
+            if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; }
+            if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; }
+            if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; }
+            if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(0); continue; }
+            if (!strcmp(argument, "--test")) { decode=1; outFileName=nulmark; FIO_overwriteMode(); continue; }
+            if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
+            if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
+            if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; continue; }
+            if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
+            if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
 
-        /* Decode commands (note : aggregated commands are allowed) */
-        if (argument[0]=='-') {
-            argument++;
-
-            while (argument[0]!=0) {
-#ifndef ZSTD_NOCOMPRESS
-                /* compression Level */
-                if ((*argument>='0') && (*argument<='9')) {
-                    cLevel = readU32FromChar(&argument);
-                    dictCLevel = cLevel;
-                    if (dictCLevel > ZSTD_maxCLevel())
-                        CLEAN_RETURN(badusage(programName));
+            /* '-' means stdin/stdout */
+            if (!strcmp(argument, "-")){
+                if (!filenameIdx) {
+                    filenameIdx=1, filenameTable[0]=stdinmark;
+                    outFileName=stdoutmark;
+                    displayLevel-=(displayLevel==2);
                     continue;
-                }
-#endif
+            }   }
 
-                switch(argument[0])
-                {
-                    /* Display help */
-                case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0);   /* Version Only */
-                case 'H':
-                case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
+            /* Decode commands (note : aggregated commands are allowed) */
+            if (argument[0]=='-') {
+                argument++;
 
-                     /* Decoding */
-                case 'd': decode=1; argument++; break;
+                while (argument[0]!=0) {
+    #ifndef ZSTD_NOCOMPRESS
+                    /* compression Level */
+                    if ((*argument>='0') && (*argument<='9')) {
+                        cLevel = readU32FromChar(&argument);
+                        dictCLevel = cLevel;
+                        if (dictCLevel > ZSTD_maxCLevel())
+                            CLEAN_RETURN(badusage(programName));
+                        continue;
+                    }
+    #endif
 
-                    /* Force stdout, even if stdout==console */
-                case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
+                    switch(argument[0])
+                    {
+                        /* Display help */
+                    case 'V': displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0);   /* Version Only */
+                    case 'H':
+                    case 'h': displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
 
-                    /* Use file content as dictionary */
-                case 'D': nextEntryIsDictionary = 1; argument++; break;
+                         /* Decoding */
+                    case 'd': decode=1; argument++; break;
 
-                    /* Overwrite */
-                case 'f': FIO_overwriteMode(); forceStdout=1; argument++; break;
+                        /* Force stdout, even if stdout==console */
+                    case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); argument++; break;
 
-                    /* Verbose mode */
-                case 'v': displayLevel=4; argument++; break;
+                        /* Use file content as dictionary */
+                    case 'D': nextEntryIsDictionary = 1; argument++; break;
 
-                    /* Quiet mode */
-                case 'q': displayLevel--; argument++; break;
+                        /* Overwrite */
+                    case 'f': FIO_overwriteMode(); forceStdout=1; argument++; break;
 
-                    /* keep source file (default anyway, so useless; for gzip/xz compatibility) */
-                case 'k': argument++; break;
+                        /* Verbose mode */
+                    case 'v': displayLevel++; argument++; break;
 
-                    /* Checksum */
-                case 'C': argument++; FIO_setChecksumFlag(2); break;
+                        /* Quiet mode */
+                    case 'q': displayLevel--; argument++; break;
 
-                    /* test compressed file */
-                case 't': decode=1; outFileName=nulmark; argument++; break;
+                        /* keep source file (default); for gzip/xz compatibility */
+                    case 'k': FIO_setRemoveSrcFile(0); argument++; break;
 
-                    /* dictionary name */
-                case 'o': nextArgumentIsOutFileName=1; argument++; break;
+                        /* Checksum */
+                    case 'C': argument++; FIO_setChecksumFlag(2); break;
 
-                    /* recursive */
-                case 'r': recursive=1; argument++; break;
+                        /* test compressed file */
+                    case 't': decode=1; outFileName=nulmark; argument++; break;
 
-#ifndef ZSTD_NOBENCH
-                    /* Benchmark */
-                case 'b': bench=1; argument++; break;
+                        /* destination file name */
+                    case 'o': nextArgumentIsOutFileName=1; argument++; break;
 
-                    /* range bench (benchmark only) */
-                case 'e':
-                        /* compression Level */
+                        /* recursive */
+                    case 'r': recursive=1; argument++; break;
+
+    #ifndef ZSTD_NOBENCH
+                        /* Benchmark */
+                    case 'b': bench=1; argument++; break;
+
+                        /* range bench (benchmark only) */
+                    case 'e':
+                            /* compression Level */
+                            argument++;
+                            cLevelLast = readU32FromChar(&argument);
+                            break;
+
+                        /* Modify Nb Iterations (benchmark only) */
+                    case 'i':
                         argument++;
-                        cLevelLast = readU32FromChar(&argument);
+                        {   U32 const iters = readU32FromChar(&argument);
+                            BMK_setNotificationLevel(displayLevel);
+                            BMK_SetNbIterations(iters);
+                        }
                         break;
 
-                    /* Modify Nb Iterations (benchmark only) */
-                case 'i':
-                    argument++;
-                    {   U32 const iters = readU32FromChar(&argument);
-                        BMK_setNotificationLevel(displayLevel);
-                        BMK_SetNbIterations(iters);
+                        /* cut input into blocks (benchmark only) */
+                    case 'B':
+                        argument++;
+                        {   size_t bSize = readU32FromChar(&argument);
+                            if (toupper(*argument)=='K') bSize<<=10, argument++;  /* allows using KB notation */
+                            if (toupper(*argument)=='M') bSize<<=20, argument++;
+                            if (toupper(*argument)=='B') argument++;
+                            BMK_setNotificationLevel(displayLevel);
+                            BMK_SetBlockSize(bSize);
+                        }
+                        break;
+    #endif   /* ZSTD_NOBENCH */
+
+                        /* Dictionary Selection level */
+                    case 's':
+                        argument++;
+                        dictSelect = readU32FromChar(&argument);
+                        break;
+
+                        /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
+                    case 'p': argument++;
+    #ifndef ZSTD_NOBENCH
+                        if ((*argument>='0') && (*argument<='9')) {
+                            BMK_setAdditionalParam(readU32FromChar(&argument));
+                        } else
+    #endif
+                            main_pause=1;
+                        break;
+                        /* unknown command */
+                    default : CLEAN_RETURN(badusage(programName));
                     }
-                    break;
-
-                    /* cut input into blocks (benchmark only) */
-                case 'B':
-                    argument++;
-                    {   size_t bSize = readU32FromChar(&argument);
-                        if (toupper(*argument)=='K') bSize<<=10, argument++;  /* allows using KB notation */
-                        if (toupper(*argument)=='M') bSize<<=20, argument++;
-                        if (toupper(*argument)=='B') argument++;
-                        BMK_setNotificationLevel(displayLevel);
-                        BMK_SetBlockSize(bSize);
-                    }
-                    break;
-#endif   /* ZSTD_NOBENCH */
-
-                    /* Dictionary Selection level */
-                case 's':
-                    argument++;
-                    dictSelect = readU32FromChar(&argument);
-                    break;
-
-                    /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
-                case 'p': argument++;
-#ifndef ZSTD_NOBENCH
-                    if ((*argument>='0') && (*argument<='9')) {
-                        BMK_setAdditionalParam(readU32FromChar(&argument));
-                    } else
-#endif
-                        main_pause=1;
-                    break;
-                    /* unknown command */
-                default : CLEAN_RETURN(badusage(programName));
                 }
+                continue;
+            }   /* if (argument[0]=='-') */
+
+            if (nextArgumentIsMaxDict) {
+                nextArgumentIsMaxDict = 0;
+                maxDictSize = readU32FromChar(&argument);
+                if (toupper(*argument)=='K') maxDictSize <<= 10;
+                if (toupper(*argument)=='M') maxDictSize <<= 20;
+                continue;
             }
-            continue;
-        }   /* if (argument[0]=='-') */
+
+            if (nextArgumentIsDictID) {
+                nextArgumentIsDictID = 0;
+                dictID = readU32FromChar(&argument);
+                continue;
+            }
+
+        }   /* if (nextArgumentIsAFile==0) */
 
         if (nextEntryIsDictionary) {
             nextEntryIsDictionary = 0;
@@ -397,20 +429,6 @@ int main(int argCount, const char** argv)
             continue;
         }
 
-        if (nextArgumentIsMaxDict) {
-            nextArgumentIsMaxDict = 0;
-            maxDictSize = readU32FromChar(&argument);
-            if (toupper(*argument)=='K') maxDictSize <<= 10;
-            if (toupper(*argument)=='M') maxDictSize <<= 20;
-            continue;
-        }
-
-        if (nextArgumentIsDictID) {
-            nextArgumentIsDictID = 0;
-            dictID = readU32FromChar(&argument);
-            continue;
-        }
-
         /* add filename to list */
         filenameTable[filenameIdx++] = argument;
     }
@@ -444,6 +462,7 @@ int main(int argCount, const char** argv)
     if (dictBuild) {
 #ifndef ZSTD_NODICT
         ZDICT_params_t dictParams;
+        memset(&dictParams, 0, sizeof(dictParams));
         dictParams.compressionLevel = dictCLevel;
         dictParams.selectivityLevel = dictSelect;
         dictParams.notificationLevel = displayLevel;
diff --git a/projects/README.md b/projects/README.md
index b6831ce29..c2fa7478f 100644
--- a/projects/README.md
+++ b/projects/README.md
@@ -1,4 +1,4 @@
-projects for various integrated development environments (IDE) 
+projects for various integrated development environments (IDE)
 ================================
 
 #### Included projects
@@ -7,3 +7,4 @@ The following projects are included with the zstd distribution:
 - cmake - CMake project contributed by Artyom Dymchenko
 - VS2008 - Visual Studio 2008 project
 - VS2010 - Visual Studio 2010 project (which also works well with Visual Studio 2012, 2013, 2015)
+- build - command line scripts prepared for Visual Studio compilation without IDE
diff --git a/projects/VS2008/fullbench/fullbench.vcproj b/projects/VS2008/fullbench/fullbench.vcproj
index 50cbcc2cf..b66953954 100644
--- a/projects/VS2008/fullbench/fullbench.vcproj
+++ b/projects/VS2008/fullbench/fullbench.vcproj
@@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -194,7 +194,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -271,7 +271,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -427,7 +427,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
diff --git a/projects/VS2008/fuzzer/fuzzer.vcproj b/projects/VS2008/fuzzer/fuzzer.vcproj
index ab0bab2c6..b88ae6dfa 100644
--- a/projects/VS2008/fuzzer/fuzzer.vcproj
+++ b/projects/VS2008/fuzzer/fuzzer.vcproj
@@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -194,7 +194,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -271,7 +271,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -439,7 +439,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
diff --git a/projects/VS2008/zstd/zstd.vcproj b/projects/VS2008/zstd/zstd.vcproj
index b9b0d1ec6..129192a88 100644
--- a/projects/VS2008/zstd/zstd.vcproj
+++ b/projects/VS2008/zstd/zstd.vcproj
@@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -121,7 +121,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -196,7 +196,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -274,7 +274,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -495,7 +495,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
diff --git a/projects/VS2008/zstdlib/zstdlib.vcproj b/projects/VS2008/zstdlib/zstdlib.vcproj
index 2051da585..db596b432 100644
--- a/projects/VS2008/zstdlib/zstdlib.vcproj
+++ b/projects/VS2008/zstdlib/zstdlib.vcproj
@@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -194,7 +194,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -271,7 +271,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
 				PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_HEAPMODE=0;ZSTD_LEGACY_SUPPORT=0;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -443,7 +443,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd.h"
+				RelativePath="..\..\..\lib\zstd.h"
 				>
 			</File>
 			<File
diff --git a/projects/VS2010/datagen/datagen.vcxproj.filters b/projects/VS2010/datagen/datagen.vcxproj.filters
deleted file mode 100644
index 1ebbd6b03..000000000
--- a/projects/VS2010/datagen/datagen.vcxproj.filters
+++ /dev/null
@@ -1,26 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagencli.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/projects/VS2010/fullbench/fullbench.vcxproj b/projects/VS2010/fullbench/fullbench.vcxproj
index 0cd32d86d..159a58d5a 100644
--- a/projects/VS2010/fullbench/fullbench.vcxproj
+++ b/projects/VS2010/fullbench/fullbench.vcxproj
@@ -65,24 +65,24 @@
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -175,7 +175,7 @@
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
@@ -185,4 +185,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/projects/VS2010/fullbench/fullbench.vcxproj.filters b/projects/VS2010/fullbench/fullbench.vcxproj.filters
deleted file mode 100644
index a81b2511d..000000000
--- a/projects/VS2010/fullbench/fullbench.vcxproj.filters
+++ /dev/null
@@ -1,86 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\fullbench.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zbuff_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zbuff_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/projects/VS2010/fuzzer/fuzzer.vcxproj b/projects/VS2010/fuzzer/fuzzer.vcxproj
index 560525757..5c8d800bf 100644
--- a/projects/VS2010/fuzzer/fuzzer.vcxproj
+++ b/projects/VS2010/fuzzer/fuzzer.vcxproj
@@ -66,24 +66,24 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
     <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <RunCodeAnalysis>false</RunCodeAnalysis>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
@@ -176,7 +176,7 @@
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
@@ -187,4 +187,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/projects/VS2010/fuzzer/fuzzer.vcxproj.filters b/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
deleted file mode 100644
index 5161ea0ec..000000000
--- a/projects/VS2010/fuzzer/fuzzer.vcxproj.filters
+++ /dev/null
@@ -1,92 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\programs\fuzzer.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/projects/VS2010/zstd/zstd.vcxproj b/projects/VS2010/zstd/zstd.vcxproj
index 3c1e80b5c..ed25f93d4 100644
--- a/projects/VS2010/zstd/zstd.vcxproj
+++ b/projects/VS2010/zstd/zstd.vcxproj
@@ -52,7 +52,7 @@
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
@@ -116,27 +116,27 @@
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath)</LibraryPath>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath);</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath)</LibraryPath>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath);</LibraryPath>
   </PropertyGroup>
@@ -217,4 +217,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/projects/VS2010/zstd/zstd.vcxproj.filters b/projects/VS2010/zstd/zstd.vcxproj.filters
deleted file mode 100644
index 0e1e92798..000000000
--- a/projects/VS2010/zstd/zstd.vcxproj.filters
+++ /dev/null
@@ -1,158 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\programs\bench.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\fileio.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\zstdcli.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\dibio.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\datagen.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\programs\legacy\fileio_legacy.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v04.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v05.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v06.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zbuff_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zbuff_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\programs\bench.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\fileio.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\datagen.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\legacy\fileio_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v04.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v05.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\legacy\zstd_v06.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\dibio.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/projects/VS2010/zstdlib/zstdlib.vcxproj b/projects/VS2010/zstdlib/zstdlib.vcxproj
index 70f80647b..8a5bc8b1d 100644
--- a/projects/VS2010/zstdlib/zstdlib.vcxproj
+++ b/projects/VS2010/zstdlib/zstdlib.vcxproj
@@ -40,7 +40,7 @@
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zbuff.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd.h" />
+    <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
     <ClInclude Include="..\..\..\programs\util.h" />
@@ -97,28 +97,28 @@
     <LinkIncremental>true</LinkIncremental>
     <TargetName>zstdlib_x86</TargetName>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <TargetName>zstdlib_x64</TargetName>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <TargetName>zstdlib_x86</TargetName>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <TargetName>zstdlib_x64</TargetName>
     <IntDir>$(Platform)\$(Configuration)\</IntDir>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -208,4 +208,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/projects/VS2010/zstdlib/zstdlib.vcxproj.filters b/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
deleted file mode 100644
index 439e3cea2..000000000
--- a/projects/VS2010/zstdlib/zstdlib.vcxproj.filters
+++ /dev/null
@@ -1,95 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Source Files">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Header Files">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\lib\common\zstd_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\fse_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\huf_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zbuff_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\compress\zstd_compress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zbuff_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\fse_decompress.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\entropy_common.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\common\xxhash.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\lib\common\bitstream.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\error_private.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\error_public.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\mem.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\fse.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\huf.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zbuff.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\zstd_internal.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\compress\zstd_opt.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\programs\util.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\common\xxhash.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <ResourceCompile Include="zstdlib.rc" />
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/projects/cmake/lib/CMakeLists.txt b/projects/cmake/lib/CMakeLists.txt
index 35553b994..ca841a966 100644
--- a/projects/cmake/lib/CMakeLists.txt
+++ b/projects/cmake/lib/CMakeLists.txt
@@ -47,10 +47,10 @@ SET(ROOT_DIR ../../..)
 
 # Define library directory, where sources and header files are located
 SET(LIBRARY_DIR ${ROOT_DIR}/lib)
-INCLUDE_DIRECTORIES(${LIBRARY_DIR}/common)
+INCLUDE_DIRECTORIES(${LIBRARY_DIR} ${LIBRARY_DIR}/common)
 
 # Read file content
-FILE(READ ${LIBRARY_DIR}/common/zstd.h HEADER_CONTENT)
+FILE(READ ${LIBRARY_DIR}/zstd.h HEADER_CONTENT)
 
 # Parse version
 GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
@@ -80,7 +80,7 @@ SET(Headers
         ${LIBRARY_DIR}/common/mem.h
         ${LIBRARY_DIR}/common/zbuff.h
         ${LIBRARY_DIR}/common/zstd_internal.h
-        ${LIBRARY_DIR}/common/zstd.h
+        ${LIBRARY_DIR}/zstd.h
         ${LIBRARY_DIR}/dictBuilder/zdict.h)
 
 IF (ZSTD_LEGACY_SUPPORT)
@@ -162,7 +162,7 @@ IF (UNIX)
     SET(INSTALL_INCLUDE_DIR ${PREFIX}/include)
 
     # install target
-    INSTALL(FILES ${LIBRARY_DIR}/common/zstd.h ${LIBRARY_DIR}/common/zbuff.h ${LIBRARY_DIR}/dictBuilder/zdict.h DESTINATION ${INSTALL_INCLUDE_DIR})
+    INSTALL(FILES ${LIBRARY_DIR}/zstd.h ${LIBRARY_DIR}/common/zbuff.h ${LIBRARY_DIR}/dictBuilder/zdict.h DESTINATION ${INSTALL_INCLUDE_DIR})
     INSTALL(TARGETS libzstd_static DESTINATION ${INSTALL_LIBRARY_DIR})
     INSTALL(TARGETS libzstd_shared LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR})
 
diff --git a/projects/cmake/programs/CMakeLists.txt b/projects/cmake/programs/CMakeLists.txt
index c8fe5d2a8..fddfc7df2 100644
--- a/projects/cmake/programs/CMakeLists.txt
+++ b/projects/cmake/programs/CMakeLists.txt
@@ -40,7 +40,7 @@ SET(ROOT_DIR ../../..)
 # Define programs directory, where sources and header files are located
 SET(LIBRARY_DIR ${ROOT_DIR}/lib)
 SET(PROGRAMS_DIR ${ROOT_DIR}/programs)
-INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder)
+INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder)
 
 IF (ZSTD_LEGACY_SUPPORT)
     SET(PROGRAMS_LEGACY_DIR ${PROGRAMS_DIR}/legacy)
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index 21d56c5e0..9ad1c01dd 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -17,8 +17,8 @@ endif
 
 ZLIBWRAPPER_PATH = .
 EXAMPLE_PATH = examples
-CC = gcc
-CFLAGS = $(LOC) -I../lib/common -I$(ZLIBDIR) -I$(ZLIBWRAPPER_PATH) -O3 -std=gnu90
+CC ?= gcc
+CFLAGS = $(LOC) -I../lib -I../lib/common -I$(ZLIBDIR) -I$(ZLIBWRAPPER_PATH) -O3 -std=gnu90
 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef
 LDFLAGS = $(LOC)
 RM = rm -f
diff --git a/zstd.rb b/zstd.rb
new file mode 100644
index 000000000..999238331
--- /dev/null
+++ b/zstd.rb
@@ -0,0 +1,18 @@
+class Zstd < Formula
+  desc "Zstandard - Fast real-time compression algorithm"
+  homepage "http://www.zstd.net/"
+  url "https://github.com/Cyan4973/zstd/archive/v0.7.4.tar.gz"
+  sha256 "35ab3a5084d0194e9ff08e702edb6f507eab1bfb8c09c913639241cec852e2b7"
+
+  def install
+    system "make", "install", "PREFIX=#{prefix}"
+  end
+
+  test do
+    (testpath/"input.txt").write("Hello, world." * 10)
+    system "#{bin}/zstd", "input.txt", "-o", "compressed.zst"
+    system "#{bin}/zstd", "--test", "compressed.zst"
+    system "#{bin}/zstd", "-d", "compressed.zst", "-o", "decompressed.txt"
+    system "cmp", "input.txt", "decompressed.txt"
+  end
+end
diff --git a/zstd_compression_format.md b/zstd_compression_format.md
new file mode 100644
index 000000000..3a4ba4c01
--- /dev/null
+++ b/zstd_compression_format.md
@@ -0,0 +1,1170 @@
+Zstandard Compression Format
+============================
+
+### Notices
+
+Copyright (c) 2016 Yann Collet
+
+Permission is granted to copy and distribute this document
+for any  purpose and without charge,
+including translations into other  languages
+and incorporation into compilations,
+provided that the copyright notice and this notice are preserved,
+and that any substantive changes or deletions from the original
+are clearly marked.
+Distribution of this document is unlimited.
+
+### Version
+
+0.1.2 (15/07/16)
+
+
+Introduction
+------------
+
+The purpose of this document is to define a lossless compressed data format,
+that is independent of CPU type, operating system,
+file system and character set, suitable for
+file compression, pipe and streaming compression,
+using the [Zstandard algorithm](http://www.zstandard.org).
+
+The data can be produced or consumed,
+even for an arbitrarily long sequentially presented input data stream,
+using only an a priori bounded amount of intermediate storage,
+and hence can be used in data communications.
+The format uses the Zstandard compression method,
+and optional [xxHash-64 checksum method](http://www.xxhash.org),
+for detection of data corruption.
+
+The data format defined by this specification
+does not attempt to allow random access to compressed data.
+
+This specification is intended for use by implementers of software
+to compress data into Zstandard format and/or decompress data from Zstandard format.
+The text of the specification assumes a basic background in programming
+at the level of bits and other primitive data representations.
+
+Unless otherwise indicated below,
+a compliant compressor must produce data sets
+that conform to the specifications presented here.
+It doesn’t need to support all options though.
+
+A compliant decompressor must be able to decompress
+at least one working set of parameters
+that conforms to the specifications presented here.
+It may also ignore informative fields, such as checksum.
+Whenever it does not support a parameter defined in the compressed stream,
+it must produce a non-ambiguous error code and associated error message
+explaining which parameter is unsupported.
+
+
+Definitions
+-----------
+A content compressed by Zstandard is transformed into a Zstandard __frame__.
+Multiple frames can be appended into a single file or stream.
+A frame is totally independent, has a defined beginning and end,
+and a set of parameters which tells the decoder how to decompress it.
+
+A frame encapsulates one or multiple __blocks__.
+Each block can be compressed or not,
+and has a guaranteed maximum content size, which depends on frame parameters.
+Unlike frames, each block depends on previous blocks for proper decoding.
+However, each block can be decompressed without waiting for its successor,
+allowing streaming operations.
+
+
+General Structure of Zstandard Frame format
+-------------------------------------------
+
+| MagicNb |  Frame Header | Block | (More blocks) | EndMark |
+|:-------:|:-------------:| ----- | ------------- | ------- |
+| 4 bytes |  2-14 bytes   |       |               | 3 bytes |
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0xFD2FB527
+
+__Frame Header__
+
+2 to 14 Bytes, detailed in [next part](#frame-header).
+
+__Data Blocks__
+
+Detailed in [next chapter](#data-blocks).
+That’s where compressed data is stored.
+
+__EndMark__
+
+The flow of blocks ends when the last block header brings an _end signal_ .
+This last block header may optionally host a __Content Checksum__ .
+
+##### __Content Checksum__
+
+Content Checksum verify that frame content has been regenerated correctly.
+The content checksum is the result
+of [xxh64() hash function](https://www.xxHash.com)
+digesting the original (decoded) data as input, and a seed of zero.
+Bits from 11 to 32 (included) are extracted to form a 22 bits checksum
+stored into the endmark body.
+```
+mask22bits = (1<<22)-1;
+contentChecksum = (XXH64(content, size, 0) >> 11) & mask22bits;
+```
+Content checksum is only present when its associated flag
+is set in the frame descriptor.
+Its usage is optional.
+
+__Frame Concatenation__
+
+In some circumstances, it may be required to append multiple frames,
+for example in order to add new data to an existing compressed file
+without re-framing it.
+
+In such case, each frame brings its own set of descriptor flags.
+Each frame is considered independent.
+The only relation between frames is their sequential order.
+
+The ability to decode multiple concatenated frames
+within a single stream or file is left outside of this specification.
+As an example, the reference `zstd` command line utility is able
+to decode all concatenated frames in their sequential order,
+delivering the final decompressed result as if it was a single content.
+
+
+Frame Header
+-------------
+
+| FHD     | (WD)      | (dictID)  | (Content Size) |
+| ------- | --------- | --------- |:--------------:|
+| 1 byte  | 0-1 byte  | 0-4 bytes |  0 - 8 bytes   |
+
+Frame header has a variable size, which uses a minimum of 2 bytes,
+and up to 14 bytes depending on optional parameters.
+
+__FHD byte__ (Frame Header Descriptor)
+
+The first Header's byte is called the Frame Header Descriptor.
+It tells which other fields are present.
+Decoding this byte is enough to tell the size of Frame Header.
+
+|  BitNb  |   7-6  |    5    |   4    |    3     |    2     |  1-0   |
+| ------- | ------ | ------- | ------ | -------- | -------- | ------ |
+|FieldName| FCSize | Segment | Unused | Reserved | Checksum | dictID |
+
+In this table, bit 7 is highest bit, while bit 0 is lowest.
+
+__Frame Content Size flag__
+
+This is a 2-bits flag (`= FHD >> 6`),
+specifying if decompressed data size is provided within the header.
+
+|  Value  |  0  |  1  |  2  |  3  |
+| ------- | --- | --- | --- | --- |
+|FieldSize| 0-1 |  2  |  4  |  8  |
+
+Value 0 meaning depends on _single segment_ mode :
+it either means `0` (size not provided) _if_ the `WD` byte is present,
+or `1` (frame content size <= 255 bytes) otherwise.
+
+__Single Segment__
+
+If this flag is set,
+data shall be regenerated within a single continuous memory segment.
+
+In which case, `WD` byte __is not present__,
+but `Frame Content Size` field necessarily is.
+As a consequence, the decoder must allocate a memory segment
+of size `>= Frame Content Size`.
+
+In order to preserve the decoder from unreasonable memory requirement,
+a decoder can reject a compressed frame
+which requests a memory size beyond decoder's authorized range.
+
+For broader compatibility, decoders are recommended to support
+memory sizes of at least 8 MB.
+This is just a recommendation,
+each decoder is free to support higher or lower limits,
+depending on local limitations.
+
+__Unused bit__
+
+The value of this bit should be set to zero.
+A decoder compliant with this specification version should not interpret it.
+It might be used in a future version,
+to signal a property which is not mandatory to properly decode the frame.
+
+__Reserved bit__
+
+This bit is reserved for some future feature.
+Its value _must be zero_.
+A decoder compliant with this specification version must ensure it is not set.
+This bit may be used in a future revision,
+to signal a feature that must be interpreted in order to decode the frame.
+
+__Content checksum flag__
+
+If this flag is set, a content checksum will be present into the EndMark.
+The checksum is a 22 bits value extracted from the XXH64() of data,
+and stored into endMark. See [__Content Checksum__](#content-checksum) .
+
+__Dictionary ID flag__
+
+This is a 2-bits flag (`= FHD & 3`),
+telling if a dictionary ID is provided within the header.
+It also specifies the size of this field.
+
+|  Value  |  0  |  1  |  2  |  3  |
+| ------- | --- | --- | --- | --- |
+|FieldSize|  0  |  1  |  2  |  4  |
+
+__WD byte__ (Window Descriptor)
+
+Provides guarantees on maximum back-reference distance
+that will be present within compressed data.
+This information is useful for decoders to allocate enough memory.
+
+`WD` byte is optional. It's not present in `single segment` mode.
+In which case, the maximum back-reference distance is the content size itself,
+which can be any value from 1 to 2^64-1 bytes (16 EB).
+
+|   BitNb   |    7-3   |    0-2   |
+| --------- | -------- | -------- |
+| FieldName | Exponent | Mantissa |
+
+Maximum distance is given by the following formulae :
+```
+windowLog = 10 + Exponent;
+windowBase = 1 << windowLog;
+windowAdd = (windowBase / 8) * Mantissa;
+windowSize = windowBase + windowAdd;
+```
+The minimum window size is 1 KB.
+The maximum size is `15*(1<<38)` bytes, which is 1.875 TB.
+
+To properly decode compressed data,
+a decoder will need to allocate a buffer of at least `windowSize` bytes.
+
+In order to preserve decoder from unreasonable memory requirements,
+a decoder can refuse a compressed frame
+which requests a memory size beyond decoder's authorized range.
+
+For improved interoperability,
+decoders are recommended to be compatible with window sizes of 8 MB.
+Encoders are recommended to not request more than 8 MB.
+It's merely a recommendation though,
+decoders are free to support larger or lower limits,
+depending on local limitations.
+
+__Dictionary ID__
+
+This is a variable size field, which contains
+the ID of the dictionary required to properly decode the frame.
+Note that this field is optional. When it's not present,
+it's up to the caller to make sure it uses the correct dictionary.
+
+Field size depends on __Dictionary ID flag__.
+1 byte can represent an ID 0-255.
+2 bytes can represent an ID 0-65535.
+4 bytes can represent an ID 0-4294967295.
+
+It's allowed to represent a small ID (for example `13`)
+with a large 4-bytes dictionary ID, losing some compacity in the process.
+
+_Reserved ranges :_
+If the frame is going to be distributed in a private environment,
+any dictionary ID can be used.
+However, for public distribution of compressed frames using a dictionary,
+some ranges are reserved for future use :
+- low : 1 - 32767 : reserved
+- high : >= (2^31) : reserved
+
+
+__Frame Content Size__
+
+This is the original (uncompressed) size.
+This information is optional, and only present if associated flag is set.
+Content size is provided using 1, 2, 4 or 8 Bytes.
+Format is Little endian.
+
+| Field Size |    Range   |
+| ---------- | ---------- |
+|     0      |      0     |
+|     1      |   0 - 255  |
+|     2      | 256 - 65791|
+|     4      | 0 - 2^32-1 |
+|     8      | 0 - 2^64-1 |
+
+When field size is 1, 4 or 8 bytes, the value is read directly.
+When field size is 2, _an offset of 256 is added_.
+It's allowed to represent a small size (ex: `18`) using any compatible variant.
+A size of `0` means `content size is unknown`.
+In which case, the `WD` byte will necessarily be present,
+and becomes the only hint to guide memory allocation.
+
+In order to preserve decoder from unreasonable memory requirement,
+a decoder can refuse a compressed frame
+which requests a memory size beyond decoder's authorized range.
+
+
+Data Blocks
+-----------
+
+| B. Header |  data  |
+|:---------:| ------ |
+|  3 bytes  |        |
+
+
+__Block Header__
+
+This field uses 3-bytes, format is __big-endian__.
+
+The 2 highest bits represent the `block type`,
+while the remaining 22 bits represent the (compressed) block size.
+
+There are 4 block types :
+
+|    Value   |      0     |  1  |  2  |    3    |
+| ---------- | ---------- | --- | --- | ------- |
+| Block Type | Compressed | Raw | RLE | EndMark |
+
+- Compressed : this is a [Zstandard compressed block](#compressed-block-format),
+  detailed in another section of this specification.
+  "block size" is the compressed size.
+  Decompressed size is unknown,
+  but its maximum possible value is guaranteed (see below)
+- Raw : this is an uncompressed block.
+  "block size" is the number of bytes to read and copy.
+- RLE : this is a single byte, repeated N times.
+  In which case, "block size" is the size to regenerate,
+  while the "compressed" block is just 1 byte (the byte to repeat).
+- EndMark : this is not a block. Signal the end of the frame.
+  The rest of the field may be optionally filled by a checksum
+  (see [Content Checksum](#content-checksum)).
+
+Block sizes must respect a few rules :
+- In compressed mode, compressed size if always strictly `< decompressed size`.
+- Block decompressed size is always <= maximum back-reference distance .
+- Block decompressed size is always <= 128 KB
+
+
+__Data__
+
+Where the actual data to decode stands.
+It might be compressed or not, depending on previous field indications.
+A data block is not necessarily "full" :
+since an arbitrary “flush” may happen anytime,
+block decompressed content can be any size,
+up to Block Maximum Decompressed Size, which is the smallest of :
+- Maximum back-reference distance
+- 128 KB
+
+
+Skippable Frames
+----------------
+
+| Magic Number | Frame Size | User Data |
+|:------------:|:----------:| --------- |
+|   4 bytes    |  4 bytes   |           |
+
+Skippable frames allow the insertion of user-defined data
+into a flow of concatenated frames.
+Its design is pretty straightforward,
+with the sole objective to allow the decoder to quickly skip
+over user-defined data and continue decoding.
+
+Skippable frames defined in this specification are compatible with [LZ4] ones.
+
+[LZ4]:http://www.lz4.org
+
+__Magic Number__ :
+
+4 Bytes, Little endian format.
+Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
+All 16 values are valid to identify a skippable frame.
+
+__Frame Size__ :
+
+This is the size, in bytes, of the following User Data
+(without including the magic number nor the size field itself).
+4 Bytes, Little endian format, unsigned 32-bits.
+This means User Data can’t be bigger than (2^32-1) Bytes.
+
+__User Data__ :
+
+User Data can be anything. Data will just be skipped by the decoder.
+
+
+Compressed block format
+-----------------------
+This specification details the content of a _compressed block_.
+A compressed block has a size, which must be known.
+It also has a guaranteed maximum regenerated size,
+in order to properly allocate destination buffer.
+See [Data Blocks](#data-blocks) for more details.
+
+A compressed block consists of 2 sections :
+- [Literals section](#literals-section)
+- [Sequences section](#sequences-section)
+
+### Prerequisites
+To decode a compressed block, the following elements are necessary :
+- Previous decoded blocks, up to a distance of `windowSize`,
+  or all previous blocks in "single segment" mode.
+- List of "recent offsets" from previous compressed block.
+- Decoding tables of previous compressed block for each symbol type
+  (literals, litLength, matchLength, offset).
+
+
+### Literals section
+
+Literals are compressed using Huffman prefix codes.
+During sequence phase, literals will be entangled with match copy operations.
+All literals are regrouped in the first part of the block.
+They can be decoded first, and then copied during sequence operations,
+or they can be decoded on the flow, as needed by sequence commands.
+
+| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) |
+| ------ | ------------------ | ------- | --------- | --------- | --------- |
+
+Literals can be compressed, or uncompressed.
+When compressed, an optional tree description can be present,
+followed by 1 or 4 streams.
+
+#### Literals section header
+
+Header is in charge of describing how literals are packed.
+It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
+using big-endian convention.
+
+| BlockType | sizes format | (compressed size) | regenerated size |
+| --------- | ------------ | ----------------- | ---------------- |
+|   2 bits  |  1 - 2 bits  |    0 - 18 bits    |    5 - 20 bits   |
+
+__Block Type__ :
+
+This is a 2-bits field, describing 4 different block types :
+
+|    Value   |      0     |    1   |  2  |    3    |
+| ---------- | ---------- | ------ | --- | ------- |
+| Block Type | Compressed | Repeat | Raw |   RLE   |
+
+- Compressed : This is a standard huffman-compressed block,
+        starting with a huffman tree description.
+        See details below.
+- Repeat Stats : This is a huffman-compressed block,
+        using huffman tree _from previous huffman-compressed literals block_.
+        Huffman tree description will be skipped.
+- Raw : Literals are stored uncompressed.
+- RLE : Literals consist of a single byte value repeated N times.
+
+__Sizes format__ :
+
+Sizes format are divided into 2 families :
+
+- For compressed block, it requires to decode both the compressed size
+  and the decompressed size. It will also decode the number of streams.
+- For Raw or RLE blocks, it's enough to decode the size to regenerate.
+
+For values spanning several bytes, convention is Big-endian.
+
+__Sizes format for Raw or RLE literals block__ :
+
+- Value : 0x : Regenerated size uses 5 bits (0-31).
+               Total literal header size is 1 byte.
+               `size = h[0] & 31;`
+- Value : 10 : Regenerated size uses 12 bits (0-4095).
+               Total literal header size is 2 bytes.
+               `size = ((h[0] & 15) << 8) + h[1];`
+- Value : 11 : Regenerated size uses 20 bits (0-1048575).
+               Total literal header size is 3 bytes.
+               `size = ((h[0] & 15) << 16) + (h[1]<<8) + h[2];`
+
+Note : it's allowed to represent a short value (ex : `13`)
+using a long format, accepting the reduced compacity.
+
+__Sizes format for Compressed literals block__ :
+
+Note : also applicable to "repeat-stats" blocks.
+- Value : 00 : 4 streams.
+               Compressed and regenerated sizes use 10 bits (0-1023).
+               Total literal header size is 3 bytes.
+- Value : 01 : _Single stream_.
+               Compressed and regenerated sizes use 10 bits (0-1023).
+               Total literal header size is 3 bytes.
+- Value : 10 : 4 streams.
+               Compressed and regenerated sizes use 14 bits (0-16383).
+               Total literal header size is 4 bytes.
+- Value : 10 : 4 streams.
+               Compressed and regenerated sizes use 18 bits (0-262143).
+               Total literal header size is 5 bytes.
+
+Compressed and regenerated size fields follow big endian convention.
+
+#### Huffman Tree description
+
+This section is only present when literals block type is `Compressed` (`0`).
+
+Prefix coding represents symbols from an a priori known alphabet
+by bit sequences (codewords), one codeword for each symbol,
+in a manner such that different symbols may be represented
+by bit sequences of different lengths,
+but a parser can always parse an encoded string
+unambiguously symbol-by-symbol.
+
+Given an alphabet with known symbol frequencies,
+the Huffman algorithm allows the construction of an optimal prefix code
+using the fewest bits of any possible prefix codes for that alphabet.
+
+Prefix code must not exceed a maximum code length.
+More bits improve accuracy but cost more header size,
+and require more memory or more complex decoding operations.
+This specification limits maximum code length to 11 bits.
+
+
+##### Representation
+
+All literal values from zero (included) to last present one (excluded)
+are represented by `weight` values, from 0 to `maxBits`.
+Transformation from `weight` to `nbBits` follows this formulae :
+`nbBits = weight ? maxBits + 1 - weight : 0;` .
+The last symbol's weight is deduced from previously decoded ones,
+by completing to the nearest power of 2.
+This power of 2 gives `maxBits`, the depth of the current tree.
+
+__Example__ :
+Let's presume the following huffman tree must be described :
+
+| literal |  0  |  1  |  2  |  3  |  4  |  5  |
+| ------- | --- | --- | --- | --- | --- | --- |
+| nbBits  |  1  |  2  |  3  |  0  |  4  |  4  |
+
+The tree depth is 4, since its smallest element uses 4 bits.
+Value `5` will not be listed, nor will values above `5`.
+Values from `0` to `4` will be listed using `weight` instead of `nbBits`.
+Weight formula is : `weight = nbBits ? maxBits + 1 - nbBits : 0;`
+It gives the following serie of weights :
+
+| weights |  4  |  3  |  2  |  0  |  1  |
+| ------- | --- | --- | --- | --- | --- |
+| literal |  0  |  1  |  2  |  3  |  4  |
+
+The decoder will do the inverse operation :
+having collected weights of literals from `0` to `4`,
+it knows the last literal, `5`, is present with a non-zero weight.
+The weight of `5` can be deducted by joining to the nearest power of 2.
+Sum of 2^(weight-1) (excluding 0) is :
+`8 + 4 + 2 + 0 + 1 = 15`
+Nearest power of 2 is 16.
+Therefore, `maxBits = 4` and `weight[5] = 1`.
+
+##### Huffman Tree header
+
+This is a single byte value (0-255),
+which tells how to decode the list of weights.
+
+- if headerByte >= 242 : this is one of 14 pre-defined weight distributions :
+
+| value    |242|243|244|245|246|247|248|249|250|251|252|253|254|255|
+| -------- |---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+| Nb of 1s | 1 | 2 | 3 | 4 | 7 | 8 | 15| 16| 31| 32| 63| 64|127|128|
+|Complement| 1 | 2 | 1 | 4 | 1 | 8 | 1 | 16| 1 | 32| 1 | 64| 1 |128|
+
+_Note_ : complement is found by using "join to nearest power of 2" rule.
+
+- if headerByte >= 128 : this is a direct representation,
+  where each weight is written directly as a 4 bits field (0-15).
+  The full representation occupies `((nbSymbols+1)/2)` bytes,
+  meaning it uses a last full byte even if nbSymbols is odd.
+  `nbSymbols = headerByte - 127;`.
+  Note that maximum nbSymbols is 241-127 = 114.
+  A larger serie must necessarily use FSE compression.
+
+- if headerByte < 128 :
+  the serie of weights is compressed by FSE.
+  The length of the FSE-compressed serie is `headerByte` (0-127).
+
+##### FSE (Finite State Entropy) compression of huffman weights
+
+The serie of weights is compressed using FSE compression.
+It's a single bitstream with 2 interleaved states,
+sharing a single distribution table.
+
+To decode an FSE bitstream, it is necessary to know its compressed size.
+Compressed size is provided by `headerByte`.
+It's also necessary to know its maximum decompressed size,
+which is `255`, since literal values span from `0` to `255`,
+and last symbol value is not represented.
+
+An FSE bitstream starts by a header, describing probabilities distribution.
+It will create a Decoding Table.
+Table must be pre-allocated, which requires to support a maximum accuracy.
+For a list of huffman weights, recommended maximum is 7 bits.
+
+FSE header is [described in relevant chapter](#fse-distribution-table--condensed-format),
+and so is [FSE bitstream](#bitstream).
+The main difference is that Huffman header compression uses 2 states,
+which share the same FSE distribution table.
+Bitstream contains only FSE symbols, there are no interleaved "raw bitfields".
+The number of symbols to decode is discovered
+by tracking bitStream overflow condition.
+When both states have overflowed the bitstream, end is reached.
+
+
+##### Conversion from weights to huffman prefix codes
+
+All present symbols shall now have a `weight` value.
+Symbols are sorted by weight.
+Symbols with a weight of zero are removed.
+Within same weight, symbols keep natural order.
+Starting from lowest weight,
+symbols are being allocated to a `range`.
+A `weight` directly represents a `range`,
+following the formulae : `range = weight ? 1 << (weight-1) : 0 ;`
+Similarly, it is possible to transform weights into nbBits :
+`nbBits = nbBits ? maxBits + 1 - weight : 0;` .
+
+
+__Example__ :
+Let's presume the following list of weights has been decoded :
+
+| Literal |  0  |  1  |  2  |  3  |  4  |  5  |
+| ------- | --- | --- | --- | --- | --- | --- |
+|  weight |  4  |  3  |  2  |  0  |  1  |  1  |
+
+Sorted by weight and then natural order,
+it gives the following distribution :
+
+| Literal      |  3  |  4  |  5  |  2  |  1  |   0  |
+| ------------ | --- | --- | --- | --- | --- | ---- |
+| weight       |  0  |  1  |  1  |  2  |  3  |   4  |
+| range        |  0  |  1  |  1  |  2  |  4  |   8  |
+| table entries| N/A |  0  |  1  | 2-3 | 4-7 | 8-15 |
+| nb bits      |  0  |  4  |  4  |  3  |  2  |   1  |
+| prefix codes | N/A | 0000| 0001| 001 | 01  |   1  |
+
+
+#### Literals bitstreams
+
+##### Bitstreams sizes
+
+As seen in a previous paragraph,
+there are 2 flavors of huffman-compressed literals :
+single stream, and 4-streams.
+
+4-streams is useful for CPU with multiple execution units and OoO operations.
+Since each stream can be decoded independently,
+it's possible to decode them up to 4x faster than a single stream,
+presuming the CPU has enough parallelism available.
+
+For single stream, header provides both the compressed and regenerated size.
+For 4-streams though,
+header only provides compressed and regenerated size of all 4 streams combined.
+In order to properly decode the 4 streams,
+it's necessary to know the compressed and regenerated size of each stream.
+
+Regenerated size is easiest :
+each stream has a size of `(totalSize+3)/4`,
+except the last one, which is up to 3 bytes smaller, to reach `totalSize`.
+
+Compressed size must be provided explicitly : in the 4-streams variant,
+bitstreams are preceded by 3 unsigned Little Endian 16-bits values.
+Each value represents the compressed size of one stream, in order.
+The last stream size is deducted from total compressed size
+and from already known stream sizes :
+`stream4CSize = totalCSize - 6 - stream1CSize - stream2CSize - stream3CSize;`
+
+##### Bitstreams read and decode
+
+Each bitstream must be read _backward_,
+that is starting from the end down to the beginning.
+Therefore it's necessary to know the size of each bitstream.
+
+It's also necessary to know exactly which _bit_ is the latest.
+This is detected by a final bit flag :
+the highest bit of latest byte is a final-bit-flag.
+Consequently, a last byte of `0` is not possible.
+And the final-bit-flag itself is not part of the useful bitstream.
+Hence, the last byte contain between 0 and 7 useful bits.
+
+Starting from the end,
+it's possible to read the bitstream in a little-endian fashion,
+keeping track of already used bits.
+
+Reading the last `maxBits` bits,
+it's then possible to compare extracted value to decoding table,
+determining the symbol to decode and number of bits to discard.
+
+The process continues up to reading the required number of symbols per stream.
+If a bitstream is not entirely and exactly consumed,
+hence reaching exactly its beginning position with _all_ bits consumed,
+the decoding process is considered faulty.
+
+
+### Sequences section
+
+A compressed block is a succession of _sequences_ .
+A sequence is a literal copy command, followed by a match copy command.
+A literal copy command specifies a length.
+It is the number of bytes to be copied (or extracted) from the literal section.
+A match copy command specifies an offset and a length.
+The offset gives the position to copy from,
+which can be within a previous block.
+
+There are 3 symbol types, `literalLength`, `matchLength` and `offset`,
+which are encoded together, interleaved in a single _bitstream_.
+
+Each symbol is a _code_ in its own context,
+which specifies a baseline and a number of bits to add.
+_Codes_ are FSE compressed,
+and interleaved with raw additional bits in the same bitstream.
+
+The Sequences section starts by a header,
+followed by optional Probability tables for each symbol type,
+followed by the bitstream.
+
+| Header | (LitLengthTable) | (OffsetTable) | (MatchLengthTable) | bitStream |
+| ------ | ---------------- | ------------- | ------------------ | --------- |
+
+To decode the Sequence section, it's required to know its size.
+This size is deducted from `blockSize - literalSectionSize`.
+
+
+#### Sequences section header
+
+Consists in 2 items :
+- Nb of Sequences
+- Flags providing Symbol compression types
+
+__Nb of Sequences__
+
+This is a variable size field, `nbSeqs`, using between 1 and 3 bytes.
+Let's call its first byte `byte0`.
+- `if (byte0 == 0)` : there are no sequences.
+            The sequence section stops there.
+            Regenerated content is defined entirely by literals section.
+- `if (byte0 < 128)` : `nbSeqs = byte0;` . Uses 1 byte.
+- `if (byte0 < 255)` : `nbSeqs = ((byte0-128) << 8) + byte1;` . Uses 2 bytes.
+- `if (byte0 == 255)`: `nbSeqs = byte1 + (byte2<<8) + 0x7F00;` . Uses 3 bytes.
+
+__Symbol compression modes__
+
+This is a single byte, defining the compression mode of each symbol type.
+
+|  BitNb  |   7-6  |   5-4  |   3-2  |    1-0   |
+| ------- | ------ | ------ | ------ | -------- |
+|FieldName| LLtype | OFType | MLType | Reserved |
+
+The last field, `Reserved`, must be all-zeroes.
+
+`LLtype`, `OFType` and `MLType` define the compression mode of
+Literal Lengths, Offsets and Match Lengths respectively.
+
+They follow the same enumeration :
+
+|       Value      |    0   |  1  |    2   |  3  |
+| ---------------- | ------ | --- | ------ | --- |
+| Compression Mode | predef | RLE | Repeat | FSE |
+
+- "predef" : uses a pre-defined distribution table.
+- "RLE" : it's a single code, repeated `nbSeqs` times.
+- "Repeat" : re-use distribution table from previous compressed block.
+- "FSE" : standard FSE compression.
+          A distribution table will be present.
+          It will be described in [next part](#distribution-tables).
+
+#### Symbols decoding
+
+##### Literal Lengths codes
+
+Literal lengths codes are values ranging from `0` to `35` included.
+They define lengths from 0 to 131071 bytes.
+
+|  Code  | 0-15 |
+| ------ | ---- |
+| length | Code |
+| nbBits |   0  |
+
+
+|   Code   |  16  |  17  |  18  |  19  |  20  |  21  |  22  |  23  |
+| -------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| Baseline |  16  |  18  |  20  |  22  |  24  |  28  |  32  |  40  |
+| nb Bits  |   1  |   1  |   1  |   1  |   2  |   2  |   3  |   3  |
+
+|   Code   |  24  |  25  |  26  |  27  |  28  |  29  |  30  |  31  |
+| -------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| Baseline |  48  |  64  |  128 |  256 |  512 | 1024 | 2048 | 4096 |
+| nb Bits  |   4  |   6  |   7  |   8  |   9  |  10  |  11  |  12  |
+
+|   Code   |  32  |  33  |  34  |  35  |
+| -------- | ---- | ---- | ---- | ---- |
+| Baseline | 8192 |16384 |32768 |65536 |
+| nb Bits  |  13  |  14  |  15  |  16  |
+
+__Default distribution__
+
+When "compression mode" is "predef"",
+a pre-defined distribution is used for FSE compression.
+
+Below is its definition. It uses an accuracy of 6 bits (64 states).
+```
+short literalLengths_defaultDistribution[36] =
+        { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+          2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+         -1,-1,-1,-1 };
+```
+
+##### Match Lengths codes
+
+Match lengths codes are values ranging from `0` to `52` included.
+They define lengths from 3 to 131074 bytes.
+
+|  Code  |   0-31   |
+| ------ | -------- |
+| value  | Code + 3 |
+| nbBits |     0    |
+
+|   Code   |  32  |  33  |  34  |  35  |  36  |  37  |  38  |  39  |
+| -------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| Baseline |  35  |  37  |  39  |  41  |  43  |  47  |  51  |  59  |
+| nb Bits  |   1  |   1  |   1  |   1  |   2  |   2  |   3  |   3  |
+
+|   Code   |  40  |  41  |  42  |  43  |  44  |  45  |  46  |  47  |
+| -------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+| Baseline |  67  |  83  |  99  |  131 |  258 |  514 | 1026 | 2050 |
+| nb Bits  |   4  |   4  |   5  |   7  |   8  |   9  |  10  |  11  |
+
+|   Code   |  48  |  49  |  50  |  51  |  52  |
+| -------- | ---- | ---- | ---- | ---- | ---- |
+| Baseline | 4098 | 8194 |16486 |32770 |65538 |
+| nb Bits  |  12  |  13  |  14  |  15  |  16  |
+
+__Default distribution__
+
+When "compression mode" is defined as "predef",
+a pre-defined distribution is used for FSE compression.
+
+Here is its definition. It uses an accuracy of 6 bits (64 states).
+```
+short matchLengths_defaultDistribution[53] =
+        { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+         -1,-1,-1,-1,-1 };
+```
+
+##### Offset codes
+
+Offset codes are values ranging from `0` to `N`,
+with `N` being limited by maximum backreference distance.
+
+A decoder is free to limit its maximum `N` supported.
+Recommendation is to support at least up to `22`.
+For information, at the time of this writing.
+the reference decoder supports a maximum `N` value of `28` in 64-bits mode.
+
+An offset code is also the nb of additional bits to read,
+and can be translated into an `OFValue` using the following formulae :
+
+```
+OFValue = (1 << offsetCode) + readNBits(offsetCode);
+if (OFValue > 3) offset = OFValue - 3;
+```
+
+OFValue from 1 to 3 are special : they define "repeat codes",
+which means one of the previous offsets will be repeated.
+They are sorted in recency order, with 1 meaning the most recent one.
+See [Repeat offsets](#repeat-offsets) paragraph.
+
+__Default distribution__
+
+When "compression mode" is defined as "predef",
+a pre-defined distribution is used for FSE compression.
+
+Here is its definition. It uses an accuracy of 5 bits (32 states),
+and supports a maximum `N` of 28, allowing offset values up to 536,870,908 .
+
+If any sequence in the compressed block requires an offset larger than this,
+it's not possible to use the default distribution to represent it.
+
+```
+short offsetCodes_defaultDistribution[53] =
+        { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+          1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+```
+
+#### Distribution tables
+
+Following the header, up to 3 distribution tables can be described.
+They are, in order :
+- Literal lengthes
+- Offsets
+- Match Lengthes
+
+The content to decode depends on their respective compression mode :
+- Repeat mode : no content. Re-use distribution from previous compressed block.
+- Predef : no content. Use pre-defined distribution table.
+- RLE : 1 byte. This is the only code to use across the whole compressed block.
+- FSE : A distribution table is present.
+
+##### FSE distribution table : condensed format
+
+An FSE distribution table describes the probabilities of all symbols
+from `0` to the last present one (included)
+on a normalized scale of `1 << AccuracyLog` .
+
+It's a bitstream which is read forward, in little-endian fashion.
+It's not necessary to know its exact size,
+since it will be discovered and reported by the decoding process.
+
+The bitstream starts by reporting on which scale it operates.
+`AccuracyLog = low4bits + 5;`
+In theory, it can define a scale from 5 to 20.
+In practice, decoders are allowed to limit the maximum supported `AccuracyLog`.
+Recommended maximum are `9` for literal and match lengthes, and `8` for offsets.
+The reference decoder uses these limits.
+
+Then follow each symbol value, from `0` to last present one.
+The nb of bits used by each field is variable.
+It depends on :
+
+- Remaining probabilities + 1 :
+  __example__ :
+  Presuming an AccuracyLog of 8,
+  and presuming 100 probabilities points have already been distributed,
+  the decoder may read any value from `0` to `255 - 100 + 1 == 156` (included).
+  Therefore, it must read `log2sup(156) == 8` bits.
+
+- Value decoded : small values use 1 less bit :
+  __example__ :
+  Presuming values from 0 to 156 (included) are possible,
+  255-156 = 99 values are remaining in an 8-bits field.
+  They are used this way :
+  first 99 values (hence from 0 to 98) use only 7 bits,
+  values from 99 to 156 use 8 bits.
+  This is achieved through this scheme :
+
+  | Value read | Value decoded | nb Bits used |
+  | ---------- | ------------- | ------------ |
+  |   0 -  98  |   0 -  98     |  7           |
+  |  99 - 127  |  99 - 127     |  8           |
+  | 128 - 226  |   0 -  98     |  7           |
+  | 227 - 255  | 128 - 156     |  8           |
+
+Symbols probabilities are read one by one, in order.
+
+Probability is obtained from Value decoded by following formulae :
+`Proba = value - 1;`
+
+It means value `0` becomes negative probability `-1`.
+`-1` is a special probability, which means `less than 1`.
+Its effect on distribution table is described in [next paragraph].
+For the purpose of calculating cumulated distribution, it counts as one.
+
+[next paragraph]:#fse-decoding--from-normalized-distribution-to-decoding-tables
+
+When a symbol has a probability of `zero`,
+it is followed by a 2-bits repeat flag.
+This repeat flag tells how many probabilities of zeroes follow the current one.
+It provides a number ranging from 0 to 3.
+If it is a 3, another 2-bits repeat flag follows, and so on.
+
+When last symbol reaches cumulated total of `1 << AccuracyLog`,
+decoding is complete.
+Then the decoder can tell how many bytes were used in this process,
+and how many symbols are present.
+
+The bitstream consumes a round number of bytes.
+Any remaining bit within the last byte is just unused.
+
+If the last symbol makes cumulated total go above `1 << AccuracyLog`,
+distribution is considered corrupted.
+
+##### FSE decoding : from normalized distribution to decoding tables
+
+The distribution of normalized probabilities is enough
+to create a unique decoding table.
+
+It follows the following build rule :
+
+The table has a size of `tableSize = 1 << AccuracyLog;`.
+Each cell describes the symbol decoded,
+and instructions to get the next state.
+
+Symbols are scanned in their natural order for `less than 1` probabilities.
+Symbols with this probability are being attributed a single cell,
+starting from the end of the table.
+These symbols define a full state reset, reading `AccuracyLog` bits.
+
+All remaining symbols are sorted in their natural order.
+Starting from symbol `0` and table position `0`,
+each symbol gets attributed as many cells as its probability.
+Cell allocation is spreaded, not linear :
+each successor position follow this rule :
+
+```
+position += (tableSize>>1) + (tableSize>>3) + 3;
+position &= tableSize-1;
+```
+
+A position is skipped if already occupied,
+typically by a "less than 1" probability symbol.
+
+The result is a list of state values.
+Each state will decode the current symbol.
+
+To get the Number of bits and baseline required for next state,
+it's first necessary to sort all states in their natural order.
+The lower states will need 1 more bit than higher ones.
+
+__Example__ :
+Presuming a symbol has a probability of 5.
+It receives 5 state values. States are sorted in natural order.
+
+Next power of 2 is 8.
+Space of probabilities is divided into 8 equal parts.
+Presuming the AccuracyLog is 7, it defines 128 states.
+Divided by 8, each share is 16 large.
+
+In order to reach 8, 8-5=3 lowest states will count "double",
+taking shares twice larger,
+requiring one more bit in the process.
+
+Numbering starts from higher states using less bits.
+
+| state order |   0   |   1   |    2   |   3  |   4   |
+| ----------- | ----- | ----- | ------ | ---- | ----- |
+| width       |  32   |  32   |   32   |  16  |  16   |
+| nb Bits     |   5   |   5   |    5   |   4  |   4   |
+| range nb    |   2   |   4   |    6   |   0  |   1   |
+| baseline    |  32   |  64   |   96   |   0  |  16   |
+| range       | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
+
+Next state is determined from current state
+by reading the required number of bits, and adding the specified baseline.
+
+
+#### Bitstream
+
+All sequences are stored in a single bitstream, read _backward_.
+It is therefore necessary to know the bitstream size,
+which is deducted from compressed block size.
+
+The last useful bit of the stream is followed by an end-bit-flag.
+Highest bit of last byte is this flag.
+It does not belong to the useful part of the bitstream.
+Therefore, last byte has 0-7 useful bits.
+Note that it also means that last byte cannot be `0`.
+
+##### Starting states
+
+The bitstream starts with initial state values,
+each using the required number of bits in their respective _accuracy_,
+decoded previously from their normalized distribution.
+
+It starts by `Literal Length State`,
+followed by `Offset State`,
+and finally `Match Length State`.
+
+Reminder : always keep in mind that all values are read _backward_.
+
+##### Decoding a sequence
+
+A state gives a code.
+A code provides a baseline and number of bits to add.
+See [Symbol Decoding] section for details on each symbol.
+
+Decoding starts by reading the nb of bits required to decode offset.
+It then does the same for match length,
+and then for literal length.
+
+Offset / matchLength / litLength define a sequence.
+It starts by inserting the number of literals defined by `litLength`,
+then continue by copying `matchLength` bytes from `currentPos - offset`.
+
+The next operation is to update states.
+Using rules pre-calculated in the decoding tables,
+`Literal Length State` is updated,
+followed by `Match Length State`,
+and then `Offset State`.
+
+This operation will be repeated `NbSeqs` times.
+At the end, the bitstream shall be entirely consumed,
+otherwise bitstream is considered corrupted.
+
+[Symbol Decoding]:#symbols-decoding
+
+##### Repeat offsets
+
+As seen in [Offset Codes], the first 3 values define a repeated offset.
+They are sorted in recency order, with 1 meaning "most recent one".
+
+There is an exception though, when current sequence's literal length is `0`.
+In which case, 1 would just make previous match longer.
+Therefore, in such case, 1 means in fact 2, and 2 is impossible.
+Meaning of 3 is unmodified.
+
+Repeat offsets start with the following values : 1, 4 and 8 (in order).
+
+Then each block receives its start value from previous compressed block.
+Note that non-compressed blocks are skipped,
+they do not contribute to offset history.
+
+[Offset Codes]: #offset-codes
+
+###### Offset updates rules
+
+When the new offset is a normal one,
+offset history is simply translated by one position,
+with the new offset taking first spot.
+
+- When repeat offset 1 (most recent) is used, history is unmodified.
+- When repeat offset 2 is used, it's swapped with offset 1.
+- When repeat offset 3 is used, it takes first spot,
+  pushing the other ones by one position.
+
+
+Dictionary format
+-----------------
+
+`zstd` is compatible with "pure content" dictionaries, free of any format restriction.
+But dictionaries created by `zstd --train` follow a format, described here.
+
+__Pre-requisites__ : a dictionary has a known length,
+                     defined either by a buffer limit, or a file size.
+
+| Header | DictID | Stats | Content |
+| ------ | ------ | ----- | ------- |
+
+__Header__ : 4 bytes ID, value 0xEC30A437, Little Endian format
+
+__Dict_ID__ : 4 bytes, stored in Little Endian format.
+              DictID can be any value, except 0 (which means no DictID).
+              It's used by decoders to check if they use the correct dictionary.
+              _Reserved ranges :_
+              If the frame is going to be distributed in a private environment,
+              any dictionary ID can be used.
+              However, for public distribution of compressed frames,
+              some ranges are reserved for future use :
+
+              - low range : 1 - 32767 : reserved
+              - high range : >= (2^31) : reserved
+
+__Stats__ : Entropy tables, following the same format as a [compressed blocks].
+            They are stored in following order :
+            Huffman tables for literals, FSE table for offset,
+            FSE table for matchLenth, and FSE table for litLength.
+            It's finally followed by 3 offset values, populating recent offsets,
+            stored in order, 4-bytes little endian each, for a total of 12 bytes.
+
+__Content__ : Where the actual dictionary content is.
+              Content size depends on Dictionary size.
+
+[compressed blocks]: #compressed-block-format
+
+
+Version changes
+---------------
+- 0.1.2 : limit huffman tree depth to 11 bits
+- 0.1.1 : reserved dictID ranges
+- 0.1.0 : initial release