diff --git a/.buckconfig b/.buckconfig
new file mode 100644
index 000000000..b2b9c036f
--- /dev/null
+++ b/.buckconfig
@@ -0,0 +1,9 @@
+[cxx]
+  cppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=1
+  cflags = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith
+  cxxppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=1
+  cxxflags = -std=c++11 -Wno-format-security -Wno-deprecated-declarations
+  gtest_dep = //contrib/pzstd:gtest
+
+[httpserver]
+  port = 0
diff --git a/.buckversion b/.buckversion
new file mode 100644
index 000000000..892fad966
--- /dev/null
+++ b/.buckversion
@@ -0,0 +1 @@
+c8dec2e8da52d483f6dd7c6cd2ad694e8e6fed2b
diff --git a/.gitignore b/.gitignore
index dd7a74519..e02119883 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,5 @@ googletest/
 
 # Directories
 bin/
+.buckd/
+buck-out/
diff --git a/NEWS b/NEWS
index 46bdb25a2..f404f6e37 100644
--- a/NEWS
+++ b/NEWS
@@ -1,11 +1,16 @@
 v1.1.3
+cli : new : experimental target `make zstdmt`, with multi-threading support
 cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
 cli : fix zstdless on Mac OS-X, by Andrew Janke
+cli : fix #232 "compress non-files"
 dictBuilder : improved dictionary generation quality, thanks to Nick Terrell
-API : fix : all symbols properly exposed in libzstd, by Nick Terrell
-API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
+API : new : lib/compress/zstdmt_compress.h multithreading API (experimental)
 API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul
 API : new : ZDICT_finalizeDictionary()
+API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
+API : fix : all symbols properly exposed in libzstd, by Nick Terrell
+build : support for Solaris target, by Przemyslaw Skibinski
+doc : clarified specification, by Andrew Purcell
 
 v1.1.2
 API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init
diff --git a/build/VS2005/zstd/zstd.vcproj b/build/VS2005/zstd/zstd.vcproj
index 5ef7a98f8..58f254bc8 100644
--- a/build/VS2005/zstd/zstd.vcproj
+++ b/build/VS2005/zstd/zstd.vcproj
@@ -43,7 +43,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -120,7 +120,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -195,7 +195,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -273,7 +273,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -395,6 +395,10 @@
 				RelativePath="..\..\..\lib\compress\zstd_compress.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
 				>
@@ -533,6 +537,10 @@
 				RelativePath="..\..\..\lib\legacy\zstd_v07.h"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
+				>
+			</File>
 		</Filter>
 	</Files>
 	<Globals>
diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index 0beb59dd7..2dfaf3937 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -44,7 +44,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -121,7 +121,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
@@ -196,7 +196,7 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;_DEBUG;_CONSOLE"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
@@ -274,7 +274,7 @@
 				Optimization="2"
 				EnableIntrinsicFunctions="true"
 				OmitFramePointers="true"
-				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder"
+				AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
 				PreprocessorDefinitions="ZSTD_LEGACY_SUPPORT=1;WIN32;NDEBUG;_CONSOLE"
 				RuntimeLibrary="0"
 				EnableFunctionLevelLinking="true"
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index 3939c5540..62c0fe10f 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -127,19 +127,19 @@
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath)</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath);</LibraryPath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
+    <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
     <LibraryPath>$(LibraryPath)</LibraryPath>
   </PropertyGroup>
@@ -227,4 +227,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index db752784b..da9c58fd4 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -66,6 +66,7 @@ SET(Headers
         ${LIBRARY_DIR}/common/huf.h
         ${LIBRARY_DIR}/common/mem.h
         ${LIBRARY_DIR}/common/zstd_internal.h
+        ${LIBRARY_DIR}/compress/zstdmt_compress.h
         ${LIBRARY_DIR}/dictBuilder/zdict.h
         ${LIBRARY_DIR}/deprecated/zbuff.h)
 
diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt
index 9b3c3acc9..cb3dc6e89 100644
--- a/build/cmake/programs/CMakeLists.txt
+++ b/build/cmake/programs/CMakeLists.txt
@@ -20,7 +20,7 @@ SET(ROOT_DIR ../../..)
 # Define programs directory, where sources and header files are located
 SET(LIBRARY_DIR ${ROOT_DIR}/lib)
 SET(PROGRAMS_DIR ${ROOT_DIR}/programs)
-INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compression ${LIBRARY_DIR}/dictBuilder)
+INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compress ${LIBRARY_DIR}/dictBuilder)
 
 IF (ZSTD_LEGACY_SUPPORT)
     SET(PROGRAMS_LEGACY_DIR ${PROGRAMS_DIR}/legacy)
diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 7f9c38e1a..53a699449 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -41,7 +41,7 @@ SET(ROOT_DIR ../../..)
 SET(LIBRARY_DIR ${ROOT_DIR}/lib)
 SET(PROGRAMS_DIR ${ROOT_DIR}/programs)
 SET(TESTS_DIR ${ROOT_DIR}/tests)
-INCLUDE_DIRECTORIES(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder)
+INCLUDE_DIRECTORIES(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compress ${LIBRARY_DIR}/dictBuilder)
 
 ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/fullbench.c)
 TARGET_LINK_LIBRARIES(fullbench libzstd_static)
diff --git a/contrib/pzstd/BUCK b/contrib/pzstd/BUCK
new file mode 100644
index 000000000..d04eeedd8
--- /dev/null
+++ b/contrib/pzstd/BUCK
@@ -0,0 +1,72 @@
+cxx_library(
+    name='libpzstd',
+    visibility=['PUBLIC'],
+    header_namespace='',
+    exported_headers=[
+        'ErrorHolder.h',
+        'Logging.h',
+        'Pzstd.h',
+    ],
+    headers=[
+        'SkippableFrame.h',
+    ],
+    srcs=[
+        'Pzstd.cpp',
+        'SkippableFrame.cpp',
+    ],
+    deps=[
+        ':options',
+        '//contrib/pzstd/utils:utils',
+        '//lib:mem',
+        '//lib:zstd',
+    ],
+)
+
+cxx_library(
+    name='options',
+    visibility=['PUBLIC'],
+    header_namespace='',
+    exported_headers=['Options.h'],
+    srcs=['Options.cpp'],
+    deps=[
+        '//contrib/pzstd/utils:scope_guard',
+        '//lib:zstd',
+        '//programs:util',
+    ],
+)
+
+cxx_binary(
+    name='pzstd',
+    visibility=['PUBLIC'],
+    srcs=['main.cpp'],
+    deps=[
+        ':libpzstd',
+        ':options',
+    ],
+)
+
+# Must run "make googletest" first
+cxx_library(
+    name='gtest',
+    srcs=glob([
+        'googletest/googletest/src/gtest-all.cc',
+        'googletest/googlemock/src/gmock-all.cc',
+        'googletest/googlemock/src/gmock_main.cc',
+    ]),
+    header_namespace='',
+    exported_headers=subdir_glob([
+        ('googletest/googletest/include', '**/*.h'),
+        ('googletest/googlemock/include', '**/*.h'),
+    ]),
+    headers=subdir_glob([
+        ('googletest/googletest', 'src/*.cc'),
+        ('googletest/googletest', 'src/*.h'),
+        ('googletest/googlemock', 'src/*.cc'),
+        ('googletest/googlemock', 'src/*.h'),
+    ]),
+    platform_linker_flags=[
+        ('android', []),
+        ('', ['-lpthread']),
+    ],
+    visibility=['PUBLIC'],
+)
diff --git a/contrib/pzstd/Options.cpp b/contrib/pzstd/Options.cpp
index 0b1403354..a0d969393 100644
--- a/contrib/pzstd/Options.cpp
+++ b/contrib/pzstd/Options.cpp
@@ -7,6 +7,7 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 #include "Options.h"
+#include "util.h"
 #include "utils/ScopeGuard.h"
 
 #include <algorithm>
@@ -15,7 +16,6 @@
 #include <cstring>
 #include <iterator>
 #include <thread>
-#include <util.h>
 #include <vector>
 
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) ||     \
diff --git a/contrib/pzstd/main.cpp b/contrib/pzstd/main.cpp
index 279cbfb5e..7d8dbfbcf 100644
--- a/contrib/pzstd/main.cpp
+++ b/contrib/pzstd/main.cpp
@@ -9,11 +9,6 @@
 #include "ErrorHolder.h"
 #include "Options.h"
 #include "Pzstd.h"
-#include "utils/FileSystem.h"
-#include "utils/Range.h"
-#include "utils/ScopeGuard.h"
-#include "utils/ThreadPool.h"
-#include "utils/WorkQueue.h"
 
 using namespace pzstd;
 
diff --git a/contrib/pzstd/test/BUCK b/contrib/pzstd/test/BUCK
new file mode 100644
index 000000000..6d3fdd3c2
--- /dev/null
+++ b/contrib/pzstd/test/BUCK
@@ -0,0 +1,37 @@
+cxx_test(
+    name='options_test',
+    srcs=['OptionsTest.cpp'],
+    deps=['//contrib/pzstd:options'],
+)
+
+cxx_test(
+    name='pzstd_test',
+    srcs=['PzstdTest.cpp'],
+    deps=[
+        ':round_trip',
+        '//contrib/pzstd:libpzstd',
+        '//contrib/pzstd/utils:scope_guard',
+        '//programs:datagen',
+    ],
+)
+
+cxx_binary(
+    name='round_trip_test',
+    srcs=['RoundTripTest.cpp'],
+    deps=[
+        ':round_trip',
+        '//contrib/pzstd/utils:scope_guard',
+        '//programs:datagen',
+    ]
+)
+
+cxx_library(
+    name='round_trip',
+    header_namespace='test',
+    exported_headers=['RoundTrip.h'],
+    deps=[
+        '//contrib/pzstd:libpzstd',
+        '//contrib/pzstd:options',
+        '//contrib/pzstd/utils:scope_guard',
+    ]
+)
diff --git a/contrib/pzstd/test/PzstdTest.cpp b/contrib/pzstd/test/PzstdTest.cpp
index c85f73a39..cadfa83f7 100644
--- a/contrib/pzstd/test/PzstdTest.cpp
+++ b/contrib/pzstd/test/PzstdTest.cpp
@@ -41,23 +41,20 @@ TEST(Pzstd, SmallSizes) {
       std::fclose(fd);
       ASSERT_EQ(written, len);
     }
-    for (unsigned headers = 0; headers <= 1; ++headers) {
-      for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) {
-        for (unsigned level = 1; level <= 4; level *= 4) {
-          auto errorGuard = makeScopeGuard([&] {
-            std::fprintf(stderr, "pzstd headers: %u\n", headers);
-            std::fprintf(stderr, "# threads: %u\n", numThreads);
-            std::fprintf(stderr, "compression level: %u\n", level);
-          });
-          Options options;
-          options.overwrite = true;
-          options.inputFiles = {inputFile};
-          options.numThreads = numThreads;
-          options.compressionLevel = level;
-          options.verbosity = 1;
-          ASSERT_TRUE(roundTrip(options));
-          errorGuard.dismiss();
-        }
+    for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) {
+      for (unsigned level = 1; level <= 4; level *= 4) {
+        auto errorGuard = makeScopeGuard([&] {
+          std::fprintf(stderr, "# threads: %u\n", numThreads);
+          std::fprintf(stderr, "compression level: %u\n", level);
+        });
+        Options options;
+        options.overwrite = true;
+        options.inputFiles = {inputFile};
+        options.numThreads = numThreads;
+        options.compressionLevel = level;
+        options.verbosity = 1;
+        ASSERT_TRUE(roundTrip(options));
+        errorGuard.dismiss();
       }
     }
   }
@@ -79,29 +76,26 @@ TEST(Pzstd, LargeSizes) {
       std::fclose(fd);
       ASSERT_EQ(written, len);
     }
-    for (unsigned headers = 0; headers <= 1; ++headers) {
-      for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
-        for (unsigned level = 1; level <= 4; level *= 2) {
-          auto errorGuard = makeScopeGuard([&] {
-            std::fprintf(stderr, "pzstd headers: %u\n", headers);
-            std::fprintf(stderr, "# threads: %u\n", numThreads);
-            std::fprintf(stderr, "compression level: %u\n", level);
-          });
-          Options options;
-          options.overwrite = true;
-          options.inputFiles = {inputFile};
-          options.numThreads = std::min(numThreads, options.numThreads);
-          options.compressionLevel = level;
-          options.verbosity = 1;
-          ASSERT_TRUE(roundTrip(options));
-          errorGuard.dismiss();
-        }
+    for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
+      for (unsigned level = 1; level <= 4; level *= 4) {
+        auto errorGuard = makeScopeGuard([&] {
+          std::fprintf(stderr, "# threads: %u\n", numThreads);
+          std::fprintf(stderr, "compression level: %u\n", level);
+        });
+        Options options;
+        options.overwrite = true;
+        options.inputFiles = {inputFile};
+        options.numThreads = std::min(numThreads, options.numThreads);
+        options.compressionLevel = level;
+        options.verbosity = 1;
+        ASSERT_TRUE(roundTrip(options));
+        errorGuard.dismiss();
       }
     }
   }
 }
 
-TEST(Pzstd, ExtremelyLargeSize) {
+TEST(Pzstd, DISABLED_ExtremelyLargeSize) {
   unsigned seed = std::random_device{}();
   std::fprintf(stderr, "Pzstd.ExtremelyLargeSize seed: %u\n", seed);
   std::mt19937 gen(seed);
diff --git a/contrib/pzstd/utils/BUCK b/contrib/pzstd/utils/BUCK
new file mode 100644
index 000000000..e757f4120
--- /dev/null
+++ b/contrib/pzstd/utils/BUCK
@@ -0,0 +1,75 @@
+cxx_library(
+    name='buffer',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['Buffer.h'],
+    deps=[':range'],
+)
+
+cxx_library(
+    name='file_system',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['FileSystem.h'],
+    deps=[':range'],
+)
+
+cxx_library(
+    name='likely',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['Likely.h'],
+)
+
+cxx_library(
+    name='range',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['Range.h'],
+    deps=[':likely'],
+)
+
+cxx_library(
+    name='resource_pool',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['ResourcePool.h'],
+)
+
+cxx_library(
+    name='scope_guard',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['ScopeGuard.h'],
+)
+
+cxx_library(
+    name='thread_pool',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['ThreadPool.h'],
+    deps=[':work_queue'],
+)
+
+cxx_library(
+    name='work_queue',
+    visibility=['PUBLIC'],
+    header_namespace='utils',
+    exported_headers=['WorkQueue.h'],
+    deps=[':buffer'],
+)
+
+cxx_library(
+    name='utils',
+    visibility=['PUBLIC'],
+    deps=[
+        ':buffer',
+        ':file_system',
+        ':likely',
+        ':range',
+        ':resource_pool',
+        ':scope_guard',
+        ':thread_pool',
+        ':work_queue',
+    ],
+)
diff --git a/contrib/pzstd/utils/test/BUCK b/contrib/pzstd/utils/test/BUCK
new file mode 100644
index 000000000..a5113cab6
--- /dev/null
+++ b/contrib/pzstd/utils/test/BUCK
@@ -0,0 +1,35 @@
+cxx_test(
+    name='buffer_test',
+    srcs=['BufferTest.cpp'],
+    deps=['//contrib/pzstd/utils:buffer'],
+)
+
+cxx_test(
+    name='range_test',
+    srcs=['RangeTest.cpp'],
+    deps=['//contrib/pzstd/utils:range'],
+)
+
+cxx_test(
+    name='resource_pool_test',
+    srcs=['ResourcePoolTest.cpp'],
+    deps=['//contrib/pzstd/utils:resource_pool'],
+)
+
+cxx_test(
+    name='scope_guard_test',
+    srcs=['ScopeGuardTest.cpp'],
+    deps=['//contrib/pzstd/utils:scope_guard'],
+)
+
+cxx_test(
+    name='thread_pool_test',
+    srcs=['ThreadPoolTest.cpp'],
+    deps=['//contrib/pzstd/utils:thread_pool'],
+)
+
+cxx_test(
+    name='work_queue_test',
+    srcs=['RangeTest.cpp'],
+    deps=['//contrib/pzstd/utils:work_queue'],
+)
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index b48b39104..df983284f 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
 
 ### Version
 
-0.2.2 (14/09/16)
+0.2.3 (27/01/17)
 
 
 Introduction
@@ -57,7 +57,6 @@ Whenever it does not support a parameter defined in the compressed stream,
 it must produce a non-ambiguous error code and associated error message
 explaining which parameter is unsupported.
 
-
 Overall conventions
 -----------
 In this document:
@@ -117,7 +116,7 @@ Skippable frames defined in this specification are compatible with [LZ4] ones.
 __`Magic_Number`__
 
 4 Bytes, little-endian format.
-Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
+Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F.
 All 16 values are valid to identify a skippable frame.
 
 __`Frame_Size`__
@@ -267,7 +266,7 @@ The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag
 In this case, the maximum back-reference distance is the content size itself,
 which can be any value from 1 to 2^64-1 bytes (16 EB).
 
-| Bit numbers |     7-3    |     0-2    |
+| Bit numbers |     7-3    |     2-0    |
 | ----------- | ---------- | ---------- |
 | Field name  | `Exponent` | `Mantissa` |
 
@@ -381,9 +380,9 @@ There are 4 block types :
   This value cannot be used with current version of this specification.
 
 Block sizes must respect a few rules :
-- In compressed mode, compressed size if always strictly `< decompressed size`.
-- Block decompressed size is always <= maximum back-reference distance .
-- Block decompressed size is always <= 128 KB
+- In compressed mode, compressed size is always strictly less than decompressed size.
+- Block decompressed size is always <= maximum back-reference distance.
+- Block decompressed size is always <= 128 KB.
 
 
 __`Block_Content`__
@@ -478,13 +477,16 @@ For values spanning several bytes, convention is little-endian.
 
 __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ :
 
-- Value x0 : `Regenerated_Size` uses 5 bits (0-31).
+- Value ?0 : `Size_Format` uses 1 bit.
+               `Regenerated_Size` uses 5 bits (0-31).
                `Literals_Section_Header` has 1 byte.
                `Regenerated_Size = Header[0]>>3`
-- Value 01 : `Regenerated_Size` uses 12 bits (0-4095).
+- Value 01 : `Size_Format` uses 2 bits.
+               `Regenerated_Size` uses 12 bits (0-4095).
                `Literals_Section_Header` has 2 bytes.
                `Regenerated_Size = (Header[0]>>4) + (Header[1]<<4)`
-- Value 11 : `Regenerated_Size` uses 20 bits (0-1048575).
+- Value 11 : `Size_Format` uses 2 bits.
+               `Regenerated_Size` uses 20 bits (0-1048575).
                `Literals_Section_Header` has 3 bytes.
                `Regenerated_Size = (Header[0]>>4) + (Header[1]<<4) + (Header[2]<<12)`
 
@@ -507,7 +509,8 @@ __`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block
                `Literals_Section_Header` has 5 bytes.
 
 Both `Compressed_Size` and `Regenerated_Size` fields follow little-endian convention.
-
+Note: `Compressed_Size` __includes__ the size of the Huffman Tree description if it
+is present.
 
 #### `Huffman_Tree_Description`
 
@@ -550,23 +553,24 @@ Let's presume the following Huffman tree must be described :
 | `Number_of_Bits` |  1  |  2  |  3  |  0  |  4  |  4  |
 
 The tree depth is 4, since its smallest element uses 4 bits.
-Value `5` will not be listed, nor will values above `5`.
+Value `5` will not be listed as it can be determined from the values for 0-4,
+nor will values above `5` as they are all 0.
 Values from `0` to `4` will be listed using `Weight` instead of `Number_of_Bits`.
 Weight formula is :
 ```
 Weight = Number_of_Bits ? (Max_Number_of_Bits + 1 - Number_of_Bits) : 0
 ```
-It gives the following serie of weights :
+It gives the following series of weights :
 
-| `Weight` |  4  |  3  |  2  |  0  |  1  |
-| -------- | --- | --- | --- | --- | --- |
 | literal  |  0  |  1  |  2  |  3  |  4  |
+| -------- | --- | --- | --- | --- | --- |
+| `Weight` |  4  |  3  |  2  |  0  |  1  |
 
 The decoder will do the inverse operation :
 having collected weights of literals from `0` to `4`,
 it knows the last literal, `5`, is present with a non-zero weight.
-The weight of `5` can be deducted by joining to the nearest power of 2.
-Sum of `2^(Weight-1)` (excluding 0) is :
+The weight of `5` can be determined by advancing to the next power of 2.
+The sum of `2^(Weight-1)` (excluding 0's) is :
 `8 + 4 + 2 + 0 + 1 = 15`.
 Nearest power of 2 is 16.
 Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 1`.
@@ -574,23 +578,39 @@ Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 1`.
 ##### Huffman Tree header
 
 This is a single byte value (0-255),
-which tells how to decode the list of weights.
+which describes how to decode the list of weights.
 
 - if `headerByte` >= 128 : this is a direct representation,
   where each `Weight` is written directly as a 4 bits field (0-15).
+  They are encoded forward, 2 weights to a byte with the first weight taking
+  the top four bits and the second taking the bottom four (e.g. the following
+  operations could be used to read the weights:
+  `Weight[0] = (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf)`, etc.).
   The full representation occupies `((Number_of_Symbols+1)/2)` bytes,
   meaning it uses a last full byte even if `Number_of_Symbols` is odd.
   `Number_of_Symbols = headerByte - 127`.
   Note that maximum `Number_of_Symbols` is 255-127 = 128.
-  A larger serie must necessarily use FSE compression.
+  A larger series must necessarily use FSE compression.
 
 - if `headerByte` < 128 :
-  the serie of weights is compressed by FSE.
-  The length of the FSE-compressed serie is equal to `headerByte` (0-127).
+  the series of weights is compressed by FSE.
+  The length of the FSE-compressed series is equal to `headerByte` (0-127).
 
 ##### Finite State Entropy (FSE) compression of Huffman weights
 
-The serie of weights is compressed using FSE compression.
+FSE decoding uses three operations: `Init_State`, `Decode_Symbol`, and `Update_State`.
+`Init_State` reads in the initial state value from a bitstream,
+`Decode_Symbol` outputs a symbol based on the current state,
+and `Update_State` goes to a new state based on the current state and some number of consumed bits.
+
+FSE streams must be read in reverse from the order they're encoded in,
+so bitstreams start at a certain offset and work backwards towards their base.
+
+For more on how FSE bitstreams work, see [Finite State Entropy].
+
+[Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/
+
+The series of Huffman weights is compressed using FSE compression.
 It's a single bitstream with 2 interleaved states,
 sharing a single distribution table.
 
@@ -598,22 +618,27 @@ To decode an FSE bitstream, it is necessary to know its compressed size.
 Compressed size is provided by `headerByte`.
 It's also necessary to know its _maximum possible_ decompressed size,
 which is `255`, since literal values span from `0` to `255`,
-and last symbol value is not represented.
+and last symbol's weight is not represented.
 
 An FSE bitstream starts by a header, describing probabilities distribution.
 It will create a Decoding Table.
-Table must be pre-allocated, which requires to support a maximum accuracy.
+The table must be pre-allocated, so a maximum accuracy must be fixed.
 For a list of Huffman weights, maximum accuracy is 7 bits.
 
-FSE header is [described in relevant chapter](#fse-distribution-table--condensed-format),
-and so is [FSE bitstream](#bitstream).
+The FSE header format is [described in a relevant chapter](#fse-distribution-table--condensed-format),
+as well as the [FSE bitstream](#bitstream).
 The main difference is that Huffman header compression uses 2 states,
 which share the same FSE distribution table.
-Bitstream contains only FSE symbols (no interleaved "raw bitfields").
-The number of symbols to decode is discovered
-by tracking bitStream overflow condition.
-When both states have overflowed the bitstream, end is reached.
+The first state (`State1`) encodes the even indexed symbols,
+and the second (`State2`) encodes the odd indexed symbols.
+State1 is initialized first, and then State2, and they take turns decoding
+a single symbol and updating their state.
 
+The number of symbols to decode is determined
+by tracking bitStream overflow condition:
+If updating state after decoding a symbol would require more bits than
+remain in the stream, it is assumed the extra bits are 0.  Then,
+the symbols for each of the final states are decoded and the process is complete.
 
 ##### Conversion from weights to Huffman prefix codes
 
@@ -687,9 +712,20 @@ Consequently, a last byte of `0` is not possible.
 And the final-bit-flag itself is not part of the useful bitstream.
 Hence, the last byte contains between 0 and 7 useful bits.
 
+For example, if the literal sequence "0145" was encoded using the prefix codes above,
+it would be encoded (in reverse order, first byte first) as:
+```
+00010000 00001101
+```
+
+|Symbol  |   5  |   4  |  1 | 0 | Padding |
+|--------|------|------|----|---|---------|
+|Encoding|`0000`|`0001`|`01`|`1`| `00001` |
+
 Starting from the end,
 it's possible to read the bitstream in a little-endian fashion,
-keeping track of already used bits.
+keeping track of already used bits.  Since the bitstream is encoded in reverse
+order, by starting at the end the symbols can be read in forward order.
 
 Reading the last `Max_Number_of_Bits` bits,
 it's then possible to compare extracted value to decoding table,
@@ -700,7 +736,6 @@ If a bitstream is not entirely and exactly consumed,
 hence reaching exactly its beginning position with _all_ bits consumed,
 the decoding process is considered faulty.
 
-
 ### `Sequences_Section`
 
 A compressed block is a succession of _sequences_ .
@@ -712,7 +747,7 @@ The offset gives the position to copy from,
 which can be within a previous block.
 
 When all _sequences_ are decoded,
-if there is any literal left in the _literal section_,
+if there are any literals left in the _literal section_,
 these bytes are added at the end of the block.
 
 The `Sequences_Section` regroup all symbols required to decode commands.
@@ -810,7 +845,7 @@ They define lengths from 0 to 131071 bytes.
 When `Compression_Mode` is `Predefined_Mode`,
 a predefined distribution is used for FSE compression.
 
-Below is its definition. It uses an accuracy of 6 bits (64 states).
+Its definition is below. It uses an accuracy of 6 bits (64 states).
 ```
 short literalsLength_defaultDistribution[36] =
         { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
@@ -835,12 +870,12 @@ They define lengths from 3 to 131074 bytes.
 
 | `Match_Length_Code` |  40  |  41  |  42  |  43  |  44  |  45  |  46  |  47  |
 | ------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
-| `Baseline`          |  67  |  83  |  99  |  131 |  258 |  514 | 1026 | 2050 |
+| `Baseline`          |  67  |  83  |  99  |  131 |  259 |  515 | 1027 | 2051 |
 | `Number_of_Bits`    |   4  |   4  |   5  |   7  |   8  |   9  |  10  |  11  |
 
 | `Match_Length_Code` |  48  |  49  |  50  |  51  |  52  |
 | ------------------- | ---- | ---- | ---- | ---- | ---- |
-| `Baseline`          | 4098 | 8194 |16486 |32770 |65538 |
+| `Baseline`          | 4099 | 8195 |16387 |32771 |65539 |
 | `Number_of_Bits`    |  12  |  13  |  14  |  15  |  16  |
 
 ##### Default distribution for match length codes
@@ -848,7 +883,7 @@ They define lengths from 3 to 131074 bytes.
 When `Compression_Mode` is defined as `Predefined_Mode`,
 a predefined distribution is used for FSE compression.
 
-Below is its definition. It uses an accuracy of 6 bits (64 states).
+Its definition is below. It uses an accuracy of 6 bits (64 states).
 ```
 short matchLengths_defaultDistribution[53] =
         { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
@@ -908,7 +943,7 @@ When present, they are in this order :
 - Match Lengths
 
 The content to decode depends on their respective encoding mode :
-- `Predefined_Mode` : no content. Use predefined distribution table.
+- `Predefined_Mode` : no content. Use the predefined distribution table.
 - `RLE_Mode` : 1 byte. This is the only code to use across the whole compressed block.
 - `FSE_Compressed_Mode` : A distribution table is present.
 - `Repeat_Mode` : no content. Re-use distribution from previous compressed block.
@@ -936,12 +971,12 @@ It depends on :
   __example__ :
   Presuming an `Accuracy_Log` of 8,
   and presuming 100 probabilities points have already been distributed,
-  the decoder may read any value from `0` to `255 - 100 + 1 == 156` (included).
+  the decoder may read any value from `0` to `255 - 100 + 1 == 156` (inclusive).
   Therefore, it must read `log2sup(156) == 8` bits.
 
 - Value decoded : small values use 1 less bit :
   __example__ :
-  Presuming values from 0 to 156 (included) are possible,
+  Presuming values from 0 to 156 (inclusive) are possible,
   255-156 = 99 values are remaining in an 8-bits field.
   They are used this way :
   first 99 values (hence from 0 to 98) use only 7 bits,
@@ -967,7 +1002,7 @@ For the purpose of calculating cumulated distribution, it counts as one.
 
 [next paragraph]:#fse-decoding--from-normalized-distribution-to-decoding-tables
 
-When a symbol has a probability of `zero`,
+When a symbol has a __probability__ of `zero`,
 it is followed by a 2-bits repeat flag.
 This repeat flag tells how many probabilities of zeroes follow the current one.
 It provides a number ranging from 0 to 3.
@@ -1012,6 +1047,9 @@ position &= tableSize-1;
 
 A position is skipped if already occupied,
 typically by a "less than 1" probability symbol.
+`position` does not reset between symbols; it simply iterates through
+each position in the table, switching to the next symbol when enough
+states have been allocated to the current one.
 
 The result is a list of state values.
 Each state will decode the current symbol.
@@ -1043,7 +1081,7 @@ Numbering starts from higher states using less bits.
 | `Baseline`       |  32   |  64   |   96   |   0  |  16   |
 | range            | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
 
-Next state is determined from current state
+The next state is determined from current state
 by reading the required `Number_of_Bits`, and adding the specified `Baseline`.
 
 
@@ -1093,15 +1131,16 @@ and then for `Literals_Length`.
 It starts by inserting the number of literals defined by `Literals_Length`,
 then continue by copying `Match_Length` bytes from `currentPos - Offset`.
 
-The next operation is to update states.
-Using rules pre-calculated in the decoding tables,
+If it is not the last sequence in the block,
+the next operation is to update states.
+Using the rules pre-calculated in the decoding tables,
 `Literals_Length_State` is updated,
 followed by `Match_Length_State`,
 and then `Offset_State`.
 
 This operation will be repeated `Number_of_Sequences` times.
 At the end, the bitstream shall be entirely consumed,
-otherwise bitstream is considered corrupted.
+otherwise the bitstream is considered corrupted.
 
 [Symbol Decoding]:#the-codes-for-literals-lengths-match-lengths-and-offsets
 
@@ -1111,13 +1150,13 @@ As seen in [Offset Codes], the first 3 values define a repeated offset and we wi
 They are sorted in recency order, with `Repeated_Offset1` meaning "most recent one".
 
 There is an exception though, when current sequence's literals length is `0`.
-In which case, repeated offsets are "pushed by one",
+In this case, repeated offsets are shifted by one,
 so `Repeated_Offset1` becomes `Repeated_Offset2`, `Repeated_Offset2` becomes `Repeated_Offset3`,
 and `Repeated_Offset3` becomes `Repeated_Offset1 - 1_byte`.
 
-On first block, offset history is populated by the following values : 1, 4 and 8 (in order).
+In the first block, the offset history is populated with the following values : 1, 4 and 8 (in order).
 
-Then each block receives its start value from previous compressed block.
+Then each block gets its starting offset history from the ending values of the most recent compressed block.
 Note that non-compressed blocks are skipped,
 they do not contribute to offset history.
 
@@ -1125,11 +1164,12 @@ they do not contribute to offset history.
 
 ###### Offset updates rules
 
-New offset take the lead in offset history,
-up to its previous place if it was already present.
+The newest offset takes the lead in offset history,
+shifting others back (up to its previous place if it was already present).
 
-It means that when `Repeated_Offset1` (most recent) is used, history is unmodified.
+This means that when `Repeated_Offset1` (most recent) is used, history is unmodified.
 When `Repeated_Offset2` is used, it's swapped with `Repeated_Offset1`.
+If any other offset is used, it becomes `Repeated_Offset1` and the rest are shifted back by one.
 
 
 Dictionary format
@@ -1137,6 +1177,9 @@ Dictionary format
 
 `zstd` is compatible with "raw content" dictionaries, free of any format restriction,
 except that they must be at least 8 bytes.
+These dictionaries function as if they were just the `Content` block of a formatted
+dictionary.
+
 But dictionaries created by `zstd --train` follow a format, described here.
 
 __Pre-requisites__ : a dictionary has a size,
@@ -1160,16 +1203,17 @@ _Reserved ranges :_
               - low range : 1 - 32767
               - high range : >= (2^31)
 
-__`Entropy_Tables`__ : following the same format as a [compressed blocks].
+__`Entropy_Tables`__ : following the same format as the tables in [compressed blocks].
               They are stored in following order :
               Huffman tables for literals, FSE table for offsets,
               FSE table for match lengths, and FSE table for literals lengths.
-              It's finally followed by 3 offset values, populating recent offsets,
+              It's finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`),
               stored in order, 4-bytes little-endian each, for a total of 12 bytes.
               Each recent offset must have a value < dictionary size.
 
 __`Content`__ : The rest of the dictionary is its content.
-              The content act as a "past" in front of data to compress or decompress.
+              The content acts as a "past" in front of data to compress or decompress,
+              so it can be referenced in sequence commands.
 
 [compressed blocks]: #the-format-of-compressed_block
 
@@ -1358,6 +1402,7 @@ to crosscheck that an implementation implements the decoding table generation al
 
 Version changes
 ---------------
+- 0.2.3 : clarified several details, by Sean Purcell
 - 0.2.2 : added predefined codes, by Johannes Rudolph
 - 0.2.1 : clarify field names, by Przemyslaw Skibinski
 - 0.2.0 : numerous format adjustments for zstd v0.8
diff --git a/lib/BUCK b/lib/BUCK
new file mode 100644
index 000000000..6812c1b1e
--- /dev/null
+++ b/lib/BUCK
@@ -0,0 +1,186 @@
+cxx_library(
+    name='zstd',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    deps=[
+        ':common',
+        ':compress',
+        ':decompress',
+        ':deprecated',
+    ],
+)
+
+cxx_library(
+    name='compress',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('compress', 'zstdmt_compress.h'),
+    ]),
+    headers=subdir_glob([
+        ('compress', 'zstd_opt.h'),
+    ]),
+    srcs=[
+        'compress/zstd_compress.c',
+        'compress/zstdmt_compress.c',
+    ],
+    deps=[':common'],
+)
+
+cxx_library(
+    name='decompress',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    srcs=['decompress/zstd_decompress.c'],
+    deps=[
+        ':common',
+        ':legacy',
+    ],
+)
+
+cxx_library(
+    name='deprecated',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('deprecated', '*.h'),  # same directory as the srcs glob below
+    ]),
+    srcs=glob(['deprecated/*.c']),
+    deps=[':common'],
+)
+
+cxx_library(
+    name='legacy',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('legacy', '*.h'),
+    ]),
+    srcs=glob(['legacy/*.c']),
+    deps=[':common'],
+)
+
+cxx_library(
+    name='zdict',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('dictBuilder', 'zdict.h'),
+    ]),
+    headers=subdir_glob([
+        ('dictBuilder', 'divsufsort.h'),
+    ]),
+    srcs=glob(['dictBuilder/*.c']),
+    deps=[':common'],
+)
+
+cxx_library(
+    name='bitstream',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'bitstream.h'),
+    ]),
+)
+
+cxx_library(
+    name='entropy',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'fse.h'),
+        ('common', 'huf.h'),
+    ]),
+    srcs=[
+        'common/entropy_common.c',
+        'common/fse_decompress.c',
+        'compress/fse_compress.c',
+        'compress/huf_compress.c',
+        'decompress/huf_decompress.c',
+    ],
+    deps=[
+        ':bitstream',
+        ':errors',
+        ':mem',
+    ],
+)
+
+cxx_library(
+    name='errors',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'error_private.h'),
+        ('common', 'zstd_errors.h'),
+    ]),
+    srcs=['common/error_private.c'],
+)
+
+cxx_library(
+    name='mem',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'mem.h'),
+    ]),
+)
+
+cxx_library(
+    name='pool',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'pool.h'),
+    ]),
+    srcs=['common/pool.c'],
+    deps=[':threading'],
+)
+
+cxx_library(
+    name='threading',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'threading.h'),
+    ]),
+    srcs=['common/threading.c'],
+)
+
+cxx_library(
+    name='xxhash',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('common', 'xxhash.h'),
+    ]),
+    srcs=['common/xxhash.c'],
+)
+
+cxx_library(
+    name='zstd_common',
+    header_namespace='',
+    visibility=['PUBLIC'],
+    exported_headers=subdir_glob([
+        ('', 'zstd.h'),
+        ('common', 'zstd_internal.h'),
+    ]),
+    srcs=['common/zstd_common.c'],
+    deps=[
+        ':errors',
+        ':mem',
+    ],
+)
+
+cxx_library(
+    name='common',
+    deps=[
+        ':bitstream',
+        ':entropy',
+        ':errors',
+        ':mem',
+        ':pool',
+        ':threading',
+        ':xxhash',
+        ':zstd_common',
+    ]
+)
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 693217f24..e439fe1b0 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -21,7 +21,7 @@
 
 #ifdef ZSTD_MULTITHREAD
 
-#include <threading.h>   /* pthread adaptation */
+#include "threading.h"   /* pthread adaptation */
 
 /* A job is a function and an opaque argument */
 typedef struct POOL_job_s {
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index bd011319c..e9509070d 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -3016,12 +3016,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
     {
         U32 add = *dumps++;
         if (add < 255) litLength += add;
-        else
-        {
+        else {
             litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
             dumps += 3;
         }
-        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+        if (dumps >= de) { dumps = de-1; litLength = MaxLL+255; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
 
     /* Offset */
@@ -3043,16 +3042,14 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
 
     /* MatchLength */
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
-    if (matchLength == MaxML)
-    {
+    if (matchLength == MaxML) {
         U32 add = *dumps++;
         if (add < 255) matchLength += add;
-        else
-        {
+        else {
             matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
             dumps += 3;
         }
-        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+        if (dumps >= de) { dumps = de-1; matchLength = MaxML+255; }   /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
     matchLength += MINMATCH;
 
@@ -3116,8 +3113,7 @@ static size_t ZSTD_execSequence(BYTE* op,
     /* Requirement: op <= oend_8 */
 
     /* match within prefix */
-    if (sequence.offset < 8)
-    {
+    if (sequence.offset < 8) {
         /* close range match, overlap */
         const int sub2 = dec64table[sequence.offset];
         op[0] = match[0];
@@ -3127,9 +3123,7 @@ static size_t ZSTD_execSequence(BYTE* op,
         match += dec32table[sequence.offset];
         ZSTD_copy4(op+4, match);
         match -= sub2;
-    }
-    else
-    {
+    } else {
         ZSTD_copy8(op, match);
     }
     op += 8; match += 8;
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 3dd740e5f..43943d81a 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -3230,7 +3230,7 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
             if (litLength&1) litLength>>=1, dumps += 3;
             else litLength = (U16)(litLength)>>1, dumps += 2;
         }
-        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+        if (dumps >= de) { dumps = de-1; litLength = MaxLL+255; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
 
     /* Offset */
@@ -3263,7 +3263,7 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
             if (matchLength&1) matchLength>>=1, dumps += 3;
             else matchLength = (U16)(matchLength)>>1, dumps += 2;
         }
-        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
+        if (dumps >= de) { dumps = de-1; matchLength = MaxML+255; }   /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
     matchLength += MINMATCH;
 
diff --git a/programs/BUCK b/programs/BUCK
new file mode 100644
index 000000000..069403042
--- /dev/null
+++ b/programs/BUCK
@@ -0,0 +1,63 @@
+cxx_binary(
+    name='zstd',
+    headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']),
+    srcs=glob(['*.c'], excludes=['datagen.c']),
+    deps=[
+        ':datagen',
+        ':util',
+        '//lib:zstd',
+        '//lib:zdict',
+        '//lib:mem',
+        '//lib:xxhash',
+    ],
+)
+
+cxx_binary(
+    name='zstdmt',
+    headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']),
+    srcs=glob(['*.c'], excludes=['datagen.c']),
+    deps=[
+        ':datagen',
+        ':util',
+        '//lib:zstd',
+        '//lib:zdict',
+        '//lib:mem',
+        '//lib:xxhash',
+    ],
+    preprocessor_flags=['-DZSTD_MULTITHREAD'],
+    linker_flags=['-lpthread'],
+)
+
+cxx_binary(
+    name='gzstd',
+    headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']),
+    srcs=glob(['*.c'], excludes=['datagen.c']),
+    deps=[
+        ':datagen',
+        ':util',
+        '//lib:zstd',
+        '//lib:zdict',
+        '//lib:mem',
+        '//lib:xxhash',
+    ],
+    preprocessor_flags=['-DZSTD_GZDECOMPRESS'],
+    linker_flags=['-lz'],
+)
+
+cxx_library(
+    name='datagen',
+    visibility=['PUBLIC'],
+    header_namespace='',
+    exported_headers=['datagen.h'],
+    srcs=['datagen.c'],
+    deps=['//lib:mem'],
+)
+
+
+cxx_library(
+    name='util',
+    visibility=['PUBLIC'],
+    header_namespace='',
+    exported_headers=['util.h', 'platform.h'],
+    deps=['//lib:mem'],
+)
diff --git a/programs/bench.c b/programs/bench.c
index 1ca40d6b9..dcb23b1f2 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -40,6 +40,7 @@
 #include "zstd.h"
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"
+#include "zstdmt_compress.h"
 
 
 /* *************************************
@@ -148,8 +149,6 @@ typedef struct {
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
 
-#include "compress/zstdmt_compress.h"
-
 static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         const char* displayName, int cLevel,
                         const size_t* fileSizes, U32 nbFiles,
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index bef8734c7..0fdb1ee12 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -691,10 +691,13 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
                 size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize);
+                size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize);
                 outBuff.size = outBuff.pos + adjustedDstSize;
-                inBuff.size  = inBuff.pos + randomCSrcSize;
+                inBuff.size  = inBuff.pos + adjustedCSrcSize;
                 {   size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff);
                     if (ZSTD_isError(decompressError)) break;   /* error correctly detected */
+                    /* No forward progress possible */
+                    if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break;
     }   }   }   }
     DISPLAY("\r%u fuzzer tests completed   \n", testNb);
 
@@ -933,10 +936,13 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
                 size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
                 size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize);
+                size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize);
                 outBuff.size = outBuff.pos + adjustedDstSize;
-                inBuff.size  = inBuff.pos + randomCSrcSize;
+                inBuff.size  = inBuff.pos + adjustedCSrcSize;
                 {   size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff);
                     if (ZSTD_isError(decompressError)) break;   /* error correctly detected */
+                    /* No forward progress possible */
+                    if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break;
     }   }   }   }
     DISPLAY("\r%u fuzzer tests completed   \n", testNb);
 
diff --git a/zlibWrapper/BUCK b/zlibWrapper/BUCK
new file mode 100644
index 000000000..a3b74ac3f
--- /dev/null
+++ b/zlibWrapper/BUCK
@@ -0,0 +1,22 @@
+cxx_library(
+    name='zlib_wrapper',
+    visibility=['PUBLIC'],
+    exported_linker_flags=['-lz'],
+    header_namespace='',
+    exported_headers=['zstd_zlibwrapper.h'],
+    headers=[
+        'gzcompatibility.h',
+        'gzguts.h',
+    ],
+    srcs=glob(['*.c']),
+    deps=[
+        '//lib:zstd',
+        '//lib:zstd_common',
+    ]
+)
+
+cxx_binary(
+    name='minigzip',
+    srcs=['examples/minigzip.c'],
+    deps=[':zlib_wrapper'],
+)