diff --git a/.buckconfig b/.buckconfig new file mode 100644 index 000000000..b2b9c036f --- /dev/null +++ b/.buckconfig @@ -0,0 +1,9 @@ +[cxx] + cppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=1 + cflags = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith + cxxppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=1 + cxxflags = -std=c++11 -Wno-format-security -Wno-deprecated-declarations + gtest_dep = //contrib/pzstd:gtest + +[httpserver] + port = 0 diff --git a/.buckversion b/.buckversion new file mode 100644 index 000000000..892fad966 --- /dev/null +++ b/.buckversion @@ -0,0 +1 @@ +c8dec2e8da52d483f6dd7c6cd2ad694e8e6fed2b diff --git a/.gitignore b/.gitignore index dd7a74519..e02119883 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,5 @@ googletest/ # Directories bin/ +.buckd/ +buck-out/ diff --git a/NEWS b/NEWS index 46bdb25a2..f404f6e37 100644 --- a/NEWS +++ b/NEWS @@ -1,11 +1,16 @@ v1.1.3 +cli : new : experimental target `make zstdmt`, with multi-threading support cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski cli : fix zstdless on Mac OS-X, by Andrew Janke +cli : fix #232 "compress non-files" dictBuilder : improved dictionary generation quality, thanks to Nick Terrell -API : fix : all symbols properly exposed in libzstd, by Nick Terrell -API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511) +API : new : lib/compress/ZSTDMT_compress.h multithreading API (experimental) API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul API : new : ZDICT_finalizeDictionary() +API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511) +API : fix : all symbols properly exposed in libzstd, by Nick Terrell +build : support for Solaris target, by Przemyslaw Skibinski +doc : clarified specification, by Sean Purcell 
v1.1.2 API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init diff --git a/build/VS2005/zstd/zstd.vcproj b/build/VS2005/zstd/zstd.vcproj index 5ef7a98f8..58f254bc8 100644 --- a/build/VS2005/zstd/zstd.vcproj +++ b/build/VS2005/zstd/zstd.vcproj @@ -43,7 +43,7 @@ + + @@ -533,6 +537,10 @@ RelativePath="..\..\..\lib\legacy\zstd_v07.h" > + + diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index 0beb59dd7..2dfaf3937 100644 --- a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -44,7 +44,7 @@ true - $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); false $(LibraryPath) true - $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); false $(LibraryPath); false - $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); false $(LibraryPath) @@ -227,4 +227,4 @@ - \ No newline at end of file + diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index db752784b..da9c58fd4 100644 --- a/build/cmake/lib/CMakeLists.txt +++ 
b/build/cmake/lib/CMakeLists.txt @@ -66,6 +66,7 @@ SET(Headers ${LIBRARY_DIR}/common/huf.h ${LIBRARY_DIR}/common/mem.h ${LIBRARY_DIR}/common/zstd_internal.h + ${LIBRARY_DIR}/compress/zstdmt_compress.h ${LIBRARY_DIR}/dictBuilder/zdict.h ${LIBRARY_DIR}/deprecated/zbuff.h) diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index 9b3c3acc9..cb3dc6e89 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -20,7 +20,7 @@ SET(ROOT_DIR ../../..) # Define programs directory, where sources and header files are located SET(LIBRARY_DIR ${ROOT_DIR}/lib) SET(PROGRAMS_DIR ${ROOT_DIR}/programs) -INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compression ${LIBRARY_DIR}/dictBuilder) +INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compress ${LIBRARY_DIR}/dictBuilder) IF (ZSTD_LEGACY_SUPPORT) SET(PROGRAMS_LEGACY_DIR ${PROGRAMS_DIR}/legacy) diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt index 7f9c38e1a..53a699449 100644 --- a/build/cmake/tests/CMakeLists.txt +++ b/build/cmake/tests/CMakeLists.txt @@ -41,7 +41,7 @@ SET(ROOT_DIR ../../..) 
SET(LIBRARY_DIR ${ROOT_DIR}/lib) SET(PROGRAMS_DIR ${ROOT_DIR}/programs) SET(TESTS_DIR ${ROOT_DIR}/tests) -INCLUDE_DIRECTORIES(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder) +INCLUDE_DIRECTORIES(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compress ${LIBRARY_DIR}/dictBuilder) ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/fullbench.c) TARGET_LINK_LIBRARIES(fullbench libzstd_static) diff --git a/contrib/pzstd/BUCK b/contrib/pzstd/BUCK new file mode 100644 index 000000000..d04eeedd8 --- /dev/null +++ b/contrib/pzstd/BUCK @@ -0,0 +1,72 @@ +cxx_library( + name='libpzstd', + visibility=['PUBLIC'], + header_namespace='', + exported_headers=[ + 'ErrorHolder.h', + 'Logging.h', + 'Pzstd.h', + ], + headers=[ + 'SkippableFrame.h', + ], + srcs=[ + 'Pzstd.cpp', + 'SkippableFrame.cpp', + ], + deps=[ + ':options', + '//contrib/pzstd/utils:utils', + '//lib:mem', + '//lib:zstd', + ], +) + +cxx_library( + name='options', + visibility=['PUBLIC'], + header_namespace='', + exported_headers=['Options.h'], + srcs=['Options.cpp'], + deps=[ + '//contrib/pzstd/utils:scope_guard', + '//lib:zstd', + '//programs:util', + ], +) + +cxx_binary( + name='pzstd', + visibility=['PUBLIC'], + srcs=['main.cpp'], + deps=[ + ':libpzstd', + ':options', + ], +) + +# Must run "make googletest" first +cxx_library( + name='gtest', + srcs=glob([ + 'googletest/googletest/src/gtest-all.cc', + 'googletest/googlemock/src/gmock-all.cc', + 'googletest/googlemock/src/gmock_main.cc', + ]), + header_namespace='', + exported_headers=subdir_glob([ + ('googletest/googletest/include', '**/*.h'), + ('googletest/googlemock/include', '**/*.h'), + ]), + headers=subdir_glob([ + ('googletest/googletest', 'src/*.cc'), + ('googletest/googletest', 'src/*.h'), + ('googletest/googlemock', 'src/*.cc'), + ('googletest/googlemock', 'src/*.h'), + ]), + platform_linker_flags=[ + ('android', []), + ('', ['-lpthread']), + ], + 
visibility=['PUBLIC'], +) diff --git a/contrib/pzstd/Options.cpp b/contrib/pzstd/Options.cpp index 0b1403354..a0d969393 100644 --- a/contrib/pzstd/Options.cpp +++ b/contrib/pzstd/Options.cpp @@ -7,6 +7,7 @@ * of patent rights can be found in the PATENTS file in the same directory. */ #include "Options.h" +#include "util.h" #include "utils/ScopeGuard.h" #include @@ -15,7 +16,6 @@ #include #include #include -#include #include #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || \ diff --git a/contrib/pzstd/main.cpp b/contrib/pzstd/main.cpp index 279cbfb5e..7d8dbfbcf 100644 --- a/contrib/pzstd/main.cpp +++ b/contrib/pzstd/main.cpp @@ -9,11 +9,6 @@ #include "ErrorHolder.h" #include "Options.h" #include "Pzstd.h" -#include "utils/FileSystem.h" -#include "utils/Range.h" -#include "utils/ScopeGuard.h" -#include "utils/ThreadPool.h" -#include "utils/WorkQueue.h" using namespace pzstd; diff --git a/contrib/pzstd/test/BUCK b/contrib/pzstd/test/BUCK new file mode 100644 index 000000000..6d3fdd3c2 --- /dev/null +++ b/contrib/pzstd/test/BUCK @@ -0,0 +1,37 @@ +cxx_test( + name='options_test', + srcs=['OptionsTest.cpp'], + deps=['//contrib/pzstd:options'], +) + +cxx_test( + name='pzstd_test', + srcs=['PzstdTest.cpp'], + deps=[ + ':round_trip', + '//contrib/pzstd:libpzstd', + '//contrib/pzstd/utils:scope_guard', + '//programs:datagen', + ], +) + +cxx_binary( + name='round_trip_test', + srcs=['RoundTripTest.cpp'], + deps=[ + ':round_trip', + '//contrib/pzstd/utils:scope_guard', + '//programs:datagen', + ] +) + +cxx_library( + name='round_trip', + header_namespace='test', + exported_headers=['RoundTrip.h'], + deps=[ + '//contrib/pzstd:libpzstd', + '//contrib/pzstd:options', + '//contrib/pzstd/utils:scope_guard', + ] +) diff --git a/contrib/pzstd/test/PzstdTest.cpp b/contrib/pzstd/test/PzstdTest.cpp index c85f73a39..cadfa83f7 100644 --- a/contrib/pzstd/test/PzstdTest.cpp +++ b/contrib/pzstd/test/PzstdTest.cpp @@ -41,23 +41,20 @@ TEST(Pzstd, SmallSizes) { 
std::fclose(fd); ASSERT_EQ(written, len); } - for (unsigned headers = 0; headers <= 1; ++headers) { - for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) { - for (unsigned level = 1; level <= 4; level *= 4) { - auto errorGuard = makeScopeGuard([&] { - std::fprintf(stderr, "pzstd headers: %u\n", headers); - std::fprintf(stderr, "# threads: %u\n", numThreads); - std::fprintf(stderr, "compression level: %u\n", level); - }); - Options options; - options.overwrite = true; - options.inputFiles = {inputFile}; - options.numThreads = numThreads; - options.compressionLevel = level; - options.verbosity = 1; - ASSERT_TRUE(roundTrip(options)); - errorGuard.dismiss(); - } + for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) { + for (unsigned level = 1; level <= 4; level *= 4) { + auto errorGuard = makeScopeGuard([&] { + std::fprintf(stderr, "# threads: %u\n", numThreads); + std::fprintf(stderr, "compression level: %u\n", level); + }); + Options options; + options.overwrite = true; + options.inputFiles = {inputFile}; + options.numThreads = numThreads; + options.compressionLevel = level; + options.verbosity = 1; + ASSERT_TRUE(roundTrip(options)); + errorGuard.dismiss(); } } } @@ -79,29 +76,26 @@ TEST(Pzstd, LargeSizes) { std::fclose(fd); ASSERT_EQ(written, len); } - for (unsigned headers = 0; headers <= 1; ++headers) { - for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) { - for (unsigned level = 1; level <= 4; level *= 2) { - auto errorGuard = makeScopeGuard([&] { - std::fprintf(stderr, "pzstd headers: %u\n", headers); - std::fprintf(stderr, "# threads: %u\n", numThreads); - std::fprintf(stderr, "compression level: %u\n", level); - }); - Options options; - options.overwrite = true; - options.inputFiles = {inputFile}; - options.numThreads = std::min(numThreads, options.numThreads); - options.compressionLevel = level; - options.verbosity = 1; - ASSERT_TRUE(roundTrip(options)); - errorGuard.dismiss(); - } + for (unsigned numThreads = 1; numThreads <= 
16; numThreads *= 4) { + for (unsigned level = 1; level <= 4; level *= 4) { + auto errorGuard = makeScopeGuard([&] { + std::fprintf(stderr, "# threads: %u\n", numThreads); + std::fprintf(stderr, "compression level: %u\n", level); + }); + Options options; + options.overwrite = true; + options.inputFiles = {inputFile}; + options.numThreads = std::min(numThreads, options.numThreads); + options.compressionLevel = level; + options.verbosity = 1; + ASSERT_TRUE(roundTrip(options)); + errorGuard.dismiss(); } } } } -TEST(Pzstd, ExtremelyLargeSize) { +TEST(Pzstd, DISABLED_ExtremelyLargeSize) { unsigned seed = std::random_device{}(); std::fprintf(stderr, "Pzstd.ExtremelyLargeSize seed: %u\n", seed); std::mt19937 gen(seed); diff --git a/contrib/pzstd/utils/BUCK b/contrib/pzstd/utils/BUCK new file mode 100644 index 000000000..e757f4120 --- /dev/null +++ b/contrib/pzstd/utils/BUCK @@ -0,0 +1,75 @@ +cxx_library( + name='buffer', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['Buffer.h'], + deps=[':range'], +) + +cxx_library( + name='file_system', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['FileSystem.h'], + deps=[':range'], +) + +cxx_library( + name='likely', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['Likely.h'], +) + +cxx_library( + name='range', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['Range.h'], + deps=[':likely'], +) + +cxx_library( + name='resource_pool', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['ResourcePool.h'], +) + +cxx_library( + name='scope_guard', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['ScopeGuard.h'], +) + +cxx_library( + name='thread_pool', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['ThreadPool.h'], + deps=[':work_queue'], +) + +cxx_library( + name='work_queue', + visibility=['PUBLIC'], + header_namespace='utils', + exported_headers=['WorkQueue.h'], + 
deps=[':buffer'], +) + +cxx_library( + name='utils', + visibility=['PUBLIC'], + deps=[ + ':buffer', + ':file_system', + ':likely', + ':range', + ':resource_pool', + ':scope_guard', + ':thread_pool', + ':work_queue', + ], +) diff --git a/contrib/pzstd/utils/test/BUCK b/contrib/pzstd/utils/test/BUCK new file mode 100644 index 000000000..a5113cab6 --- /dev/null +++ b/contrib/pzstd/utils/test/BUCK @@ -0,0 +1,35 @@ +cxx_test( + name='buffer_test', + srcs=['BufferTest.cpp'], + deps=['//contrib/pzstd/utils:buffer'], +) + +cxx_test( + name='range_test', + srcs=['RangeTest.cpp'], + deps=['//contrib/pzstd/utils:range'], +) + +cxx_test( + name='resource_pool_test', + srcs=['ResourcePoolTest.cpp'], + deps=['//contrib/pzstd/utils:resource_pool'], +) + +cxx_test( + name='scope_guard_test', + srcs=['ScopeGuardTest.cpp'], + deps=['//contrib/pzstd/utils:scope_guard'], +) + +cxx_test( + name='thread_pool_test', + srcs=['ThreadPoolTest.cpp'], + deps=['//contrib/pzstd/utils:thread_pool'], +) + +cxx_test( + name='work_queue_test', + srcs=['WorkQueueTest.cpp'], + deps=['//contrib/pzstd/utils:work_queue'], +) diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index b48b39104..df983284f 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.2.2 (14/09/16) +0.2.3 (27/01/17) Introduction @@ -57,7 +57,6 @@ Whenever it does not support a parameter defined in the compressed stream, it must produce a non-ambiguous error code and associated error message explaining which parameter is unsupported. - Overall conventions ----------- In this document: @@ -117,7 +116,7 @@ Skippable frames defined in this specification are compatible with [LZ4] ones. __`Magic_Number`__ 4 Bytes, little-endian format. -Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F. +Value : 0x184D2A5?, which means any value from 0x184D2A50 to 0x184D2A5F. 
All 16 values are valid to identify a skippable frame. __`Frame_Size`__ @@ -267,7 +266,7 @@ The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag In this case, the maximum back-reference distance is the content size itself, which can be any value from 1 to 2^64-1 bytes (16 EB). -| Bit numbers | 7-3 | 0-2 | +| Bit numbers | 7-3 | 2-0 | | ----------- | ---------- | ---------- | | Field name | `Exponent` | `Mantissa` | @@ -381,9 +380,9 @@ There are 4 block types : This value cannot be used with current version of this specification. Block sizes must respect a few rules : -- In compressed mode, compressed size if always strictly `< decompressed size`. -- Block decompressed size is always <= maximum back-reference distance . -- Block decompressed size is always <= 128 KB +- In compressed mode, compressed size is always strictly less than decompressed size. +- Block decompressed size is always <= maximum back-reference distance. +- Block decompressed size is always <= 128 KB. __`Block_Content`__ @@ -478,13 +477,16 @@ For values spanning several bytes, convention is little-endian. __`Size_Format` for `Raw_Literals_Block` and `RLE_Literals_Block`__ : -- Value x0 : `Regenerated_Size` uses 5 bits (0-31). +- Value ?0 : `Size_Format` uses 1 bit. + `Regenerated_Size` uses 5 bits (0-31). `Literals_Section_Header` has 1 byte. `Regenerated_Size = Header[0]>>3` -- Value 01 : `Regenerated_Size` uses 12 bits (0-4095). +- Value 01 : `Size_Format` uses 2 bits. + `Regenerated_Size` uses 12 bits (0-4095). `Literals_Section_Header` has 2 bytes. `Regenerated_Size = (Header[0]>>4) + (Header[1]<<4)` -- Value 11 : `Regenerated_Size` uses 20 bits (0-1048575). +- Value 11 : `Size_Format` uses 2 bits. + `Regenerated_Size` uses 20 bits (0-1048575). `Literals_Section_Header` has 3 bytes. 
`Regenerated_Size = (Header[0]>>4) + (Header[1]<<4) + (Header[2]<<12)` @@ -507,7 +509,8 @@ __`Size_Format` for `Compressed_Literals_Block` and `Repeat_Stats_Literals_Block `Literals_Section_Header` has 5 bytes. Both `Compressed_Size` and `Regenerated_Size` fields follow little-endian convention. - +Note: `Compressed_Size` __includes__ the size of the Huffman Tree description if it +is present. #### `Huffman_Tree_Description` @@ -550,23 +553,24 @@ Let's presume the following Huffman tree must be described : | `Number_of_Bits` | 1 | 2 | 3 | 0 | 4 | 4 | The tree depth is 4, since its smallest element uses 4 bits. -Value `5` will not be listed, nor will values above `5`. +Value `5` will not be listed as it can be determined from the values for 0-4, +nor will values above `5` as they are all 0. Values from `0` to `4` will be listed using `Weight` instead of `Number_of_Bits`. Weight formula is : ``` Weight = Number_of_Bits ? (Max_Number_of_Bits + 1 - Number_of_Bits) : 0 ``` -It gives the following serie of weights : +It gives the following series of weights : -| `Weight` | 4 | 3 | 2 | 0 | 1 | -| -------- | --- | --- | --- | --- | --- | | literal | 0 | 1 | 2 | 3 | 4 | +| -------- | --- | --- | --- | --- | --- | +| `Weight` | 4 | 3 | 2 | 0 | 1 | The decoder will do the inverse operation : having collected weights of literals from `0` to `4`, it knows the last literal, `5`, is present with a non-zero weight. -The weight of `5` can be deducted by joining to the nearest power of 2. -Sum of `2^(Weight-1)` (excluding 0) is : +The weight of `5` can be determined by advancing to the next power of 2. +The sum of `2^(Weight-1)` (excluding 0's) is : `8 + 4 + 2 + 0 + 1 = 15`. Nearest power of 2 is 16. Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 1`. @@ -574,23 +578,39 @@ Therefore, `Max_Number_of_Bits = 4` and `Weight[5] = 1`. ##### Huffman Tree header This is a single byte value (0-255), -which tells how to decode the list of weights. 
+which describes how to decode the list of weights. - if `headerByte` >= 128 : this is a direct representation, where each `Weight` is written directly as a 4 bits field (0-15). + They are encoded forward, 2 weights to a byte with the first weight taking + the top four bits and the second taking the bottom four (e.g. the following + operations could be used to read the weights: + `Weight[0] = (Byte[0] >> 4), Weight[1] = (Byte[0] & 0xf)`, etc.). The full representation occupies `((Number_of_Symbols+1)/2)` bytes, meaning it uses a last full byte even if `Number_of_Symbols` is odd. `Number_of_Symbols = headerByte - 127`. Note that maximum `Number_of_Symbols` is 255-127 = 128. - A larger serie must necessarily use FSE compression. + A larger series must necessarily use FSE compression. - if `headerByte` < 128 : - the serie of weights is compressed by FSE. - The length of the FSE-compressed serie is equal to `headerByte` (0-127). + the series of weights is compressed by FSE. + The length of the FSE-compressed series is equal to `headerByte` (0-127). ##### Finite State Entropy (FSE) compression of Huffman weights -The serie of weights is compressed using FSE compression. +FSE decoding uses three operations: `Init_State`, `Decode_Symbol`, and `Update_State`. +`Init_State` reads in the initial state value from a bitstream, +`Decode_Symbol` outputs a symbol based on the current state, +and `Update_State` goes to a new state based on the current state and some number of consumed bits. + +FSE streams must be read in reverse from the order they're encoded in, +so bitstreams start at a certain offset and work backwards towards their base. + +For more on how FSE bitstreams work, see [Finite State Entropy]. + +[Finite State Entropy]:https://github.com/Cyan4973/FiniteStateEntropy/ + +The series of Huffman weights is compressed using FSE compression. It's a single bitstream with 2 interleaved states, sharing a single distribution table. 
@@ -598,22 +618,27 @@ To decode an FSE bitstream, it is necessary to know its compressed size. Compressed size is provided by `headerByte`. It's also necessary to know its _maximum possible_ decompressed size, which is `255`, since literal values span from `0` to `255`, -and last symbol value is not represented. +and last symbol's weight is not represented. An FSE bitstream starts by a header, describing probabilities distribution. It will create a Decoding Table. -Table must be pre-allocated, which requires to support a maximum accuracy. +The table must be pre-allocated, so a maximum accuracy must be fixed. For a list of Huffman weights, maximum accuracy is 7 bits. -FSE header is [described in relevant chapter](#fse-distribution-table--condensed-format), -and so is [FSE bitstream](#bitstream). +The FSE header format is [described in a relevant chapter](#fse-distribution-table--condensed-format), +as well as the [FSE bitstream](#bitstream). The main difference is that Huffman header compression uses 2 states, which share the same FSE distribution table. -Bitstream contains only FSE symbols (no interleaved "raw bitfields"). -The number of symbols to decode is discovered -by tracking bitStream overflow condition. -When both states have overflowed the bitstream, end is reached. +The first state (`State1`) encodes the even indexed symbols, +and the second (`State2`) encodes the odd indexes. +State1 is initialized first, and then State2, and they take turns decoding +a single symbol and updating their state. +The number of symbols to decode is determined +by tracking bitStream overflow condition: +If updating state after decoding a symbol would require more bits than +remain in the stream, it is assumed the extra bits are 0. Then, +the symbols for each of the final states are decoded and the process is complete. ##### Conversion from weights to Huffman prefix codes @@ -687,9 +712,20 @@ Consequently, a last byte of `0` is not possible. 
And the final-bit-flag itself is not part of the useful bitstream. Hence, the last byte contains between 0 and 7 useful bits. +For example, if the literal sequence "0145" was encoded using the prefix codes above, +it would be encoded as: +``` +00000001 01110000 +``` + +|Symbol | 5 | 4 | 1 | 0 | Padding | +|--------|------|------|----|---|---------| +|Encoding|`0000`|`0001`|`01`|`1`| `10000` | + Starting from the end, it's possible to read the bitstream in a little-endian fashion, -keeping track of already used bits. +keeping track of already used bits. Since the bitstream is encoded in reverse +order, by starting at the end the symbols can be read in forward order. Reading the last `Max_Number_of_Bits` bits, it's then possible to compare extracted value to decoding table, @@ -700,7 +736,6 @@ If a bitstream is not entirely and exactly consumed, hence reaching exactly its beginning position with _all_ bits consumed, the decoding process is considered faulty. - ### `Sequences_Section` A compressed block is a succession of _sequences_ . @@ -712,7 +747,7 @@ The offset gives the position to copy from, which can be within a previous block. When all _sequences_ are decoded, -if there is any literal left in the _literal section_, +if there are any literals left in the _literal section_, these bytes are added at the end of the block. The `Sequences_Section` regroup all symbols required to decode commands. @@ -810,7 +845,7 @@ They define lengths from 0 to 131071 bytes. When `Compression_Mode` is `Predefined_Mode`, a predefined distribution is used for FSE compression. -Below is its definition. It uses an accuracy of 6 bits (64 states). +Its definition is below. It uses an accuracy of 6 bits (64 states). ``` short literalsLength_defaultDistribution[36] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, @@ -835,12 +870,12 @@ They define lengths from 3 to 131074 bytes. 
| `Match_Length_Code` | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | | ------------------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 67 | 83 | 99 | 131 | 258 | 514 | 1026 | 2050 | +| `Baseline` | 67 | 83 | 99 | 131 | 259 | 515 | 1027 | 2051 | | `Number_of_Bits` | 4 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | | `Match_Length_Code` | 48 | 49 | 50 | 51 | 52 | | ------------------- | ---- | ---- | ---- | ---- | ---- | -| `Baseline` | 4098 | 8194 |16486 |32770 |65538 | +| `Baseline` | 4099 | 8195 |16387 |32771 |65539 | | `Number_of_Bits` | 12 | 13 | 14 | 15 | 16 | ##### Default distribution for match length codes @@ -848,7 +883,7 @@ They define lengths from 3 to 131074 bytes. When `Compression_Mode` is defined as `Predefined_Mode`, a predefined distribution is used for FSE compression. -Below is its definition. It uses an accuracy of 6 bits (64 states). +Its definition is below. It uses an accuracy of 6 bits (64 states). ``` short matchLengths_defaultDistribution[53] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, @@ -908,7 +943,7 @@ When present, they are in this order : - Match Lengths The content to decode depends on their respective encoding mode : -- `Predefined_Mode` : no content. Use predefined distribution table. +- `Predefined_Mode` : no content. Use the predefined distribution table. - `RLE_Mode` : 1 byte. This is the only code to use across the whole compressed block. - `FSE_Compressed_Mode` : A distribution table is present. - `Repeat_Mode` : no content. Re-use distribution from previous compressed block. @@ -936,12 +971,12 @@ It depends on : __example__ : Presuming an `Accuracy_Log` of 8, and presuming 100 probabilities points have already been distributed, - the decoder may read any value from `0` to `255 - 100 + 1 == 156` (included). + the decoder may read any value from `0` to `255 - 100 + 1 == 156` (inclusive). Therefore, it must read `log2sup(156) == 8` bits. 
- Value decoded : small values use 1 less bit : __example__ : - Presuming values from 0 to 156 (included) are possible, + Presuming values from 0 to 156 (inclusive) are possible, 255-156 = 99 values are remaining in an 8-bits field. They are used this way : first 99 values (hence from 0 to 98) use only 7 bits, @@ -967,7 +1002,7 @@ For the purpose of calculating cumulated distribution, it counts as one. [next paragraph]:#fse-decoding--from-normalized-distribution-to-decoding-tables -When a symbol has a probability of `zero`, +When a symbol has a __probability__ of `zero`, it is followed by a 2-bits repeat flag. This repeat flag tells how many probabilities of zeroes follow the current one. It provides a number ranging from 0 to 3. @@ -1012,6 +1047,9 @@ position &= tableSize-1; A position is skipped if already occupied, typically by a "less than 1" probability symbol. +`position` does not reset between symbols, it simply iterates through +each position in the table, switching to the next symbol when enough +states have been allocated to the current one. The result is a list of state values. Each state will decode the current symbol. @@ -1043,7 +1081,7 @@ Numbering starts from higher states using less bits. | `Baseline` | 32 | 64 | 96 | 0 | 16 | | range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 | -Next state is determined from current state +The next state is determined from current state by reading the required `Number_of_Bits`, and adding the specified `Baseline`. @@ -1093,15 +1131,16 @@ and then for `Literals_Length`. It starts by inserting the number of literals defined by `Literals_Length`, then continue by copying `Match_Length` bytes from `currentPos - Offset`. -The next operation is to update states. -Using rules pre-calculated in the decoding tables, +If it is not the last sequence in the block, +the next operation is to update states. 
+Using the rules pre-calculated in the decoding tables, `Literals_Length_State` is updated, followed by `Match_Length_State`, and then `Offset_State`. This operation will be repeated `Number_of_Sequences` times. At the end, the bitstream shall be entirely consumed, -otherwise bitstream is considered corrupted. +otherwise the bitstream is considered corrupted. [Symbol Decoding]:#the-codes-for-literals-lengths-match-lengths-and-offsets @@ -1111,13 +1150,13 @@ As seen in [Offset Codes], the first 3 values define a repeated offset and we wi They are sorted in recency order, with `Repeated_Offset1` meaning "most recent one". There is an exception though, when current sequence's literals length is `0`. -In which case, repeated offsets are "pushed by one", +In this case, repeated offsets are shifted by one, so `Repeated_Offset1` becomes `Repeated_Offset2`, `Repeated_Offset2` becomes `Repeated_Offset3`, and `Repeated_Offset3` becomes `Repeated_Offset1 - 1_byte`. -On first block, offset history is populated by the following values : 1, 4 and 8 (in order). +In the first block, the offset history is populated with the following values : 1, 4 and 8 (in order). -Then each block receives its start value from previous compressed block. +Then each block gets its starting offset history from the ending values of the most recent compressed block. Note that non-compressed blocks are skipped, they do not contribute to offset history. @@ -1125,11 +1164,12 @@ they do not contribute to offset history. ###### Offset updates rules -New offset take the lead in offset history, -up to its previous place if it was already present. +The newest offset takes the lead in offset history, +shifting others back (up to its previous place if it was already present). -It means that when `Repeated_Offset1` (most recent) is used, history is unmodified. +This means that when `Repeated_Offset1` (most recent) is used, history is unmodified. 
When `Repeated_Offset2` is used, it's swapped with `Repeated_Offset1`. +If any other offset is used, it becomes `Repeated_Offset1` and the rest are shifted back by one. Dictionary format @@ -1137,6 +1177,9 @@ Dictionary format `zstd` is compatible with "raw content" dictionaries, free of any format restriction, except that they must be at least 8 bytes. +These dictionaries function as if they were just the `Content` block of a formatted +dictionary. + But dictionaries created by `zstd --train` follow a format, described here. __Pre-requisites__ : a dictionary has a size, @@ -1160,16 +1203,17 @@ _Reserved ranges :_ - low range : 1 - 32767 - high range : >= (2^31) -__`Entropy_Tables`__ : following the same format as a [compressed blocks]. +__`Entropy_Tables`__ : following the same format as the tables in [compressed blocks]. They are stored in following order : Huffman tables for literals, FSE table for offsets, FSE table for match lengths, and FSE table for literals lengths. - It's finally followed by 3 offset values, populating recent offsets, + It's finally followed by 3 offset values, populating recent offsets (instead of using `{1,4,8}`), stored in order, 4-bytes little-endian each, for a total of 12 bytes. Each recent offset must have a value < dictionary size. __`Content`__ : The rest of the dictionary is its content. - The content act as a "past" in front of data to compress or decompress. + The content acts as a "past" in front of data to compress or decompress, + so it can be referenced in sequence commands. 
[compressed blocks]: #the-format-of-compressed_block @@ -1358,6 +1402,7 @@ to crosscheck that an implementation implements the decoding table generation al Version changes --------------- +- 0.2.3 : clarified several details, by Sean Purcell - 0.2.2 : added predefined codes, by Johannes Rudolph - 0.2.1 : clarify field names, by Przemyslaw Skibinski - 0.2.0 : numerous format adjustments for zstd v0.8 diff --git a/lib/BUCK b/lib/BUCK new file mode 100644 index 000000000..6812c1b1e --- /dev/null +++ b/lib/BUCK @@ -0,0 +1,186 @@ +cxx_library( + name='zstd', + header_namespace='', + visibility=['PUBLIC'], + deps=[ + ':common', + ':compress', + ':decompress', + ':deprecated', + ], +) + +cxx_library( + name='compress', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('compress', 'zstdmt_compress.h'), + ]), + headers=subdir_glob([ + ('compress', 'zstd_opt.h'), + ]), + srcs=[ + 'compress/zstd_compress.c', + 'compress/zstdmt_compress.c', + ], + deps=[':common'], +) + +cxx_library( + name='decompress', + header_namespace='', + visibility=['PUBLIC'], + srcs=['decompress/zstd_decompress.c'], + deps=[ + ':common', + ':legacy', + ], +) + +cxx_library( + name='deprecated', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('deprecated', '*.h'), + ]), + srcs=glob(['deprecated/*.c']), + deps=[':common'], +) + +cxx_library( + name='legacy', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('legacy', '*.h'), + ]), + srcs=glob(['legacy/*.c']), + deps=[':common'], +) + +cxx_library( + name='zdict', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('dictBuilder', 'zdict.h'), + ]), + headers=subdir_glob([ + ('dictBuilder', 'divsufsort.h'), + ]), + srcs=glob(['dictBuilder/*.c']), + deps=[':common'], +) + +cxx_library( + name='bitstream', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'bitstream.h'), + ]), +) 
+ +cxx_library( + name='entropy', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'fse.h'), + ('common', 'huf.h'), + ]), + srcs=[ + 'common/entropy_common.c', + 'common/fse_decompress.c', + 'compress/fse_compress.c', + 'compress/huf_compress.c', + 'decompress/huf_decompress.c', + ], + deps=[ + ':bitstream', + ':errors', + ':mem', + ], +) + +cxx_library( + name='errors', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'error_private.h'), + ('common', 'zstd_errors.h'), + ]), + srcs=['common/error_private.c'], +) + +cxx_library( + name='mem', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'mem.h'), + ]), +) + +cxx_library( + name='pool', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'pool.h'), + ]), + srcs=['common/pool.c'], + deps=[':threading'], +) + +cxx_library( + name='threading', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'threading.h'), + ]), + srcs=['common/threading.c'], +) + +cxx_library( + name='xxhash', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'xxhash.h'), + ]), + srcs=['common/xxhash.c'], +) + +cxx_library( + name='zstd_common', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('', 'zstd.h'), + ('common', 'zstd_internal.h'), + ]), + srcs=['common/zstd_common.c'], + deps=[ + ':errors', + ':mem', + ], +) + +cxx_library( + name='common', + deps=[ + ':bitstream', + ':entropy', + ':errors', + ':mem', + ':pool', + ':threading', + ':xxhash', + ':zstd_common', + ] +) diff --git a/lib/common/pool.c b/lib/common/pool.c index 693217f24..e439fe1b0 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -21,7 +21,7 @@ #ifdef ZSTD_MULTITHREAD -#include /* pthread adaptation */ +#include "threading.h" /* pthread adaptation */ /* A job is a function 
and an opaque argument */ typedef struct POOL_job_s { diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index bd011319c..e9509070d 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -3016,12 +3016,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) { U32 add = *dumps++; if (add < 255) litLength += add; - else - { + else { litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ dumps += 3; } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + if (dumps >= de) { dumps = de-1; litLength = MaxLL+255; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } /* Offset */ @@ -3043,16 +3042,14 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) /* MatchLength */ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); - if (matchLength == MaxML) - { + if (matchLength == MaxML) { U32 add = *dumps++; if (add < 255) matchLength += add; - else - { + else { matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ dumps += 3; } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + if (dumps >= de) { dumps = de-1; matchLength = MaxML+255; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } matchLength += MINMATCH; @@ -3116,8 +3113,7 @@ static size_t ZSTD_execSequence(BYTE* op, /* Requirement: op <= oend_8 */ /* match within prefix */ - if (sequence.offset < 8) - { + if (sequence.offset < 8) { /* close range match, overlap */ const int sub2 = dec64table[sequence.offset]; op[0] = match[0]; @@ -3127,9 +3123,7 @@ static size_t ZSTD_execSequence(BYTE* op, match += dec32table[sequence.offset]; ZSTD_copy4(op+4, match); match -= sub2; - } - else - { + } else { ZSTD_copy8(op, match); } op += 8; match += 8; diff --git a/lib/legacy/zstd_v05.c 
b/lib/legacy/zstd_v05.c index 3dd740e5f..43943d81a 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -3230,7 +3230,7 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState) if (litLength&1) litLength>>=1, dumps += 3; else litLength = (U16)(litLength)>>1, dumps += 2; } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + if (dumps >= de) { dumps = de-1; litLength = MaxLL+255; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } /* Offset */ @@ -3263,7 +3263,7 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState) if (matchLength&1) matchLength>>=1, dumps += 3; else matchLength = (U16)(matchLength)>>1, dumps += 2; } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + if (dumps >= de) { dumps = de-1; matchLength = MaxML+255; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } matchLength += MINMATCH; diff --git a/programs/BUCK b/programs/BUCK new file mode 100644 index 000000000..069403042 --- /dev/null +++ b/programs/BUCK @@ -0,0 +1,63 @@ +cxx_binary( + name='zstd', + headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), + srcs=glob(['*.c'], excludes=['datagen.c']), + deps=[ + ':datagen', + ':util', + '//lib:zstd', + '//lib:zdict', + '//lib:mem', + '//lib:xxhash', + ], +) + +cxx_binary( + name='zstdmt', + headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), + srcs=glob(['*.c'], excludes=['datagen.c']), + deps=[ + ':datagen', + ':util', + '//lib:zstd', + '//lib:zdict', + '//lib:mem', + '//lib:xxhash', + ], + preprocessor_flags=['-DZSTD_MULTITHREAD'], + linker_flags=['-lpthread'], +) + +cxx_binary( + name='gzstd', + headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']), + srcs=glob(['*.c'], excludes=['datagen.c']), + deps=[ + ':datagen', + ':util', + '//lib:zstd', + '//lib:zdict', + '//lib:mem', + 
'//lib:xxhash', + ], + preprocessor_flags=['-DZSTD_GZDECOMPRESS'], + linker_flags=['-lz'], +) + +cxx_library( + name='datagen', + visibility=['PUBLIC'], + header_namespace='', + exported_headers=['datagen.h'], + srcs=['datagen.c'], + deps=['//lib:mem'], +) + + +cxx_library( + name='util', + visibility=['PUBLIC'], + header_namespace='', + exported_headers=['util.h', 'platform.h'], + deps=['//lib:mem'], +) diff --git a/programs/bench.c b/programs/bench.c index 1ca40d6b9..dcb23b1f2 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -40,6 +40,7 @@ #include "zstd.h" #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" +#include "zstdmt_compress.h" /* ************************************* @@ -148,8 +149,6 @@ typedef struct { #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) -#include "compress/zstdmt_compress.h" - static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* displayName, int cLevel, const size_t* fileSizes, U32 nbFiles, diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index bef8734c7..0fdb1ee12 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -691,10 +691,13 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); outBuff.size = outBuff.pos + adjustedDstSize; - inBuff.size = inBuff.pos + randomCSrcSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* No forward progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; } } } } DISPLAY("\r%u fuzzer tests completed \n", testNb); @@ -933,10 
+936,13 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp size_t const randomCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); size_t const adjustedDstSize = MIN(dstBufferSize - outBuff.pos, randomDstSize); + size_t const adjustedCSrcSize = MIN(cSize - inBuff.pos, randomCSrcSize); outBuff.size = outBuff.pos + adjustedDstSize; - inBuff.size = inBuff.pos + randomCSrcSize; + inBuff.size = inBuff.pos + adjustedCSrcSize; { size_t const decompressError = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (ZSTD_isError(decompressError)) break; /* error correctly detected */ + /* No forward progress possible */ + if (outBuff.pos < outBuff.size && inBuff.pos == cSize) break; } } } } DISPLAY("\r%u fuzzer tests completed \n", testNb); diff --git a/zlibWrapper/BUCK b/zlibWrapper/BUCK new file mode 100644 index 000000000..a3b74ac3f --- /dev/null +++ b/zlibWrapper/BUCK @@ -0,0 +1,22 @@ +cxx_library( + name='zlib_wrapper', + visibility=['PUBLIC'], + exported_linker_flags=['-lz'], + header_namespace='', + exported_headers=['zstd_zlibwrapper.h'], + headers=[ + 'gzcompatibility.h', + 'gzguts.h', + ], + srcs=glob(['*.c']), + deps=[ + '//lib:zstd', + '//lib:zstd_common', + ] +) + +cxx_binary( + name='minigzip', + srcs=['examples/minigzip.c'], + deps=[':zlib_wrapper'], +)