diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index ac34c7165..f75f61f8e 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -13,7 +13,7 @@ jobs: make-all: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: make all run: make all @@ -24,7 +24,7 @@ jobs: DEVNULLRIGHTS: 1 READFROMBLOCKDEVICE: 1 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: make test run: make test @@ -32,28 +32,28 @@ jobs: make-test-osx: runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: OS-X test run: make test # make -c lib all doesn't work because of the fact that it's not a tty no-intrinsics-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: no intrinsics fuzztest run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest tsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: thread sanitizer zstreamtest run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream ubsan-zstreamtest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: undefined behavior sanitizer zstreamtest run: CC=clang make uasan-test-zstream @@ -61,7 +61,7 @@ jobs: tsan-fuzztest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: thread sanitizer fuzztest run: CC=clang make tsan-fuzztest @@ -69,7 +69,7 @@ jobs: gcc-8-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: 
actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: gcc-8 + ASan + UBSan + Test Zstd # See https://askubuntu.com/a/1428822 run: | @@ -81,14 +81,14 @@ jobs: clang-asan-ubsan-testzstd: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: clang + ASan + UBSan + Test Zstd run: CC=clang make -j uasan-test-zstd # msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140 # /t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}} + # This tests that we don't accidentally grow the size too much. + # If the size grows intentionally, you can raise these numbers. + # But we do need to think about binary size, since it is a concern. + libzstd-size: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 + - name: libzstd size test + run: | + make clean && make -j -C lib libzstd && ./tests/check_size.py lib/libzstd.so 1100000 + make clean && make -j -C lib libzstd ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 && ./tests/check_size.py lib/libzstd.so 400000 + make clean && make -j -C lib libzstd ZSTD_LIB_MINIFY=1 && ./tests/check_size.py lib/libzstd.so 300000 + make clean && make -j -C lib libzstd ZSTD_LIB_MINIFY=1 ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 && ./tests/check_size.py lib/libzstd.so 80000 + minimal-decompressor-macros: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: minimal decompressor macros run: | make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror" @@ -254,7 +313,7 @@ jobs: dynamic-bmi2: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: dynamic bmi2 tests run: | make clean && make -j check MOREFLAGS="-O0 
-Werror -mbmi2" @@ -266,7 +325,7 @@ jobs: test-variants: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: make all variants & validate run: | make -j -C programs allVariants MOREFLAGS=-O0 @@ -292,7 +351,7 @@ jobs: XCC: ${{ matrix.xcc }} XEMU: ${{ matrix.xemu }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: apt update & install run: | sudo apt-get update @@ -345,7 +404,7 @@ jobs: { compiler: clang, platform: x64, script: "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"}, ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: Mingw short test run: | ECHO "Building ${{matrix.compiler}} ${{matrix.platform}}" @@ -378,9 +437,9 @@ jobs: platform: [x64, Win32] configuration: [Release] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: Add MSBuild to PATH - uses: microsoft/setup-msbuild@v1.1.3 + uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # tag=v1.1.3 - name: Build and run tests working-directory: ${{env.GITHUB_WORKSPACE}} env: @@ -398,7 +457,7 @@ jobs: intel-cet-compatibility: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: Build Zstd run: | make -j zstd V=1 @@ -419,7 +478,7 @@ jobs: container: image: debian:testing steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: Install dependencies run: | apt -y update @@ -431,18 +490,21 @@ jobs: cc -Wall -Wextra -Wpedantic -Werror -o simple examples/simple_compression.c $(pkg-config --cflags --libs libzstd) ./simple LICENSE + versions-compatibility: + runs-on: ubuntu-latest + steps: + 
- uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 + - name: Versions Compatibility Test + run: | + make -C tests versionsTest -# This test currently fails on Github Actions specifically. -# Possible reason : TTY emulation. -# Note that the same test works fine locally and on travisCI. -# This will have to be fixed before transferring the test to GA. -# versions-compatibility: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - name: Versions Compatibility Test -# run: | -# make -C tests versionsTest + clangbuild: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 + - name: make clangbuild + run: | + make clangbuild # For reference : icc tests @@ -463,7 +525,7 @@ jobs: # sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" # sudo apt-get update # sudo apt-get install -y intel-basekit intel-hpckit -# - uses: actions/checkout@v3 +# - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 # - name: make check # run: | # make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check diff --git a/.github/workflows/publish-release-artifacts.yml b/.github/workflows/publish-release-artifacts.yml index 4a70fb8c6..0903f0d3c 100644 --- a/.github/workflows/publish-release-artifacts.yml +++ b/.github/workflows/publish-release-artifacts.yml @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 - name: Archive env: diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 1c19b48b2..afdf4d63f 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -27,12 +27,12 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0 + uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3 with: persist-credentials: 
false - name: "Run analysis" - uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # tag=v2.0.6 + uses: ossf/scorecard-action@937ffa90d79c7d720498178154ad4c7ba1e4ad8c # tag=v2.1.0 with: results_file: results.sarif results_format: sarif @@ -51,7 +51,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535 # tag=v3.0.0 + uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # tag=v3.1.1 with: name: SARIF file path: results.sarif @@ -59,6 +59,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # tag=v1.0.26 + uses: github/codeql-action/upload-sarif@959cbb7472c4d4ad70cdfe6f4976053fe48ab394 # tag=v2.1.37 with: sarif_file: results.sarif diff --git a/LICENSE b/LICENSE index a793a8028..75800288c 100644 --- a/LICENSE +++ b/LICENSE @@ -2,7 +2,7 @@ BSD License For Zstandard software -Copyright (c) 2016-present, Facebook, Inc. All rights reserved. +Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -14,9 +14,9 @@ are permitted provided that the following conditions are met: this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name Facebook nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. 
+ * Neither the name Facebook, nor Meta, nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED diff --git a/Makefile b/Makefile index 429c90ff1..d87fc76eb 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -123,6 +123,7 @@ contrib: lib $(MAKE) -C contrib/seekable_format/examples all $(MAKE) -C contrib/seekable_format/tests test $(MAKE) -C contrib/largeNbDicts all + $(MAKE) -C contrib/externalMatchfinder all cd build/single_file_libs/ ; ./build_decoder_test.sh cd build/single_file_libs/ ; ./build_library_test.sh @@ -142,6 +143,7 @@ clean: $(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID) $(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID) $(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID) + $(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID) $(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp* $(Q)$(RM) -r lz4 @echo Cleaning completed diff --git a/build/VS2010/libzstd-dll/libzstd-dll.rc b/build/VS2010/libzstd-dll/libzstd-dll.rc index ee9f56280..13e8746ff 100644 --- a/build/VS2010/libzstd-dll/libzstd-dll.rc +++ b/build/VS2010/libzstd-dll/libzstd-dll.rc @@ -32,11 +32,11 @@ BEGIN BEGIN BLOCK "040904B0" BEGIN - VALUE "CompanyName", "Yann Collet, Facebook, Inc." + VALUE "CompanyName", "Meta Platforms, Inc." 
VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm" VALUE "FileVersion", ZSTD_VERSION_STRING VALUE "InternalName", "libzstd.dll" - VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc." + VALUE "LegalCopyright", "Copyright (c) Meta Platforms, Inc. and affiliates." VALUE "OriginalFilename", "libzstd.dll" VALUE "ProductName", "Zstandard" VALUE "ProductVersion", ZSTD_VERSION_STRING diff --git a/build/VS2010/zstd/zstd.rc b/build/VS2010/zstd/zstd.rc index f5e404730..a2118c2df 100644 --- a/build/VS2010/zstd/zstd.rc +++ b/build/VS2010/zstd/zstd.rc @@ -32,11 +32,11 @@ BEGIN BEGIN BLOCK "040904B0" BEGIN - VALUE "CompanyName", "Yann Collet, Facebook, Inc." + VALUE "CompanyName", "Meta Platforms, Inc." VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm" VALUE "FileVersion", ZSTD_VERSION_STRING VALUE "InternalName", "zstd.exe" - VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc." + VALUE "LegalCopyright", "Copyright (c) Meta Platforms, Inc. and affiliates." VALUE "OriginalFilename", "zstd.exe" VALUE "ProductName", "Zstandard" VALUE "ProductVersion", ZSTD_VERSION_STRING diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt index f1eac9ada..0bffc87d9 100644 --- a/build/cmake/CMakeLists.txt +++ b/build/cmake/CMakeLists.txt @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -8,18 +8,18 @@ # ################################################################ cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR) - -# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies. 
-# Set and use the newest cmake policies that are validated to work -set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3") + +# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies. +# Set and use the newest cmake policies that are validated to work +set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3") set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13") #Policies never changed at PATCH level if("${CMAKE_MAJOR_VERSION}" LESS 3) - set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}") -elseif( "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND + set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}") +elseif( "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND "${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}") - set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}") -else() - set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0") + set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}") +else() + set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0") endif() cmake_policy(VERSION ${ZSTD_CMAKE_POLICY_VERSION}) @@ -40,11 +40,13 @@ if( CMAKE_MAJOR_VERSION LESS 3 ) set(PROJECT_VERSION_PATCH ${zstd_VERSION_PATCH}) set(PROJECT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}") enable_language(C) # Main library is in C + enable_language(ASM) # And ASM enable_language(CXX) # Testing contributed code also utilizes CXX else() project(zstd VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}" LANGUAGES C # Main library is in C + ASM # And ASM CXX # Testing contributed code also utilizes CXX ) endif() diff --git a/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake b/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake index 8d04458c3..0265349fb 100644 --- a/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake +++ 
b/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake @@ -1,7 +1,8 @@ include(CheckCXXCompilerFlag) include(CheckCCompilerFlag) +include(CheckLinkerFlag) -function(EnableCompilerFlag _flag _C _CXX) +function(EnableCompilerFlag _flag _C _CXX _LD) string(REGEX REPLACE "\\+" "PLUS" varname "${_flag}") string(REGEX REPLACE "[^A-Za-z0-9]+" "_" varname "${varname}") string(REGEX REPLACE "^_+" "" varname "${varname}") @@ -18,6 +19,13 @@ function(EnableCompilerFlag _flag _C _CXX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_flag}" PARENT_SCOPE) endif () endif () + if (_LD) + CHECK_LINKER_FLAG(C ${_flag} LD_FLAG_${varname}) + if (LD_FLAG_${varname}) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_flag}" PARENT_SCOPE) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${_flag}" PARENT_SCOPE) + endif () + endif () endfunction() macro(ADD_ZSTD_COMPILATION_FLAGS) @@ -30,33 +38,39 @@ macro(ADD_ZSTD_COMPILATION_FLAGS) # EnableCompilerFlag("-std=c99" true false) # Set C compiation to c99 standard if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND MSVC) # clang-cl normally maps -Wall to -Weverything. 
- EnableCompilerFlag("/clang:-Wall" true true) + EnableCompilerFlag("/clang:-Wall" true true false) else () - EnableCompilerFlag("-Wall" true true) + EnableCompilerFlag("-Wall" true true false) endif () - EnableCompilerFlag("-Wextra" true true) - EnableCompilerFlag("-Wundef" true true) - EnableCompilerFlag("-Wshadow" true true) - EnableCompilerFlag("-Wcast-align" true true) - EnableCompilerFlag("-Wcast-qual" true true) - EnableCompilerFlag("-Wstrict-prototypes" true false) + EnableCompilerFlag("-Wextra" true true false) + EnableCompilerFlag("-Wundef" true true false) + EnableCompilerFlag("-Wshadow" true true false) + EnableCompilerFlag("-Wcast-align" true true false) + EnableCompilerFlag("-Wcast-qual" true true false) + EnableCompilerFlag("-Wstrict-prototypes" true false false) # Enable asserts in Debug mode if (CMAKE_BUILD_TYPE MATCHES "Debug") - EnableCompilerFlag("-DDEBUGLEVEL=1" true true) + EnableCompilerFlag("-DDEBUGLEVEL=1" true true false) endif () + # Add noexecstack flags + # LDFLAGS + EnableCompilerFlag("-z noexecstack" false false true) + # CFLAGS & CXXFLAGS + EnableCompilerFlag("-Qunused-arguments" true true false) + EnableCompilerFlag("-Wa,--noexecstack" true true false) elseif (MSVC) # Add specific compilation flags for Windows Visual set(ACTIVATE_MULTITHREADED_COMPILATION "ON" CACHE BOOL "activate multi-threaded compilation (/MP flag)") if (CMAKE_GENERATOR MATCHES "Visual Studio" AND ACTIVATE_MULTITHREADED_COMPILATION) - EnableCompilerFlag("/MP" true true) + EnableCompilerFlag("/MP" true true false) endif () # UNICODE SUPPORT - EnableCompilerFlag("/D_UNICODE" true true) - EnableCompilerFlag("/DUNICODE" true true) + EnableCompilerFlag("/D_UNICODE" true true false) + EnableCompilerFlag("/DUNICODE" true true false) # Enable asserts in Debug mode if (CMAKE_BUILD_TYPE MATCHES "Debug") - EnableCompilerFlag("/DDEBUGLEVEL=1" true true) + EnableCompilerFlag("/DDEBUGLEVEL=1" true true false) endif () endif () diff --git a/build/cmake/contrib/CMakeLists.txt 
b/build/cmake/contrib/CMakeLists.txt index f7631d08c..8df2a17b3 100644 --- a/build/cmake/contrib/CMakeLists.txt +++ b/build/cmake/contrib/CMakeLists.txt @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/build/cmake/contrib/gen_html/CMakeLists.txt b/build/cmake/contrib/gen_html/CMakeLists.txt index 8fdd61131..d1ff6c64b 100644 --- a/build/cmake/contrib/gen_html/CMakeLists.txt +++ b/build/cmake/contrib/gen_html/CMakeLists.txt @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/build/cmake/contrib/pzstd/CMakeLists.txt b/build/cmake/contrib/pzstd/CMakeLists.txt index 27af86c88..f7098fa0f 100644 --- a/build/cmake/contrib/pzstd/CMakeLists.txt +++ b/build/cmake/contrib/pzstd/CMakeLists.txt @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index cf7af0f8c..30349586b 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the @@ -77,6 +77,12 @@ if (MSVC) set(PlatformDependResources ${MSVC_RESOURCE_DIR}/libzstd-dll.rc) endif () +# Explicitly set the language to C for all files, including ASM files. +# Our assembly expects to be compiled by a C compiler, and is only enabled for +# __GNUC__ compatible compilers. Otherwise all the ASM code is disabled by +# macros. +set_source_files_properties(${Sources} PROPERTIES LANGUAGE C) + # Split project to static and shared libraries build set(library_targets) if (ZSTD_BUILD_SHARED) diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index 28b1e1d16..58d998e42 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt index 02444c803..250f0508f 100644 --- a/build/cmake/tests/CMakeLists.txt +++ b/build/cmake/tests/CMakeLists.txt @@ -1,6 +1,6 @@ # ################################################################ # zstd - Makefile -# Copyright (C) Yann Collet 2014-present +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # BSD license @@ -81,7 +81,7 @@ add_test(NAME fuzzer COMMAND fuzzer ${ZSTD_FUZZER_FLAGS}) # # zstreamtest # -add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c) +add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c ${TESTS_DIR}/external_matchfinder.c) if (NOT MSVC) target_compile_options(zstreamtest PRIVATE "-Wno-deprecated-declarations") endif() @@ -96,13 +96,14 @@ add_test(NAME zstreamtest COMMAND zstreamtest ${ZSTD_ZSTREAM_FLAGS}) AddTestFlagsOption(ZSTD_PLAYTESTS_FLAGS "$ENV{PLAYTESTS_FLAGS}" "Semicolon-separated list of flags to pass to the playTests.sh test") add_test(NAME playTests COMMAND sh -c "\"${TESTS_DIR}/playTests.sh\" ${ZSTD_PLAYTESTS_FLAGS}") -if (ZSTD_BUILD_PROGRAMS) +find_program(UNAME uname) # Run script only in unix shell environments +if (ZSTD_BUILD_PROGRAMS AND UNAME) set_property(TEST playTests APPEND PROPERTY ENVIRONMENT "ZSTD_BIN=$<TARGET_FILE:zstd>" "DATAGEN_BIN=$<TARGET_FILE:datagen>" ) else() - message(STATUS "Disabling playTests.sh test because ZSTD_BUILD_PROGRAMS is not enabled") + message(STATUS "Disabling playTests.sh test because requirements not met") set_tests_properties(playTests PROPERTIES DISABLED YES) endif() diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build index 22f43209a..e70b73432 100644 --- a/build/meson/tests/meson.build +++ b/build/meson/tests/meson.build @@ -21,7 +21,6 @@ FUZZER_FLAGS = ['--no-big-tests'] FUZZERTEST = '-T200s' ZSTREAM_TESTTIME = '-T90s' DECODECORPUS_TESTTIME = '-T30' -ZSTDRTTEST = ['--test-large-data'] # ============================================================================= # Executables @@ -66,8 +65,10 @@ fuzzer = executable('fuzzer', dependencies: [ testcommon_dep, thread_dep ], install: false) -zstreamtest_sources = [join_paths(zstd_rootdir, 'tests/seqgen.c'), - join_paths(zstd_rootdir, 'tests/zstreamtest.c')] 
+zstreamtest_sources = [ + join_paths(zstd_rootdir, 'tests/seqgen.c'), + join_paths(zstd_rootdir, 'tests/zstreamtest.c'), + join_paths(zstd_rootdir, 'tests/external_matchfinder.c')] zstreamtest = executable('zstreamtest', zstreamtest_sources, include_directories: test_includes, @@ -134,24 +135,38 @@ checkTag = executable('checkTag', # ============================================================================= if tests_supported_oses.contains(host_machine_os) - valgrind_prog = find_program('valgrind', ['/usr/bin/valgrind'], required: true) + valgrind_prog = find_program('valgrind', ['/usr/bin/valgrind'], required: false) valgrindTest_py = files('valgrindTest.py') - test('valgrindTest', - valgrindTest_py, - args: [valgrind_prog.path(), zstd, datagen, fuzzer, fullbench], - depends: [zstd, datagen, fuzzer, fullbench], - timeout: 600) # Timeout should work on HDD drive + if valgrind_prog.found() + test('valgrindTest', + valgrindTest_py, + args: [valgrind_prog.path(), zstd, datagen, fuzzer, fullbench], + depends: [zstd, datagen, fuzzer, fullbench], + timeout: 600) # Timeout should work on HDD drive + endif endif if host_machine_os != os_windows playTests_sh = find_program(join_paths(zstd_rootdir, 'tests/playTests.sh'), required: true) - test('test-zstd', - playTests_sh, - args: ZSTDRTTEST, - env: ['ZSTD_BIN=' + zstd.full_path(), 'DATAGEN_BIN=./datagen'], - depends: [datagen], - workdir: meson.current_build_dir(), - timeout: 2800) # Timeout should work on HDD drive + + # add slow tests only if the meson version is new enough to support + # test setups with default-excluded suites + if meson.version().version_compare('>=0.57.0') + matrix = {'fast': [], 'slow': ['--test-large-data']} + else + matrix = {'fast': []} + endif + + foreach suite, opt: matrix + test('test-zstd-'+suite, + playTests_sh, + args: opt, + env: ['ZSTD_BIN=' + zstd.full_path(), 'DATAGEN_BIN=./datagen'], + depends: [datagen], + suite: suite, + workdir: meson.current_build_dir(), + timeout: 2800) # 
Timeout should work on HDD drive + endforeach endif test('test-fullbench-1', @@ -190,3 +205,11 @@ test('test-decodecorpus', args: ['-t', DECODECORPUS_TESTTIME], timeout: 60) test('test-poolTests', poolTests) # should be fast + +if meson.version().version_compare('>=0.57.0') + add_test_setup('fast', + is_default: true, + exclude_suites: ['slow']) + add_test_setup('slow', + exclude_suites: ['fast']) +endif diff --git a/build/single_file_libs/zstd-in.c b/build/single_file_libs/zstd-in.c index 59c6b5f19..e6fca9e4a 100644 --- a/build/single_file_libs/zstd-in.c +++ b/build/single_file_libs/zstd-in.c @@ -8,7 +8,7 @@ * \endcode */ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/build/single_file_libs/zstddeclib-in.c b/build/single_file_libs/zstddeclib-in.c index 5a58589c9..8d9c1f54b 100644 --- a/build/single_file_libs/zstddeclib-in.c +++ b/build/single_file_libs/zstddeclib-in.c @@ -8,7 +8,7 @@ * \endcode */ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/diagnose_corruption/Makefile b/contrib/diagnose_corruption/Makefile index a21a00212..ecc9e6395 100644 --- a/contrib/diagnose_corruption/Makefile +++ b/contrib/diagnose_corruption/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2019-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/diagnose_corruption/check_flipped_bits.c b/contrib/diagnose_corruption/check_flipped_bits.c index cc40ab84b..09ddd4674 100644 --- a/contrib/diagnose_corruption/check_flipped_bits.c +++ b/contrib/diagnose_corruption/check_flipped_bits.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/externalMatchfinder/.gitignore b/contrib/externalMatchfinder/.gitignore new file mode 100644 index 000000000..46357ef58 --- /dev/null +++ b/contrib/externalMatchfinder/.gitignore @@ -0,0 +1,2 @@ +# build artifacts +externalMatchfinder diff --git a/contrib/externalMatchfinder/Makefile b/contrib/externalMatchfinder/Makefile new file mode 100644 index 000000000..2baa558cb --- /dev/null +++ b/contrib/externalMatchfinder/Makefile @@ -0,0 +1,40 @@ +# ################################################################ +# Copyright (c) Yann Collet, Meta Platforms, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). 
+# ################################################################ + +PROGDIR = ../../programs +LIBDIR = ../../lib + +LIBZSTD = $(LIBDIR)/libzstd.a + +CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common + +CFLAGS ?= -O3 +CFLAGS += -std=gnu99 +DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ + -Wstrict-aliasing=1 -Wswitch-enum \ + -Wstrict-prototypes -Wundef -Wpointer-arith \ + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ + -Wredundant-decls +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) + +default: externalMatchfinder + +all: externalMatchfinder + +externalMatchfinder: matchfinder.c main.c $(LIBZSTD) + $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ + +.PHONY: $(LIBZSTD) +$(LIBZSTD): + $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" + +clean: + $(RM) *.o + $(MAKE) -C $(LIBDIR) clean > /dev/null + $(RM) externalMatchfinder diff --git a/contrib/externalMatchfinder/README.md b/contrib/externalMatchfinder/README.md new file mode 100644 index 000000000..cb7d49d97 --- /dev/null +++ b/contrib/externalMatchfinder/README.md @@ -0,0 +1,14 @@ +externalMatchfinder +===================== + +`externalMatchfinder` is a test tool for the external matchfinder API. +It demonstrates how to use the API to perform a simple round-trip test. + +A sample matchfinder is provided in matchfinder.c, but the user can swap +this out with a different one if desired. The sample matchfinder implements +LZ compression with a 1024-entry hashtable. Dictionary compression is not currently supported. + +Command line : +``` +externalMatchfinder filename +``` diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalMatchfinder/main.c new file mode 100644 index 000000000..6971a46c7 --- /dev/null +++ b/contrib/externalMatchfinder/main.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) Yann Collet, Meta Platforms, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" +#include "zstd_errors.h" +#include "matchfinder.h" // simpleExternalMatchFinder + +#define CHECK(res) \ +do { \ + if (ZSTD_isError(res)) { \ + printf("ERROR: %s\n", ZSTD_getErrorName(res)); \ + return 1; \ + } \ +} while (0) \ + +int main(int argc, char *argv[]) { // round-trip test: compress argv[1] via the sample matchfinder, decompress, compare + if (argc != 2) { + printf("Usage: externalMatchfinder <file>\n"); + return 1; + } + + ZSTD_CCtx* const zc = ZSTD_createCCtx(); + + int simpleExternalMatchState = 0xdeadbeef; + + // Here is the crucial bit of code! + ZSTD_registerExternalMatchFinder( + zc, + &simpleExternalMatchState, + simpleExternalMatchFinder + ); + + { + size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); + CHECK(res); + } + + FILE *f = fopen(argv[1], "rb"); + assert(f); + { + int const ret = fseek(f, 0, SEEK_END); + assert(ret == 0); + } + size_t const srcSize = ftell(f); + { + int const ret = fseek(f, 0, SEEK_SET); + assert(ret == 0); + } + + char* const src = malloc(srcSize + 1); + assert(src); + { + size_t const ret = fread(src, srcSize, 1, f); + assert(ret == 1); + int const ret2 = fclose(f); + assert(ret2 == 0); + } + + size_t const dstSize = ZSTD_compressBound(srcSize); + char* const dst = malloc(dstSize); + assert(dst); + + size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); + CHECK(cSize); + + char* const val = malloc(srcSize); + assert(val); + + { + size_t const res = ZSTD_decompress(val, srcSize, dst, cSize); + CHECK(res); + } + + if (memcmp(src, val, srcSize) == 0) { + printf("Compression and decompression were successful!\n"); + printf("Original size: %zu\n", srcSize); // size_t is printed with %zu, not %lu + 
printf("Compressed size: %zu\n", cSize); + } else { + printf("ERROR: input and validation buffers don't match!\n"); + for (size_t i = 0; i < srcSize; i++) { + if (src[i] != val[i]) { + printf("First bad index: %zu\n", i); + break; + } + } + return 1; + } + + ZSTD_freeCCtx(zc); + free(src); + free(dst); + free(val); + return 0; +} diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalMatchfinder/matchfinder.c new file mode 100644 index 000000000..f119193ef --- /dev/null +++ b/contrib/externalMatchfinder/matchfinder.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) Yann Collet, Meta Platforms, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "matchfinder.h" + +#define HSIZE 1024 +static U32 const HLOG = 10; +static U32 const MLS = 4; +static U32 const BADIDX = 0xffffffff; + +size_t simpleExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +) { + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + const BYTE* anchor = istart; + size_t seqCount = 0; + U32 hashTable[HSIZE]; + + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; + (void)compressionLevel; + + { int i; + for (i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } } + + while (ip + MLS < iend) { + size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); + U32 const matchIndex = hashTable[hash]; + hashTable[hash] = (U32)(ip - istart); + + if (matchIndex != BADIDX) { + const BYTE* const match = istart + matchIndex; + U32 
const matchLen = (U32)ZSTD_count(ip, match, iend); + if (matchLen >= ZSTD_MINMATCH_MIN) { + U32 const litLen = (U32)(ip - anchor); + U32 const offset = (U32)(ip - match); + ZSTD_Sequence const seq = { + offset, litLen, matchLen, 0 + }; + + /* Note: it's crucial to stay within the window size! */ + if (offset <= windowSize) { + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } + } + } + + ip++; + } + + { ZSTD_Sequence const finalSeq = { + 0, (U32)(iend - anchor), 0, 0 + }; + outSeqs[seqCount++] = finalSeq; + } + + return seqCount; +} diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalMatchfinder/matchfinder.h new file mode 100644 index 000000000..f8ba1c965 --- /dev/null +++ b/contrib/externalMatchfinder/matchfinder.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Yann Collet, Meta Platforms, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MATCHFINDER_H +#define MATCHFINDER_H + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +size_t simpleExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +); + +#endif diff --git a/contrib/freestanding_lib/freestanding.py b/contrib/freestanding_lib/freestanding.py index 4e0a944f1..df6983245 100755 --- a/contrib/freestanding_lib/freestanding.py +++ b/contrib/freestanding_lib/freestanding.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # ################################################################ -# Copyright (c) 2021-2021, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile index 425f266c4..26e870399 100644 --- a/contrib/gen_html/Makefile +++ b/contrib/gen_html/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/gen_html/gen_html.cpp b/contrib/gen_html/gen_html.cpp index 90d5b21a3..1da5879ad 100644 --- a/contrib/gen_html/gen_html.cpp +++ b/contrib/gen_html/gen_html.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile index 4c055b0ed..40734e62e 100644 --- a/contrib/largeNbDicts/Makefile +++ b/contrib/largeNbDicts/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index cc489000f..7b2af0bae 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -856,7 +856,7 @@ int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary, CONTROL(cTotalSizeNoDict != 0); DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n", clevel, - (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); + (double)totalSrcSlicesSize / (double)cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); size_t* const cSizes = malloc(nbBlocks * sizeof(size_t)); CONTROL(cSizes != NULL); @@ -865,7 +865,7 @@ int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary, CONTROL(cTotalSize != 0); DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n", (unsigned)dictBuffer.size, - (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize); + (double)totalSrcSlicesSize / (double)cTotalSize, (unsigned)cTotalSize); /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */ shrinkSizes(dstSlices, cSizes); diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile index f80ee8653..61a52ce20 100644 --- a/contrib/linux-kernel/Makefile +++ b/contrib/linux-kernel/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the @@ -49,8 +49,8 @@ libzstd: -UZSTD_MULTITHREAD \ -U_MSC_VER \ -U_WIN32 \ - -RZSTDLIB_VISIBILITY= \ - -RZSTDERRORLIB_VISIBILITY= \ + -RZSTDLIB_VISIBLE= \ + -RZSTDERRORLIB_VISIBLE= \ -RZSTD_FALLTHROUGH=fallthrough \ -DZSTD_HAVE_WEAK_SYMBOLS=0 \ -DZSTD_TRACE=0 \ diff --git a/contrib/linux-kernel/decompress_sources.h b/contrib/linux-kernel/decompress_sources.h index a06ca187a..8a47eb2a4 100644 --- a/contrib/linux-kernel/decompress_sources.h +++ b/contrib/linux-kernel/decompress_sources.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/linux.mk b/contrib/linux-kernel/linux.mk index 20f08c644..464c410b2 100644 --- a/contrib/linux-kernel/linux.mk +++ b/contrib/linux-kernel/linux.mk @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/linux_zstd.h b/contrib/linux-kernel/linux_zstd.h index 113408eef..f109d49f4 100644 --- a/contrib/linux-kernel/linux_zstd.h +++ b/contrib/linux-kernel/linux_zstd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/mem.h b/contrib/linux-kernel/mem.h index 1d9cc0392..a7231822b 100644 --- a/contrib/linux-kernel/mem.h +++ b/contrib/linux-kernel/mem.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/Makefile b/contrib/linux-kernel/test/Makefile index 53b0c2a65..67b55e665 100644 --- a/contrib/linux-kernel/test/Makefile +++ b/contrib/linux-kernel/test/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/compiler.h b/contrib/linux-kernel/test/include/linux/compiler.h index de43edb69..988ce4a20 100644 --- a/contrib/linux-kernel/test/include/linux/compiler.h +++ b/contrib/linux-kernel/test/include/linux/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/errno.h b/contrib/linux-kernel/test/include/linux/errno.h index b24752257..b4bdcba0e 100644 --- a/contrib/linux-kernel/test/include/linux/errno.h +++ b/contrib/linux-kernel/test/include/linux/errno.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/kernel.h b/contrib/linux-kernel/test/include/linux/kernel.h index 1f702abac..a4d791cd1 100644 --- a/contrib/linux-kernel/test/include/linux/kernel.h +++ b/contrib/linux-kernel/test/include/linux/kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/limits.h b/contrib/linux-kernel/test/include/linux/limits.h index db9c09904..574aa7b34 100644 --- a/contrib/linux-kernel/test/include/linux/limits.h +++ b/contrib/linux-kernel/test/include/linux/limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/math64.h b/contrib/linux-kernel/test/include/linux/math64.h index 8eefa2d5c..7f6713e73 100644 --- a/contrib/linux-kernel/test/include/linux/math64.h +++ b/contrib/linux-kernel/test/include/linux/math64.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/module.h b/contrib/linux-kernel/test/include/linux/module.h index 63a28d57b..06ef56f9e 100644 --- a/contrib/linux-kernel/test/include/linux/module.h +++ b/contrib/linux-kernel/test/include/linux/module.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/printk.h b/contrib/linux-kernel/test/include/linux/printk.h index eab08e0c4..92a25278e 100644 --- a/contrib/linux-kernel/test/include/linux/printk.h +++ b/contrib/linux-kernel/test/include/linux/printk.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/stddef.h b/contrib/linux-kernel/test/include/linux/stddef.h index 8538eb3e4..15c7408fc 100644 --- a/contrib/linux-kernel/test/include/linux/stddef.h +++ b/contrib/linux-kernel/test/include/linux/stddef.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/swab.h b/contrib/linux-kernel/test/include/linux/swab.h index 783046b42..2b48b434c 100644 --- a/contrib/linux-kernel/test/include/linux/swab.h +++ b/contrib/linux-kernel/test/include/linux/swab.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/include/linux/types.h b/contrib/linux-kernel/test/include/linux/types.h index 459a45700..b413db6f9 100644 --- a/contrib/linux-kernel/test/include/linux/types.h +++ b/contrib/linux-kernel/test/include/linux/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/static_test.c b/contrib/linux-kernel/test/static_test.c index d2b8b5a32..ba4a420d4 100644 --- a/contrib/linux-kernel/test/static_test.c +++ b/contrib/linux-kernel/test/static_test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/test/test.c b/contrib/linux-kernel/test/test.c index 67d248e0c..0f4ba3f45 100644 --- a/contrib/linux-kernel/test/test.c +++ b/contrib/linux-kernel/test/test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/zstd_common_module.c b/contrib/linux-kernel/zstd_common_module.c index 22686e367..2fead39eb 100644 --- a/contrib/linux-kernel/zstd_common_module.c +++ b/contrib/linux-kernel/zstd_common_module.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/zstd_compress_module.c b/contrib/linux-kernel/zstd_compress_module.c index 04e1b5c01..8ecf43226 100644 --- a/contrib/linux-kernel/zstd_compress_module.c +++ b/contrib/linux-kernel/zstd_compress_module.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/zstd_decompress_module.c b/contrib/linux-kernel/zstd_decompress_module.c index f4ed952ed..eb1c49e69 100644 --- a/contrib/linux-kernel/zstd_decompress_module.c +++ b/contrib/linux-kernel/zstd_decompress_module.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/linux-kernel/zstd_deps.h b/contrib/linux-kernel/zstd_deps.h index 7a5bf4483..925161416 100644 --- a/contrib/linux-kernel/zstd_deps.h +++ b/contrib/linux-kernel/zstd_deps.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/match_finders/zstd_edist.c b/contrib/match_finders/zstd_edist.c index d63a7cf8d..d685cdd9e 100644 --- a/contrib/match_finders/zstd_edist.c +++ b/contrib/match_finders/zstd_edist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/match_finders/zstd_edist.h b/contrib/match_finders/zstd_edist.h index a947649b2..c739e2abc 100644 --- a/contrib/match_finders/zstd_edist.h +++ b/contrib/match_finders/zstd_edist.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/ErrorHolder.h b/contrib/pzstd/ErrorHolder.h index 829651c59..2c2797ede 100644 --- a/contrib/pzstd/ErrorHolder.h +++ b/contrib/pzstd/ErrorHolder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/Logging.h b/contrib/pzstd/Logging.h index beb160b64..84a08d200 100644 --- a/contrib/pzstd/Logging.h +++ b/contrib/pzstd/Logging.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile index 3d930cca9..830053cd7 100644 --- a/contrib/pzstd/Makefile +++ b/contrib/pzstd/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2016-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/Options.cpp b/contrib/pzstd/Options.cpp index 90f9d571f..a77824edb 100644 --- a/contrib/pzstd/Options.cpp +++ b/contrib/pzstd/Options.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/Options.h b/contrib/pzstd/Options.h index 924543abf..92c18a350 100644 --- a/contrib/pzstd/Options.h +++ b/contrib/pzstd/Options.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/Pzstd.cpp b/contrib/pzstd/Pzstd.cpp index 2c09bda7a..67b941991 100644 --- a/contrib/pzstd/Pzstd.cpp +++ b/contrib/pzstd/Pzstd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -10,11 +10,13 @@ #include "Pzstd.h" #include "SkippableFrame.h" #include "utils/FileSystem.h" +#include "utils/Portability.h" #include "utils/Range.h" #include "utils/ScopeGuard.h" #include "utils/ThreadPool.h" #include "utils/WorkQueue.h" +#include #include #include #include @@ -336,6 +338,10 @@ static size_t calculateStep( const ZSTD_parameters &params) { (void)size; (void)numThreads; + // Not validated to work correctly for window logs > 23. + // It will definitely fail if windowLog + 2 is >= 4GB because + // the skippable frame can only store sizes up to 4GB. + assert(params.cParams.windowLog <= 23); return size_t{1} << (params.cParams.windowLog + 2); } @@ -587,7 +593,8 @@ std::uint64_t writeFile( // start writing before compression is done because we need to know the // compressed size. // Wait for the compressed size to be available and write skippable frame - SkippableFrame frame(out->size()); + assert(uint64_t(out->size()) < uint64_t(1) << 32); + SkippableFrame frame(uint32_t(out->size())); if (!writeData(frame.data(), outputFd)) { errorHolder.setError("Failed to write output"); return bytesWritten; diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h index c667c887d..3645e5942 100644 --- a/contrib/pzstd/Pzstd.h +++ b/contrib/pzstd/Pzstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved.
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/SkippableFrame.cpp b/contrib/pzstd/SkippableFrame.cpp index 769866dfc..3bea4eb65 100644 --- a/contrib/pzstd/SkippableFrame.cpp +++ b/contrib/pzstd/SkippableFrame.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/SkippableFrame.h b/contrib/pzstd/SkippableFrame.h index 60deed040..817415e92 100644 --- a/contrib/pzstd/SkippableFrame.h +++ b/contrib/pzstd/SkippableFrame.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/main.cpp b/contrib/pzstd/main.cpp index b93f043b1..422b4a56a 100644 --- a/contrib/pzstd/main.cpp +++ b/contrib/pzstd/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/test/OptionsTest.cpp b/contrib/pzstd/test/OptionsTest.cpp index e60114825..91e39750d 100644 --- a/contrib/pzstd/test/OptionsTest.cpp +++ b/contrib/pzstd/test/OptionsTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/test/PzstdTest.cpp b/contrib/pzstd/test/PzstdTest.cpp index 5c7d66310..75453f5f6 100644 --- a/contrib/pzstd/test/PzstdTest.cpp +++ b/contrib/pzstd/test/PzstdTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. 
+ * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/test/RoundTrip.h b/contrib/pzstd/test/RoundTrip.h index c6364ecb4..f777622a3 100644 --- a/contrib/pzstd/test/RoundTrip.h +++ b/contrib/pzstd/test/RoundTrip.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/test/RoundTripTest.cpp b/contrib/pzstd/test/RoundTripTest.cpp index 36af0673a..c37646d1c 100644 --- a/contrib/pzstd/test/RoundTripTest.cpp +++ b/contrib/pzstd/test/RoundTripTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/Buffer.h b/contrib/pzstd/utils/Buffer.h index d17ad2f2c..a85f770ba 100644 --- a/contrib/pzstd/utils/Buffer.h +++ b/contrib/pzstd/utils/Buffer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/FileSystem.h b/contrib/pzstd/utils/FileSystem.h index 8898b0357..8d57d05f0 100644 --- a/contrib/pzstd/utils/FileSystem.h +++ b/contrib/pzstd/utils/FileSystem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8,11 +8,13 @@ */ #pragma once +#include "utils/Portability.h" #include "utils/Range.h" #include #include #include +#include #include // A small subset of `std::filesystem`. 
@@ -82,11 +84,11 @@ inline std::uintmax_t file_size( std::error_code& ec) noexcept { auto stat = status(path, ec); if (ec) { - return -1; + return std::numeric_limits<std::uintmax_t>::max(); } if (!is_regular_file(stat)) { ec.assign(ENOTSUP, std::generic_category()); - return -1; + return std::numeric_limits<std::uintmax_t>::max(); } ec.clear(); return stat.st_size; diff --git a/contrib/pzstd/utils/Likely.h b/contrib/pzstd/utils/Likely.h index 7cea8da27..52243a64e 100644 --- a/contrib/pzstd/utils/Likely.h +++ b/contrib/pzstd/utils/Likely.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/Portability.h b/contrib/pzstd/utils/Portability.h new file mode 100644 index 000000000..ef1f86e51 --- /dev/null +++ b/contrib/pzstd/utils/Portability.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#pragma once + +#include <algorithm> + +// Required for windows, which defines min/max, but we want the std:: version. +#undef min +#undef max diff --git a/contrib/pzstd/utils/Range.h b/contrib/pzstd/utils/Range.h index 6a850ad4e..0fd8f9f86 100644 --- a/contrib/pzstd/utils/Range.h +++ b/contrib/pzstd/utils/Range.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved.
* * This source code is licensed under both the BSD-style license (found in the @@ -14,7 +14,9 @@ #pragma once #include "utils/Likely.h" +#include "utils/Portability.h" +#include #include #include #include diff --git a/contrib/pzstd/utils/ResourcePool.h b/contrib/pzstd/utils/ResourcePool.h index 8dfcdd765..7c4bb6235 100644 --- a/contrib/pzstd/utils/ResourcePool.h +++ b/contrib/pzstd/utils/ResourcePool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/ScopeGuard.h b/contrib/pzstd/utils/ScopeGuard.h index c26f911bf..911fd9842 100644 --- a/contrib/pzstd/utils/ScopeGuard.h +++ b/contrib/pzstd/utils/ScopeGuard.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/ThreadPool.h b/contrib/pzstd/utils/ThreadPool.h index 8ece8e0da..a087d7c1c 100644 --- a/contrib/pzstd/utils/ThreadPool.h +++ b/contrib/pzstd/utils/ThreadPool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/WorkQueue.h b/contrib/pzstd/utils/WorkQueue.h index 1d14d922c..d7947b814 100644 --- a/contrib/pzstd/utils/WorkQueue.h +++ b/contrib/pzstd/utils/WorkQueue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/test/BufferTest.cpp b/contrib/pzstd/utils/test/BufferTest.cpp index fbba74e82..58bf08dcd 100644 --- a/contrib/pzstd/utils/test/BufferTest.cpp +++ b/contrib/pzstd/utils/test/BufferTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/test/RangeTest.cpp b/contrib/pzstd/utils/test/RangeTest.cpp index 755b50fa6..8b7dee271 100644 --- a/contrib/pzstd/utils/test/RangeTest.cpp +++ b/contrib/pzstd/utils/test/RangeTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/test/ResourcePoolTest.cpp b/contrib/pzstd/utils/test/ResourcePoolTest.cpp index 6fe145180..750ee084b 100644 --- a/contrib/pzstd/utils/test/ResourcePoolTest.cpp +++ b/contrib/pzstd/utils/test/ResourcePoolTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/test/ScopeGuardTest.cpp b/contrib/pzstd/utils/test/ScopeGuardTest.cpp index 7bc624da7..0f77cdf38 100644 --- a/contrib/pzstd/utils/test/ScopeGuardTest.cpp +++ b/contrib/pzstd/utils/test/ScopeGuardTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/test/ThreadPoolTest.cpp b/contrib/pzstd/utils/test/ThreadPoolTest.cpp index 703fd4c9c..a01052e60 100644 --- a/contrib/pzstd/utils/test/ThreadPoolTest.cpp +++ b/contrib/pzstd/utils/test/ThreadPoolTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/pzstd/utils/test/WorkQueueTest.cpp b/contrib/pzstd/utils/test/WorkQueueTest.cpp index 14cf77304..16600bb60 100644 --- a/contrib/pzstd/utils/test/WorkQueueTest.cpp +++ b/contrib/pzstd/utils/test/WorkQueueTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/recovery/Makefile b/contrib/recovery/Makefile index 9a9f4f2e8..be6ea4b0e 100644 --- a/contrib/recovery/Makefile +++ b/contrib/recovery/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2019-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/recovery/recover_directory.c b/contrib/recovery/recover_directory.c index 13f83fd10..b9bd7ab49 100644 --- a/contrib/recovery/recover_directory.c +++ b/contrib/recovery/recover_directory.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/examples/Makefile b/contrib/seekable_format/examples/Makefile index 9df6b75fb..fcc04587c 100644 --- a/contrib/seekable_format/examples/Makefile +++ b/contrib/seekable_format/examples/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2017-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/examples/parallel_compression.c b/contrib/seekable_format/examples/parallel_compression.c index 4118b0ad7..0ec9fbd20 100644 --- a/contrib/seekable_format/examples/parallel_compression.c +++ b/contrib/seekable_format/examples/parallel_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/examples/parallel_processing.c b/contrib/seekable_format/examples/parallel_processing.c index 36226b49f..b1709db77 100644 --- a/contrib/seekable_format/examples/parallel_processing.c +++ b/contrib/seekable_format/examples/parallel_processing.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/examples/seekable_compression.c b/contrib/seekable_format/examples/seekable_compression.c index 9a331a895..182b46f64 100644 --- a/contrib/seekable_format/examples/seekable_compression.c +++ b/contrib/seekable_format/examples/seekable_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. 
and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c index e9e201333..2c4f3ba0c 100644 --- a/contrib/seekable_format/examples/seekable_decompression.c +++ b/contrib/seekable_format/examples/seekable_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/examples/seekable_decompression_mem.c b/contrib/seekable_format/examples/seekable_decompression_mem.c index e7b1c6505..44a06fbbf 100644 --- a/contrib/seekable_format/examples/seekable_decompression_mem.c +++ b/contrib/seekable_format/examples/seekable_decompression_mem.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/tests/Makefile b/contrib/seekable_format/tests/Makefile index d51deb3ea..a81f2229f 100644 --- a/contrib/seekable_format/tests/Makefile +++ b/contrib/seekable_format/tests/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2017-present, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/zstd_seekable_compression_format.md b/contrib/seekable_format/zstd_seekable_compression_format.md index 55aebfd2e..7bd0790e8 100644 --- a/contrib/seekable_format/zstd_seekable_compression_format.md +++ b/contrib/seekable_format/zstd_seekable_compression_format.md @@ -2,7 +2,7 @@ ### Notices -Copyright (c) 2017-present Facebook, Inc. +Copyright (c) Meta Platforms, Inc. and affiliates. Permission is granted to copy and distribute this document for any purpose and without charge, diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index 7ec9bb577..113f6f99c 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index 24083aa52..7128b9d22 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/seqBench/Makefile b/contrib/seqBench/Makefile index 0782961eb..e7f08a42c 100644 --- a/contrib/seqBench/Makefile +++ b/contrib/seqBench/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) 2018-present, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile index a9c601ebc..f6deeb13d 100644 --- a/doc/educational_decoder/Makefile +++ b/doc/educational_decoder/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c index 935f60da8..12c5a801b 100644 --- a/doc/educational_decoder/harness.c +++ b/doc/educational_decoder/harness.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 54c059352..9ade76502 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h index d89c83523..c13c8134d 100644 --- a/doc/educational_decoder/zstd_decompress.h +++ b/doc/educational_decoder/zstd_decompress.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index 20c4a10e3..e40677a1d 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -3,7 +3,7 @@ Zstandard Compression Format ### Notices -Copyright (c) 2016-2021 Yann Collet, Facebook, Inc. +Copyright (c) Meta Platforms, Inc. and affiliates. Permission is granted to copy and distribute this document for any purpose and without charge, @@ -435,7 +435,7 @@ They can be decoded first, and then copied during [Sequence Execution], or they can be decoded on the flow during [Sequence Execution]. Literals can be stored uncompressed or compressed using Huffman prefix codes. -When compressed, an optional tree description can be present, +When compressed, a tree description may optionally be present, followed by 1 or 4 streams. | `Literals_Section_Header` | [`Huffman_Tree_Description`] | [jumpTable] | Stream1 | [Stream2] | [Stream3] | [Stream4] | @@ -510,7 +510,7 @@ Its value is : `Size_Format = (Literals_Section_Header[0]>>2) & 3` `Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12)` Only Stream1 is present for these cases. -Note : it's allowed to represent a short value (for example `13`) +Note : it's allowed to represent a short value (for example `27`) using a long format, even if it's less efficient. __`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ : @@ -521,19 +521,33 @@ __`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023). `Literals_Section_Header` uses 3 bytes. - `Size_Format` == 01 : 4 streams. - Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023). + Both `Regenerated_Size` and `Compressed_Size` use 10 bits (6-1023). `Literals_Section_Header` uses 3 bytes. 
- `Size_Format` == 10 : 4 streams. - Both `Regenerated_Size` and `Compressed_Size` use 14 bits (0-16383). + Both `Regenerated_Size` and `Compressed_Size` use 14 bits (6-16383). `Literals_Section_Header` uses 4 bytes. - `Size_Format` == 11 : 4 streams. - Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143). + Both `Regenerated_Size` and `Compressed_Size` use 18 bits (6-262143). `Literals_Section_Header` uses 5 bytes. Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention. Note: `Compressed_Size` __includes__ the size of the Huffman Tree description _when_ it is present. +4 streams is superior to 1 stream in decompression speed, +by exploiting instruction level parallelism. +But it's also more expensive, +costing on average ~7.3 bytes more than the 1 stream mode, mostly from the jump table. + +In general, use the 4 streams mode when there are more literals to decode, +to favor higher decompression speeds. +Beyond 1KB, the 4 streams mode is compulsory anyway. + +Note that a minimum of 6 bytes is required for the 4 streams mode. +That's a technical minimum, but it's not recommended to employ the 4 streams mode +for such a small quantity, that would be wasteful. +A more practical lower bound would be around ~256 bytes. + #### Raw Literals Block The data in Stream1 is `Regenerated_Size` bytes long, it contains the raw literals data to be used during [Sequence Execution]. diff --git a/examples/Makefile b/examples/Makefile index 8d7361dd8..31f52d357 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/examples/common.h b/examples/common.h index 9b09030eb..4873e877a 100644 --- a/examples/common.h +++ b/examples/common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c index 7c7d34288..83edc1cad 100644 --- a/examples/dictionary_compression.c +++ b/examples/dictionary_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c index 107cfc1ee..e6c999964 100644 --- a/examples/dictionary_decompression.c +++ b/examples/dictionary_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c index 5d2a28fcd..bf77ca133 100644 --- a/examples/multiple_simple_compression.c +++ b/examples/multiple_simple_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c index d4efc8e57..b12ad03dc 100644 --- a/examples/multiple_streaming_compression.c +++ b/examples/multiple_streaming_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/simple_compression.c b/examples/simple_compression.c index 27a65b17f..7c880725f 100644 --- a/examples/simple_compression.c +++ b/examples/simple_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c index 59c1fd414..f499156f6 100644 --- a/examples/simple_decompression.c +++ b/examples/simple_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index ff1875829..ed0a3a69c 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_compression_thread_pool.c b/examples/streaming_compression_thread_pool.c index 21cb3d549..a1a024129 100644 --- a/examples/streaming_compression_thread_pool.c +++ b/examples/streaming_compression_thread_pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Martin Liska, SUSE, Facebook, Inc. + * Copyright (c) Martin Liska, SUSE, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index 6dc4c2267..95fa11227 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c index a5219ef1e..957acb61a 100644 --- a/examples/streaming_memory_usage.c +++ b/examples/streaming_memory_usage.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/Makefile b/lib/Makefile index 74c6f1baf..a4cf61ab1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/lib/README.md b/lib/README.md index 015cde567..c3b5d1817 100644 --- a/lib/README.md +++ b/lib/README.md @@ -161,6 +161,13 @@ The file structure is designed to make this selection manually achievable for an `ZSTD_DCtx` decompression contexts, but might also result in a small decompression speed cost. +- The C compiler macros `ZSTDLIB_VISIBLE`, `ZSTDERRORLIB_VISIBLE` and `ZDICTLIB_VISIBLE` + can be overridden to control the visibility of zstd's API. Additionally, + `ZSTDLIB_STATIC_API` and `ZDICTLIB_STATIC_API` can be overridden to control the visibility + of zstd's static API. Specifically, they can be set to `ZSTDLIB_HIDDEN` to hide the symbols + from the shared library. These macros default to `ZSTDLIB_VISIBILITY`, + `ZSTDERRORLIB_VISIBILITY`, and `ZDICTLIB_VISIBILITY` if unset, for backwards compatibility + with the old macro names. #### Windows : using MinGW+MSYS to create DLL diff --git a/lib/common/bits.h b/lib/common/bits.h index c0e917750..7939f3d0f 100644 --- a/lib/common/bits.h +++ b/lib/common/bits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 841778626..db1b4cf13 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -1,7 +1,7 @@ /* ****************************************************************** * bitstream * Part of FSE library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates.
* * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 42f289e0b..e423f2467 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/cpu.h b/lib/common/cpu.h index 8acd33be3..8bc34a36d 100644 --- a/lib/common/cpu.h +++ b/lib/common/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/debug.c b/lib/common/debug.c index bb863c9ea..ebf7bfccf 100644 --- a/lib/common/debug.c +++ b/lib/common/debug.c @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/debug.h b/lib/common/debug.h index 3b2a320a1..0e9817ea6 100644 --- a/lib/common/debug.h +++ b/lib/common/debug.h @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 98bd4238d..4bf8242ff 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Common functions of New Generation Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 1b67500f3..fb4d70596 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -29,7 +29,9 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; case PREFIX(corruption_detected): return "Data corruption detected"; case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; @@ -50,6 +52,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): 
return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/lib/common/error_private.h b/lib/common/error_private.h index 007d81066..325daad40 100644 --- a/lib/common/error_private.h +++ b/lib/common/error_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/fse.h b/lib/common/fse.h index 466a07281..2ef8df459 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -1,7 +1,7 @@ /* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 7034fd97b..ca894eb14 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy decoder - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/common/huf.h b/lib/common/huf.h index 595b2f6db..f7316f9b2 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman codec, * part of Finite State Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. 
+ * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -88,8 +88,10 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, unsigned maxSymbolValue, unsigned tableLog); /** HUF_compress4X_wksp() : - * Same as HUF_compress2(), but uses externally allocated `workSpace`. - * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ + * Same as HUF_compress2(), but uses externally allocated @workSpace. + * @workSpace's size, aka @wkspSize, must be >= HUF_WORKSPACE_SIZE + * @srcSize must be >= 6 + */ #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, diff --git a/lib/common/mem.h b/lib/common/mem.h index 493782f6f..98dd47a04 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/pool.c b/lib/common/pool.c index bf21c57ed..3466b85b8 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -173,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) { /* Join all of the threads */ { size_t i; for (i = 0; i < ctx->threadCapacity; ++i) { - ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */ } } } diff --git a/lib/common/pool.h b/lib/common/pool.h index b86a3452e..eb22ff509 100644 --- a/lib/common/pool.h +++ b/lib/common/pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/portability_macros.h b/lib/common/portability_macros.h index 1650fa3d8..bf3e2b7b1 100644 --- a/lib/common/portability_macros.h +++ b/lib/common/portability_macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/threading.c b/lib/common/threading.c index c62c65d57..f2341105a 100644 --- a/lib/common/threading.c +++ b/lib/common/threading.c @@ -34,39 +34,92 @@ int g_ZSTD_threading_useless_symbol; /* === Implementation === */ +typedef struct { + void* (*start_routine)(void*); + void* arg; + int initialized; + ZSTD_pthread_cond_t initialized_cond; + ZSTD_pthread_mutex_t initialized_mutex; +} ZSTD_thread_params_t; + static unsigned __stdcall worker(void *arg) { - ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; - thread->arg = thread->start_routine(thread->arg); + void* (*start_routine)(void*); + void* thread_arg; + + /* Initialize thread_arg and start_routine and signal main thread that we don't need it + * to wait any longer.
+ */ + { + ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg; + thread_arg = thread_param->arg; + start_routine = thread_param->start_routine; + + /* Signal main thread that we are running and do not depend on its memory anymore */ + ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex); + thread_param->initialized = 1; + ZSTD_pthread_cond_signal(&thread_param->initialized_cond); + ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex); + } + + start_routine(thread_arg); + return 0; } int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, void* (*start_routine) (void*), void* arg) { + ZSTD_thread_params_t thread_param; (void)unused; - thread->arg = arg; - thread->start_routine = start_routine; - thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); - if (!thread->handle) + thread_param.start_routine = start_routine; + thread_param.arg = arg; + thread_param.initialized = 0; + *thread = NULL; + + /* Setup thread initialization synchronization */ + if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) { + /* Should never happen on Windows */ + return -1; + } + if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) { + /* Should never happen on Windows */ + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + return -1; + } + + /* Spawn thread */ + *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL); + if (!thread) { + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); return errno; - else - return 0; + } + + /* Wait for thread to be initialized */ + ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex); + while(!thread_param.initialized) { + ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex); + } + ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex); + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + 
ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + + return 0; } -int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +int ZSTD_pthread_join(ZSTD_pthread_t thread) { DWORD result; - if (!thread.handle) return 0; + if (!thread) return 0; - result = WaitForSingleObject(thread.handle, INFINITE); - CloseHandle(thread.handle); + result = WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); switch (result) { case WAIT_OBJECT_0: - if (value_ptr) *value_ptr = thread.arg; return 0; case WAIT_ABANDONED: return EINVAL; diff --git a/lib/common/threading.h b/lib/common/threading.h index b1458054e..fb5c1c878 100644 --- a/lib/common/threading.h +++ b/lib/common/threading.h @@ -61,16 +61,12 @@ extern "C" { #define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) /* ZSTD_pthread_create() and ZSTD_pthread_join() */ -typedef struct { - HANDLE handle; - void* (*start_routine)(void*); - void* arg; -} ZSTD_pthread_t; +typedef HANDLE ZSTD_pthread_t; int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, void* (*start_routine) (void*), void* arg); -int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); +int ZSTD_pthread_join(ZSTD_pthread_t thread); /** * add here more wrappers as required @@ -98,7 +94,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); #define ZSTD_pthread_t pthread_t #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) -#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) +#define ZSTD_pthread_join(a) pthread_join((a),NULL) #else /* DEBUGLEVEL >= 1 */ @@ -123,7 +119,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); #define ZSTD_pthread_t pthread_t #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) -#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) +#define ZSTD_pthread_join(a) pthread_join((a),NULL) #endif diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c index c2d61ca64..fd237c906 100644 --- a/lib/common/xxhash.c +++ 
b/lib/common/xxhash.c @@ -1,6 +1,6 @@ /* * xxHash - Fast Hash algorithm - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - xxHash homepage: https://cyan4973.github.io/xxHash/ diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h index 901bc0832..b8b73290b 100644 --- a/lib/common/xxhash.h +++ b/lib/common/xxhash.h @@ -1,6 +1,6 @@ /* * xxHash - Fast Hash algorithm - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - xxHash homepage: https://cyan4973.github.io/xxHash/ diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c index 3d7e35b30..320855247 100644 --- a/lib/common/zstd_common.c +++ b/lib/common/zstd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/zstd_deps.h b/lib/common/zstd_deps.h index 14211344a..4d767ae9b 100644 --- a/lib/common/zstd_deps.h +++ b/lib/common/zstd_deps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index e89226702..48558873d 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -94,6 +94,7 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ +#define MIN_LITERALS_FOR_4_STREAMS 6 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; diff --git a/lib/common/zstd_trace.h b/lib/common/zstd_trace.h index 6215f1e70..da20534eb 100644 --- a/lib/common/zstd_trace.h +++ b/lib/common/zstd_trace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/clevels.h b/lib/compress/clevels.h index 7ed2e0049..c18da465f 100644 --- a/lib/compress/clevels.h +++ b/lib/compress/clevels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index c2271f6ee..6a9aa11e5 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy encoder - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/hist.c b/lib/compress/hist.c index 073c57e75..e2fb431f0 100644 --- a/lib/compress/hist.c +++ b/lib/compress/hist.c @@ -1,7 +1,7 @@ /* ****************************************************************** * hist : Histogram functions * part of Finite State Entropy project - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/hist.h b/lib/compress/hist.h index 228ed48a7..887896b81 100644 --- a/lib/compress/hist.h +++ b/lib/compress/hist.h @@ -1,7 +1,7 @@ /* ****************************************************************** * hist : Histogram functions * part of Finite State Entropy project - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 9d2b521bc..f7ca2d3bb 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Huffman encoder, part of New Generation Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -386,7 +386,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits /* renorm totalCost from 2^largestBits to 2^targetNbBits * note : totalCost is necessarily a multiple of baseCost */ - assert((totalCost & (baseCost - 1)) == 0); + assert(((U32)totalCost & (baseCost - 1)) == 0); totalCost >>= (largestBits - targetNbBits); assert(totalCost > 0); @@ -1253,41 +1253,59 @@ unsigned HUF_minTableLog(unsigned symbolCardinality) return minBitsSymbols; } -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode) +unsigned HUF_optimalTableLog( + unsigned maxTableLog, + size_t srcSize, + unsigned maxSymbolValue, + void* workSpace, size_t wkspSize, + HUF_CElt* table, + const unsigned* count, + HUF_depth_mode depthMode) { - unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); assert(srcSize > 1); /* Not supported, RLE should be used instead */ + assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables)); - if (depthMode == HUF_depth_optimal) { /** Test valid depths and return optimal **/ - BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); + if (depthMode != HUF_depth_optimal) { + /* cheap evaluation, based on FSE */ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + } + + { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); - size_t optSize = ((size_t) ~0); - unsigned huffLog; size_t maxBits, hSize, newSize; const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); + const unsigned minTableLog = HUF_minTableLog(symbolCardinality); + size_t optSize = ((size_t) ~0) - 1; + unsigned optLog = maxTableLog, optLogGuess; - if (wkspSize < 
sizeof(HUF_buildCTable_wksp_tables)) return optLog; + DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize); - for (huffLog = HUF_minTableLog(symbolCardinality); huffLog <= maxTableLog; huffLog++) { - maxBits = HUF_buildCTable_wksp(table, count, - maxSymbolValue, huffLog, - workSpace, wkspSize); + /* Search until size increases */ + for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { + DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); + maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); if (ERR_isError(maxBits)) continue; - hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, - workSpace, wkspSize); + if (maxBits < optLogGuess && optLogGuess > minTableLog) break; + + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); + if (ERR_isError(hSize)) continue; newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; + if (newSize > optSize + 1) { + break; + } + if (newSize < optSize) { optSize = newSize; - optLog = huffLog; + optLog = optLogGuess; } } + assert(optLog <= HUF_TABLELOG_MAX); + return optLog; } - assert(optLog <= HUF_TABLELOG_MAX); - return optLog; } /* HUF_compress_internal() : diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d2761974c..b9b056cd5 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -59,14 +59,17 @@ * Helper functions ***************************************/ /* ZSTD_compressBound() - * Note that the result from this function is only compatible with the "normal" - * full-block strategy. 
- * When there are a lot of small blocks due to frequent flush in streaming mode - * the overhead of headers can make the compressed data to be larger than the - * return value of ZSTD_compressBound(). + * Note that the result from this function is only valid for + * the one-pass compression functions. + * When employing the streaming mode, + * if flushes are frequently altering the size of blocks, + * the overhead from block headers can make the compressed data larger + * than the return value of ZSTD_compressBound(). */ size_t ZSTD_compressBound(size_t srcSize) { - return ZSTD_COMPRESSBOUND(srcSize); + size_t const r = ZSTD_COMPRESSBOUND(srcSize); + if (r==0) return ERROR(srcSize_wrong); + return r; } @@ -275,6 +278,16 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; } +/* Enables validation for external sequences in debug builds. */ +static int ZSTD_resolveExternalSequenceValidation(int mode) { +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) + (void)mode; + return 1; +#else + return mode; +#endif +} + /* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. 
*/ static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { @@ -298,6 +311,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( } cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences); assert(!ZSTD_checkCParams(cParams)); return cctxParams; } @@ -343,10 +357,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) #define ZSTD_NO_CLEVEL 0 /** - * Initializes the cctxParams from params and compressionLevel. + * Initializes `cctxParams` from `params` and `compressionLevel`. * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. */ -static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +static void +ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, + const ZSTD_parameters* params, + int compressionLevel) { assert(!ZSTD_checkCParams(params->cParams)); ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); @@ -359,6 +376,7 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams); cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams); + cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences); DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); }
@@ -373,7 +391,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete /** * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. - * @param param Validated zstd parameters. + * @param params Validated zstd parameters. */ static void ZSTD_CCtxParams_setZstdParams( ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) @@ -581,6 +599,16 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; + case ZSTD_c_enableMatchFinderFallback: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_maxBlockSize: + bounds.lowerBound = 1; + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -646,6 +674,8 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_enableMatchFinderFallback: + case ZSTD_c_maxBlockSize: default: return 0; } @@ -658,7 +688,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) if (ZSTD_isUpdateAuthorized(param)) { cctx->cParamsChanged = 1; } else { - RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + RETURN_ERROR(stage_wrong, "can only set params in cctx init stage"); } } switch(param) @@ -702,6 +732,8 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: + case ZSTD_c_enableMatchFinderFallback: + case ZSTD_c_maxBlockSize: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -793,14 +825,14 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, case ZSTD_c_forceAttachDict : { const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; - BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + 
BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref); CCtxParams->attachDictPref = pref; return CCtxParams->attachDictPref; } case ZSTD_c_literalCompressionMode : { const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; - BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm); CCtxParams->literalCompressionMode = lcm; return CCtxParams->literalCompressionMode; } @@ -934,6 +966,16 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; + case ZSTD_c_enableMatchFinderFallback: + BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value); + CCtxParams->enableMatchFinderFallback = value; + return CCtxParams->enableMatchFinderFallback; + + case ZSTD_c_maxBlockSize: + BOUNDCHECK(ZSTD_c_maxBlockSize, value); + CCtxParams->maxBlockSize = value; + return CCtxParams->maxBlockSize; /* report the field just stored, like every sibling case */ + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1069,6 +1111,12 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; + case ZSTD_c_enableMatchFinderFallback: + *value = CCtxParams->enableMatchFinderFallback; + break; + case ZSTD_c_maxBlockSize: + *value = (int)CCtxParams->maxBlockSize; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -1095,6 +1143,21 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( return 0; } +size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams) +{ + DEBUGLOG(4, "ZSTD_CCtx_setCParams"); + assert(cctx != NULL); + if (cctx->streamStage != zcss_init) { + /* All parameters in @cparams are allowed to be updated during MT compression.
+ * This must be signaled, so that MT compression picks up the changes */ + cctx->cParamsChanged = 1; + } + /* only update if parameters are valid */ + FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), ""); + cctx->requestedParams.cParams = cparams; + return 0; +} + size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize); @@ -1240,6 +1303,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); + ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx)); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } return 0; @@ -1482,6 +1546,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; } +/* Helper function for calculating memory requirements. + * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */ +static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) { + U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4; + return blockSize / divider; +} + static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_compressionParameters* cParams, const ldmParams_t* ldmParams, @@ -1489,12 +1560,12 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_paramSwitch_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, - const U64 pledgedSrcSize) + const U64 pledgedSrcSize, + int useExternalMatchFinder) { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (cParams->minMatch==3) ? 
3 : 4; - size_t const maxNbSeq = blockSize / divider; + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder); size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); @@ -1513,6 +1584,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); + size_t const externalSeqSpace = useExternalMatchFinder + ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) + : 0; + size_t const neededSpace = cctxSpace + entropySpace + @@ -1521,7 +1597,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( ldmSeqSpace + matchStateSize + tokenSpace + - bufferSpace; + bufferSpace + + externalSeqSpace; DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); return neededSpace; @@ -1539,7 +1616,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. 
*/ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1600,7 +1677,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) return ZSTD_estimateCCtxSize_usingCCtxParams_internal( &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, - ZSTD_CONTENTSIZE_UNKNOWN); + ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder); } } @@ -1883,8 +1960,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder); size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ?
ZSTD_compressBound(blockSize) + 1 : 0; @@ -1901,7 +1977,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, - buffInSize, buffOutSize, pledgedSrcSize); + buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder); int resizeWorkspace; FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); @@ -2014,6 +2090,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->ldmState.loadedDictEnd = 0; } + /* reserve space for block-level external sequences */ + if (params->useExternalMatchFinder) { + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); + zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; + zc->externalMatchCtx.seqBuffer = + (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); + } + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); @@ -2493,11 +2577,13 @@ typedef struct { * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) */ static ZSTD_symbolEncodingTypeStats_t -ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, - const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, - BYTE* dst, const BYTE* const dstEnd, - ZSTD_strategy strategy, unsigned* countWorkspace, - void* entropyWorkspace, size_t entropyWkspSize) { +ZSTD_buildSequencesStatistics( + const seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) +{ BYTE* const ostart = dst; const BYTE* const oend = dstEnd; BYTE* op = ostart; @@ -2616,13 +2702,14 @@
ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, */ #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 MEM_STATIC size_t -ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - void* entropyWorkspace, size_t entropyWkspSize, - const int bmi2) +ZSTD_entropyCompressSeqStore_internal( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) { const int longOffsets = cctxParams->cParams.windowLog >= STREAM_ACCUMULATOR_MIN; ZSTD_strategy const strategy = cctxParams->cParams.strategy; @@ -2631,7 +2718,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; const seqDef* const sequences = seqStorePtr->sequencesStart; - const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; @@ -2649,21 +2736,20 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; - size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; - size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; + size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); /* Base suspicion of uncompressibility on 
ratio of literals to sequences */ unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); size_t const litSize = (size_t)(seqStorePtr->lit - literals); - HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast; size_t const cSize = ZSTD_compressLiterals( - &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, - ZSTD_literalsCompressionIsDisabled(cctxParams), op, dstCapacity, literals, litSize, entropyWorkspace, entropyWkspSize, - bmi2, suspectUncompressible, depthMode); + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_literalsCompressionIsDisabled(cctxParams), + suspectUncompressible, bmi2); FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); op += cSize; @@ -2735,14 +2821,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, } MEM_STATIC size_t -ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - size_t srcSize, - void* entropyWorkspace, size_t entropyWkspSize, - int bmi2) +ZSTD_entropyCompressSeqStore( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) { size_t const cSize = ZSTD_entropyCompressSeqStore_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, @@ -2763,6 +2850,10 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, if (cSize >= maxCSize) return 0; /* block not compressed */ } DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); + /* libzstd decoders before v1.5.4 are not compatible with compressed blocks of size
ZSTD_BLOCKSIZE_MAX exactly. + * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above. + */ + assert(cSize < ZSTD_BLOCKSIZE_MAX); return cSize; } @@ -2857,6 +2948,55 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthType = ZSTD_llt_none; } +/* ZSTD_postProcessExternalMatchFinderResult() : + * Validates and post-processes sequences obtained through the external matchfinder API: + * - Checks whether nbExternalSeqs represents an error condition. + * - Appends a block delimiter to outSeqs if one is not already present. + * See zstd.h for context regarding block delimiters. + * Returns the number of sequences after post-processing, or an error code. */ +static size_t ZSTD_postProcessExternalMatchFinderResult( + ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize +) { + RETURN_ERROR_IF( + nbExternalSeqs > outSeqsCapacity, + externalMatchFinder_failed, + "External matchfinder returned error code %lu", + (unsigned long)nbExternalSeqs + ); + + RETURN_ERROR_IF( + nbExternalSeqs == 0 && srcSize > 0, + externalMatchFinder_failed, + "External matchfinder produced zero sequences for a non-empty src buffer!" + ); + + if (srcSize == 0) { + ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); + return 1; + } + + { + ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; + + /* We can return early if lastSeq is already a block delimiter. */ + if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { + return nbExternalSeqs; + } + + /* This error condition is only possible if the external matchfinder + * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ + RETURN_ERROR_IF( + nbExternalSeqs == outSeqsCapacity, + externalMatchFinder_failed, + "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" + ); + + /* lastSeq is not a block delimiter, so we need to append one. 
*/ + ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); + return nbExternalSeqs + 1; + } +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -2904,6 +3044,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } if (zc->externSeqStore.pos < zc->externSeqStore.size) { assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchFinder, + parameter_combination_unsupported, + "Long-distance matching with external matchfinder enabled is not currently supported." + ); + /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&zc->externSeqStore, @@ -2915,6 +3064,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + zc->appliedParams.useExternalMatchFinder, + parameter_combination_unsupported, + "Long-distance matching with external matchfinder enabled is not currently supported." 
+ ); + ldmSeqStore.seq = zc->ldmSequences; ldmSeqStore.capacity = zc->maxNbLdmSequences; /* Updates ldmSeqStore.size */ @@ -2929,10 +3086,64 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else { /* not long range mode */ + } else if (zc->appliedParams.useExternalMatchFinder) { + assert( + zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) + ); + assert(zc->externalMatchCtx.mFinder != NULL); + + { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; + + size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)( + zc->externalMatchCtx.mState, + zc->externalMatchCtx.seqBuffer, + zc->externalMatchCtx.seqBufferCapacity, + src, srcSize, + NULL, 0, /* dict and dictSize, currently not supported */ + zc->appliedParams.compressionLevel, + windowSize + ); + + size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult( + zc->externalMatchCtx.seqBuffer, + nbExternalSeqs, + zc->externalMatchCtx.seqBufferCapacity, + srcSize + ); + + /* Return early if there is no error, since we don't need to worry about last literals */ + if (!ZSTD_isError(nbPostProcessedSeqs)) { + ZSTD_sequencePosition seqPos = {0,0,0}; + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize + ); + ms->ldmSeqStore = NULL; + DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); + return ZSTDbss_compress; + } + + /* Propagate the error if fallback is disabled */ + if (!zc->appliedParams.enableMatchFinderFallback) { + return nbPostProcessedSeqs; + } + + /* Fallback to software matchfinder */ + { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + 
DEBUGLOG( + 5, + "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + (unsigned long)nbExternalSeqs + ); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } } + } else { /* not long range mode and no external matchfinder */ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->appliedParams.useRowMatchFinder, dictMode); + assert(zc->externalMatchCtx.mFinder == NULL); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -3055,19 +3266,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) { const size_t unrollMask = unrollSize - 1; const size_t prefixLength = length & unrollMask; size_t i; - size_t u; if (length == 1) return 1; /* Check if prefix is RLE first before using unrolled loop */ if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { return 0; } for (i = prefixLength; i != length; i += unrollSize) { + size_t u; for (u = 0; u < unrollSize; u += sizeof(size_t)) { if (MEM_readST(ip + i + u) != valueST) { return 0; - } - } - } + } } } return 1; } @@ -3083,7 +3292,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore) return nbSeqs < 4 && nbLits < 10; } -static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) +static void +ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) { ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; bs->prevCBlock = bs->nextCBlock; @@ -3091,7 +3301,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c } /* Writes the block header */ -static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { +static void +writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) +{ U32 const cBlockHeader = cSize == 1 ? 
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); @@ -3104,13 +3316,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB * Stores literals block type (raw, rle, compressed, repeat) and * huffman description table to hufMetadata. * Requires ENTROPY_WORKSPACE_SIZE workspace - * @return : size of huffman description table or error code */ -static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int literalsCompressionIsDisabled, - void* workspace, size_t wkspSize, HUF_depth_mode depthMode) + * @return : size of huffman description table, or an error code + */ +static size_t +ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int literalsCompressionIsDisabled, + void* workspace, size_t wkspSize, + HUF_depth_mode depthMode) { BYTE* const wkspStart = (BYTE*)workspace; BYTE* const wkspEnd = wkspStart + wkspSize; @@ -3135,33 +3350,37 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi /* small ? don't even attempt compression (speed opt) */ #ifndef COMPRESS_LITERALS_SIZE_MIN -#define COMPRESS_LITERALS_SIZE_MIN 63 +# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */ #endif { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; if (srcSize <= minLitSize) { DEBUGLOG(5, "set_basic - too small"); hufMetadata->hType = set_basic; return 0; - } - } + } } /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + { size_t const largest = + HIST_count_wksp (countWksp, &maxSymbolValue, + (const BYTE*)src, srcSize, + workspace, wkspSize); FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); if (largest == srcSize) { + /* only one literal symbol */ DEBUGLOG(5, "set_rle"); hufMetadata->hType = set_rle; return 0; } if (largest <= (srcSize >> 7)+4) { + /* heuristic: likely not compressible */ DEBUGLOG(5, "set_basic - no gain"); hufMetadata->hType = set_basic; return 0; - } - } + } } /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + if (repeat == HUF_repeat_check + && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { repeat = HUF_repeat_none; } @@ -3174,35 +3393,34 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi nodeWksp, nodeWkspSize); FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable_wksp( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - 
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no gains"); + } + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; + hufMetadata->hType = set_repeat; return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; + } } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; } } @@ -3212,7 +3430,8 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi * and updates nextEntropy to the appropriate repeatMode. 
*/ static ZSTD_symbolEncodingTypeStats_t -ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) +{ ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; nextEntropy->litlength_repeatMode = FSE_repeat_none; nextEntropy->offcode_repeatMode = FSE_repeat_none; @@ -3224,16 +3443,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { * Builds entropy for the sequences. * Stores symbol compression modes and fse table to fseMetadata. * Requires ENTROPY_WORKSPACE_SIZE wksp. - * @return : size of fse tables or error code */ -static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) + * @return : size of fse tables or error code */ +static size_t +ZSTD_buildBlockEntropyStats_sequences( + const seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) { ZSTD_strategy const strategy = cctxParams->cParams.strategy; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); BYTE* const ostart = fseMetadata->fseTablesBuffer; BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* op = ostart; @@ -3260,18 +3481,20 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, /** ZSTD_buildBlockEntropyStats() : * Builds entropy for the block. 
* Requires workspace size ENTROPY_WORKSPACE_SIZE - * - * @return : 0 on success or error code + * @return : 0 on success, or an error code + * Note : also employed in superblock */ -size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) +size_t ZSTD_buildBlockEntropyStats( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) { - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast; + size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); + int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD); + HUF_depth_mode const depthMode = huf_useOptDepth ? 
HUF_depth_optimal : HUF_depth_fast; entropyMetadata->hufMetadata.hufDesSize = ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, @@ -3292,11 +3515,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, } /* Returns the size estimate for the literals section (header + content) of a block */ -static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, - const ZSTD_hufCTables_t* huf, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) +static size_t +ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) { unsigned* const countWksp = (unsigned*)workspace; unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; @@ -3318,12 +3542,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz } /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ -static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, - const BYTE* codeTable, size_t nbSeq, unsigned maxCode, - const FSE_CTable* fseCTable, - const U8* additionalBits, - short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, - void* workspace, size_t wkspSize) +static size_t +ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U8* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) { unsigned* const countWksp = (unsigned*)workspace; const BYTE* ctp = codeTable; @@ -3355,99 +3580,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, } /* Returns the size estimate for the sequences section (header + content) of a block */ -static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, - const BYTE* 
llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) +static size_t +ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) { size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); size_t cSeqSizeEstimate = 0; cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, - fseTables->offcodeCTable, NULL, - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - workspace, wkspSize); + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, - fseTables->litlengthCTable, LL_bits, - LL_defaultNorm, LL_defaultNormLog, MaxLL, - workspace, wkspSize); + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, - fseTables->matchlengthCTable, ML_bits, - ML_defaultNorm, ML_defaultNormLog, MaxML, - workspace, wkspSize); + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; return cSeqSizeEstimate + sequencesSectionHeaderSize; } /* Returns the size estimate for a given stream of literals, of, ll, ml */ -static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_entropyCTables_t* 
entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { +static size_t +ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) +{ size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, - &entropy->huf, &entropyMetadata->hufMetadata, - workspace, wkspSize, writeLitEntropy); + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); return seqSize + literalsSize + ZSTD_blockHeaderSize; } /* Builds entropy statistics and uses them for blocksize estimation. * - * Returns the estimated compressed size of the seqStore, or a zstd error. + * @return: estimated compressed size of the seqStore, or a zstd error. 
*/ -static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { - ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; +static size_t +ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) +{ + ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata; DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, entropyMetadata, - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); - return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), ""); + return ZSTD_estimateBlockSize( + seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), seqStore->ofCode, seqStore->llCode, seqStore->mlCode, (size_t)(seqStore->sequences - seqStore->sequencesStart), - &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + &zc->blockState.nextCBlock->entropy, + entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); } /* Returns literals bytes represented in a seqStore */ -static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) +{ size_t literalsBytes = 0; - size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t i; for (i = 0; i < nbSeqs; ++i) { - seqDef seq = seqStore->sequencesStart[i]; + seqDef const seq = seqStore->sequencesStart[i]; literalsBytes += seq.litLength; if (i == seqStore->longLengthPos && seqStore->longLengthType == 
ZSTD_llt_literalLength) { literalsBytes += 0x10000; - } - } + } } return literalsBytes; } /* Returns match bytes represented in a seqStore */ -static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) +{ size_t matchBytes = 0; - size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); size_t i; for (i = 0; i < nbSeqs; ++i) { seqDef seq = seqStore->sequencesStart[i]; matchBytes += seq.mlBase + MINMATCH; if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { matchBytes += 0x10000; - } - } + } } return matchBytes; } @@ -3496,6 +3729,7 @@ ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, c U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */ assert(OFFBASE_IS_REPCODE(offBase)); if (adjustedRepCode == ZSTD_REP_NUM) { + assert(ll0); /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 * This is only valid if it results in a valid offset value, aka > 0. * Note : it may happen that `rep[0]==1` in exceptional circumstances. 
@@ -3521,14 +3755,16 @@ ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, c * 1-3 : repcode 1-3 * 4+ : real_offset+3 */ -static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, - seqStore_t* const seqStore, U32 const nbSeq) { +static void +ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, + const seqStore_t* const seqStore, U32 const nbSeq) +{ U32 idx = 0; for (; idx < nbSeq; ++idx) { seqDef* const seq = seqStore->sequencesStart + idx; U32 const ll0 = (seq->litLength == 0); U32 const offBase = seq->offBase; - assert(seq->offBase > 0); + assert(offBase > 0); if (OFFBASE_IS_REPCODE(offBase)) { U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0); U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0); @@ -3537,7 +3773,7 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ * repcode history. */ if (dRawOffset != cRawOffset) { - seq->offBase = cRawOffset + ZSTD_REP_NUM; + seq->offBase = OFFSET_TO_OFFBASE(cRawOffset); } } /* Compression repcode history is always updated with values directly from the unmodified seqStore. @@ -3554,10 +3790,11 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ * Returns the total size of that block (including header) or a ZSTD error code. */ static size_t -ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, +ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, + const seqStore_t* const seqStore, repcodes_t* const dRep, repcodes_t* const cRep, void* dst, size_t dstCapacity, - const void* src, size_t srcSize, + const void* src, size_t srcSize, U32 lastBlock, U32 isPartition) { const U32 rleMaxLength = 25; @@ -3631,10 +3868,11 @@ typedef struct { /* Helper function to perform the recursive search for block splits. 
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. - * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then - * we do not recurse. + * If advantageous to split, then we recurse down the two sub-blocks. + * If not, or if an error occurred in estimation, then we do not recurse. * - * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * Note: The recursion depth is capped by a heuristic minimum number of sequences, + * defined by MIN_SEQUENCES_BLOCK_SPLITTING. * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). * In practice, recursion depth usually doesn't go beyond 4. * @@ -3646,19 +3884,20 @@ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, ZSTD_CCtx* zc, const seqStore_t* origSeqStore) { - seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; - seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; - seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; + seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; + seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; + seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; size_t estimatedOriginalSize; size_t estimatedFirstHalfSize; size_t estimatedSecondHalfSize; size_t midIdx = (startIdx + endIdx)/2; + DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); + assert(endIdx >= startIdx); if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { - DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences"); + DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx); return; } - DEBUGLOG(5, 
"ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); @@ -3679,15 +3918,16 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t end } } -/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. +/* Base recursive function. + * Populates a table with intra-block partition indices that can improve compression ratio. * - * Returns the number of splits made (which equals the size of the partition table - 1). + * @return: number of splits made (which equals the size of the partition table - 1). */ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { seqStoreSplits splits = {partitions, 0}; if (nbSeq <= 4) { - DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split"); + DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq); /* Refuse to try and split anything with less than 4 sequences */ return 0; } @@ -3703,18 +3943,20 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
*/ static size_t -ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) +ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t blockSize, + U32 lastBlock, U32 nbSeq) { size_t cSize = 0; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; size_t i = 0; size_t srcBytesTotal = 0; - U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ - seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; - seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; - size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ + seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore; + seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore; + size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history * may become invalid. 
In order to reconcile potentially invalid repcodes, we keep track of two @@ -3736,19 +3978,21 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); - DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); if (numSplits == 0) { - size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, - &dRep, &cRep, - op, dstCapacity, - ip, blockSize, - lastBlock, 0 /* isPartition */); + size_t cSizeSingleBlock = + ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); - assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX); + assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize); return cSizeSingleBlock; } @@ -3773,7 +4017,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac op, dstCapacity, ip, srcBytes, lastBlockEntireSrc, 1 /* isPartition */); - DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); + DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size", + ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); ip += srcBytes; @@ -3781,10 +4026,10 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, 
size_t dstCapac dstCapacity -= cSizeChunk; cSize += cSizeChunk; *currSeqStore = *nextSeqStore; - assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize); } - /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes - * for the next block. + /* cRep and dRep may have diverged during the compression. + * If so, we use the dRep repcodes for the next block. */ ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); return cSize; @@ -3795,8 +4040,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) { - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; U32 nbSeq; size_t cSize; DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); @@ -3807,7 +4050,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, if (bss == ZSTDbss_noCompress) { if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); return cSize; @@ -3919,10 +4162,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, * * cSize >= blockBound(srcSize): We have expanded the block too much so * emit an uncompressed block. 
*/ - { - size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + { size_t const cSize = + ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); if (cSize != ERROR(dstSize_tooSmall)) { - size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + size_t const maxCSize = + srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); @@ -3930,7 +4174,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, } } } - } + } /* if (bss == ZSTDbss_compress)*/ DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); /* Superblock compression failed, attempt to emit a single no compress block. @@ -3988,7 +4232,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, * All blocks will be terminated, all input will be consumed. * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
* Frame is supposed already started (header already produced) -* @return : compressed size, or an error code +* @return : compressed size, or an error code */ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, @@ -4053,7 +4297,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, MEM_writeLE24(op, cBlockHeader); cSize += ZSTD_blockHeaderSize; } - } + } /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/ ip += blockSize; @@ -5682,6 +5926,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams); params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams); params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams); + params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences); #ifdef ZSTD_MULTITHREAD if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { @@ -5883,12 +6128,6 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, } } -typedef struct { - U32 idx; /* Index in array of ZSTD_Sequence */ - U32 posInSequence; /* Position within sequence at idx */ - size_t posInSrc; /* Number of bytes given by sequences provided so far */ -} ZSTD_sequencePosition; - /* ZSTD_validateSequence() : * @offCode : is presumed to follow format required by ZSTD_storeSeq() * @returns a ZSTD error code if sequence is not valid @@ -5926,10 +6165,7 @@ static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 return offBase; } -/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of - * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
- */ -static size_t +size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, @@ -5983,19 +6219,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, return 0; } -/* Returns the number of bytes to move the current read position back by. - * Only non-zero if we ended up splitting a sequence. - * Otherwise, it may return a ZSTD error if something went wrong. - * - * This function will attempt to scan through blockSize bytes - * represented by the sequences in @inSeqs, - * storing any (partial) sequences. - * - * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to - * avoid splitting a match, or to avoid splitting a match such that it would produce a match - * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. - */ -static size_t +size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const void* src, size_t blockSize) @@ -6528,3 +6752,19 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); } + +void ZSTD_registerExternalMatchFinder( + ZSTD_CCtx* zc, void* mState, + ZSTD_externalMatchFinder_F* mFinder +) { + ZSTD_externalMatchCtx emctx = { + mState, + mFinder, + + /* seqBuffer is allocated later (from the cwskp) */ + NULL, /* seqBuffer */ + 0 /* seqBufferCapacity */ + }; + zc->externalMatchCtx = emctx; + zc->requestedParams.useExternalMatchFinder = 1; +} diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index baa726f7d..300c2707d 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -1,5 +1,5 @@ /* - * 
Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -118,12 +118,13 @@ typedef struct { /** ZSTD_buildBlockEntropyStats() : * Builds entropy for the block. * @return : 0 on success or error code */ -size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize); +size_t ZSTD_buildBlockEntropyStats( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); /********************************* * Compression internals structs * @@ -149,6 +150,12 @@ typedef struct { size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; typedef struct { @@ -339,6 +346,18 @@ struct ZSTD_CCtx_params_s { /* Controls prefetching in some dictMatchState matchfinders */ ZSTD_paramSwitch_e prefetchCDictTables; + + /* Controls whether zstd will fall back to an internal matchfinder + * if the external matchfinder returns an error code. */ + int enableMatchFinderFallback; + + /* Indicates whether an external matchfinder has been referenced. + * Users can't set this externally. + * It is set internally in ZSTD_registerExternalMatchFinder(). 
*/ + int useExternalMatchFinder; + + /* Adjust the max block size*/ + size_t maxBlockSize; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) @@ -370,6 +389,14 @@ typedef struct { ZSTD_entropyCTablesMetadata_t entropyMetadata; } ZSTD_blockSplitCtx; +/* Context for block-level external matchfinder API */ +typedef struct { + void* mState; + ZSTD_externalMatchFinder_F* mFinder; + ZSTD_Sequence* seqBuffer; + size_t seqBufferCapacity; +} ZSTD_externalMatchCtx; + struct ZSTD_CCtx_s { ZSTD_compressionStage_e stage; int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ @@ -439,6 +466,9 @@ struct ZSTD_CCtx_s { /* Workspace for block splitter */ ZSTD_blockSplitCtx blockSplitCtx; + + /* Workspace for external matchfinder */ + ZSTD_externalMatchCtx externalMatchCtx; }; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; @@ -1410,4 +1440,31 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); */ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + * Note that the block delimiter must include the last literals of the block. + */ +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + +/* Returns the number of bytes to move the current read position back by. + * Only non-zero if we ended up splitting a sequence. + * Otherwise, it may return a ZSTD error if something went wrong. 
+ * + * This function will attempt to scan through blockSize bytes + * represented by the sequences in @inSeqs, + * storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); + #endif /* ZSTD_COMPRESS_H */ diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c index ea80a45c8..666e5315d 100644 --- a/lib/compress/zstd_compress_literals.c +++ b/lib/compress/zstd_compress_literals.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -92,16 +92,37 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* return flSize+1; } -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2, - unsigned suspectUncompressible, HUF_depth_mode depthMode) +/* ZSTD_minLiteralsToCompress() : + * returns minimal amount of literals + * for literal compression to even be attempted. + * Minimum is made tighter as compression strategy increases. 
+ */ +static size_t +ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) +{ + assert((int)strategy >= 0); + assert((int)strategy <= 9); + /* btultra2 : min 8 bytes; + * then 2x larger for each successive compression strategy + * max threshold 64 bytes */ + { int const shift = MIN(9-strategy, 3); + size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : 8 << shift; + DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); + return mintc; + } +} + +size_t ZSTD_compressLiterals ( + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, + int disableLiteralCompression, + int suspectUncompressible, + int bmi2) { - size_t const minGain = ZSTD_minGain(srcSize, strategy); size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); BYTE* const ostart = (BYTE*)dst; U32 singleStream = srcSize < 256; @@ -119,15 +140,14 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, if (disableLiteralCompression) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } + /* if too small, don't even attempt compression (speed opt) */ + if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); { HUF_repeat repeat = prevHuf->repeatMode; int const preferRepeat = (strategy < ZSTD_lazy) ? srcSize <= 1024 : 0; + HUF_depth_mode const depthMode = (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD) ? 
HUF_depth_optimal : HUF_depth_fast; typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, unsigned, HUF_depth_mode); huf_compress_f huf_compress; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; @@ -146,10 +166,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, } } - if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } + { size_t const minGain = ZSTD_minGain(srcSize, strategy); + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } } if (cLitSize==1) { ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); @@ -164,16 +185,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, switch(lhSize) { case 3: /* 2 - 2 - 10 - 10 */ + if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); MEM_writeLE24(ostart, lhc); break; } case 4: /* 2 - 2 - 14 - 14 */ + assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); MEM_writeLE32(ostart, lhc); break; } case 5: /* 2 - 2 - 18 - 18 */ + assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); MEM_writeLE32(ostart, lhc); ostart[4] = (BYTE)(cLitSize >> 10); diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h index bb260db9b..9eb74729d 100644 --- a/lib/compress/zstd_compress_literals.h +++ b/lib/compress/zstd_compress_literals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann 
Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -18,14 +18,18 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); -/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, +/* ZSTD_compressLiterals(): + * @entropyWorkspace: must be aligned on 4-bytes boundaries + * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE + * @suspectUncompressible: sampling checks, to potentially skip huffman coding + */ +size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize, void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2, - unsigned suspectUncompressible, HUF_depth_mode depthMode); + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + int suspectUncompressible, + int bmi2); #endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c index 2c1eee567..8872d4d35 100644 --- a/lib/compress/zstd_compress_sequences.c +++ b/lib/compress/zstd_compress_sequences.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h index 7991364c2..4a3a05da9 100644 --- a/lib/compress/zstd_compress_sequences.h +++ b/lib/compress/zstd_compress_sequences.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index eed58e7cf..1971ec646 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_compress_superblock.h b/lib/compress/zstd_compress_superblock.h index 176f9b106..8e494f0d5 100644 --- a/lib/compress/zstd_compress_superblock.h +++ b/lib/compress/zstd_compress_superblock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h index 47afe3dc7..70a359a27 100644 --- a/lib/compress/zstd_cwksp.h +++ b/lib/compress/zstd_cwksp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index c2dbd54c1..0ad88ffc7 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h index 6d8ee8c65..6f0047c4b 100644 --- a/lib/compress/zstd_double_fast.h +++ b/lib/compress/zstd_double_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 291173449..5f2c6a2ed 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h index 3bfeb2c5f..9e4236b47 100644 --- a/lib/compress/zstd_fast.h +++ b/lib/compress/zstd_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index d5a7b5cbf..810bf011c 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 150f7b390..c24f1c794 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index c14c62454..3d74ff19e 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 4e68dbf52..f147021d2 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_ldm_geartab.h b/lib/compress/zstd_ldm_geartab.h index 647f865be..ef34bc5c9 100644 --- a/lib/compress/zstd_ldm_geartab.h +++ b/lib/compress/zstd_ldm_geartab.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 800f87e9e..68d24aa7b 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -16,7 +16,7 @@ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_MAX_PRICE (1<<30) -#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ +#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ /*-************************************* @@ -26,27 +26,35 @@ #if 0 /* approximation at bit level (for tests) */ # define BITCOST_ACCURACY 0 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) +# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) #elif 0 /* fractional bit accuracy (for tests) */ # define BITCOST_ACCURACY 8 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat)) #else /* opt==approx, ultra==accurate */ # define BITCOST_ACCURACY 8 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +# define WEIGHT(stat,opt) ((opt) ? 
ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) #endif +/* ZSTD_bitWeight() : + * provide estimated "cost" of a stat in full bits only */ MEM_STATIC U32 ZSTD_bitWeight(U32 stat) { return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); } +/* ZSTD_fracWeight() : + * provide fractional-bit "cost" of a stat, + * using linear interpolation approximation */ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) { U32 const stat = rawStat + 1; U32 const hb = ZSTD_highbit32(stat); U32 const BWeight = hb * BITCOST_MULTIPLIER; + /* Fweight was meant for "Fractional weight" + * but it's effectively a value between 1 and 2 + * using fixed point arithmetic */ U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; U32 const weight = BWeight + FWeight; assert(hb + BITCOST_ACCURACY < 31); @@ -88,20 +96,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts) return total; } -static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) +typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e; + +static U32 +ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1) { U32 s, sum=0; - DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); + DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", + (unsigned)lastEltIndex+1, (unsigned)shift ); assert(shift < 30); for (s=0; s<lastEltIndex+1; s++) { - table[s] = 1 + (table[s] >> shift); - sum += table[s]; + unsigned const base = base1 ?
1 : (table[s]>0); + unsigned const newStat = base + (table[s] >> shift); + sum += newStat; + table[s] = newStat; } return sum; } /* ZSTD_scaleStats() : - * reduce all elements in table is sum too large + * reduce all elt frequencies in table if sum too large * return the resulting sum of elements */ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) { @@ -110,7 +124,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); assert(logTarget < 30); if (factor <= 1) return prevsum; - return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); + return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed); } /* ZSTD_rescaleFreqs() : @@ -129,18 +143,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); optPtr->priceType = zop_dynamic; - if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ - DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */ + + /* heuristic: use pre-defined stats for too small inputs */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { + DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD); optPtr->priceType = zop_predef; } assert(optPtr->symbolCosts != NULL); if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { - /* huffman table presumed generated by dictionary */ + + /* huffman stats covering the full value set : table presumed generated by dictionary */ optPtr->priceType = zop_dynamic; if (compressedLiterals) { + /* generate literals statistics from huffman table */ unsigned lit; assert(optPtr->litFreq != NULL); optPtr->litSum = 0; @@ -188,13 +206,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, 
optPtr->offCodeSum += optPtr->offCodeFreq[of]; } } - } else { /* not a dictionary */ + } else { /* first block, no dictionary */ assert(optPtr->litFreq != NULL); if (compressedLiterals) { + /* base initial cost of literals on direct frequency within src */ unsigned lit = MaxLit; HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ - optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); + optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible); } { unsigned const baseLLfreqs[MaxLL+1] = { @@ -224,10 +243,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); } - } - } else { /* new block : re-use previous statistics, scaled down */ + } else { /* new block : scale down accumulated statistics */ if (compressedLiterals) optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); @@ -275,10 +293,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP assert(litLength <= ZSTD_BLOCKSIZE_MAX); if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); - /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX - * because it isn't representable in the zstd format. So instead just - * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block - * would be all literals. + + /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX + * because it isn't representable in the zstd format. + * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. + * In such a case, the block would be all literals. 
*/ if (litLength == ZSTD_BLOCKSIZE_MAX) return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); @@ -292,7 +311,7 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP } /* ZSTD_getMatchPrice() : - * Provides the cost of the match part (offset + matchLength) of a sequence + * Provides the cost of the match part (offset + matchLength) of a sequence. * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) @@ -308,8 +327,9 @@ ZSTD_getMatchPrice(U32 const offBase, U32 const mlBase = matchLength - MINMATCH; assert(matchLength >= MINMATCH); - if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ - return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ + return WEIGHT(mlBase, optLevel) + + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ /* dynamic statistics */ price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); @@ -347,7 +367,7 @@ static void ZSTD_updateStats(optState_t* const optPtr, optPtr->litLengthSum++; } - /* offset code : expected to follow storeSeq() numeric representation */ + /* offset code : follows storeSeq() numeric representation */ { U32 const offCode = ZSTD_highbit32(offBase); assert(offCode <= MaxOff); optPtr->offCodeFreq[offCode]++; @@ -1352,7 +1372,7 @@ size_t ZSTD_compressBlock_btopt( /* ZSTD_initStats_ultra(): * make a first compression pass, just to seed stats with more accurate starting values. * only works on first block, with no dictionary and no ldm. - * this function cannot error, hence its contract must be respected. 
+ * this function cannot error out, its narrow contract must be respected. */ static void ZSTD_initStats_ultra(ZSTD_matchState_t* ms, @@ -1371,7 +1391,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ - /* invalidate first scan from history */ + /* invalidate first scan from history, only keep entropy stats */ ZSTD_resetSeqStore(seqStore); ms->window.base -= srcSize; ms->window.dictLimit += (U32)srcSize; @@ -1395,20 +1415,20 @@ size_t ZSTD_compressBlock_btultra2( U32 const curr = (U32)((const BYTE*)src - ms->window.base); DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); - /* 2-pass strategy: + /* 2-passes strategy: * this strategy makes a first pass over first block to collect statistics - * and seed next round's statistics with it. - * After 1st pass, function forgets everything, and starts a new block. + * in order to seed next round's statistics with it. + * After 1st pass, function forgets history, and starts a new block. * Consequently, this can only work if no data has been previously loaded in tables, * aka, no dictionary, no prefix, no ldm preprocessing. * The compression ratio gain is generally small (~0.5% on first block), - * the cost is 2x cpu time on first block. */ + * the cost is 2x cpu time on first block.
*/ assert(srcSize <= ZSTD_BLOCKSIZE_MAX); if ( (ms->opt.litLengthSum==0) /* first block */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ - && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ - && (srcSize > ZSTD_PREDEF_THRESHOLD) + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ ) { ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 627255f53..342e5a311 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 0c10eb603..7a2c71720 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 271eb1ac7..ed4dc0e99 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index c6fd92860..e5c35dae9 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman decoder, * part of Finite State Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -289,10 +289,11 @@ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decodi static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { U64 D4; if (MEM_isLittleEndian()) { - D4 = (symbol << 8) + nbBits; + D4 = (U64)((symbol << 8) + nbBits); } else { - D4 = symbol + (nbBits << 8); + D4 = (U64)(symbol + (nbBits << 8)); } + assert(D4 < (1U << 16)); D4 *= 0x0001000100010001ULL; return D4; } @@ -383,9 +384,8 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr * rankStart[0] is not filled because there are no entries in the table for * weight 0. */ - { - int n; - int nextRankStart = 0; + { int n; + U32 nextRankStart = 0; int const unroll = 4; int const nLimit = (int)nbSymbols - unroll + 1; for (n=0; n<(int)tableLog+1; n++) { @@ -412,10 +412,9 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr * We can switch based on the length to a different inner loop which is * optimized for that particular case. 
*/ - { - U32 w; - int symbol=wksp->rankVal[0]; - int rankStart=0; + { U32 w; + int symbol = wksp->rankVal[0]; + int rankStart = 0; for (w=1; w<tableLog+1; ++w) { int const symbolCount = wksp->rankVal[w]; int const length = (1 << w) >> 1; @@ -525,7 +524,7 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons while (p < pEnd) HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - return pEnd-pStart; + return (size_t)(pEnd-pStart); } FORCE_INLINE_TEMPLATE size_t @@ -551,6 +550,10 @@ HUF_decompress1X1_usingDTable_internal_body( return dstSize; } +/* HUF_decompress4X1_usingDTable_internal_body(): + * Conditions : + * @dstSize >= 6 + */ FORCE_INLINE_TEMPLATE size_t HUF_decompress4X1_usingDTable_internal_body( void* dst, size_t dstSize, @@ -594,6 +597,7 @@ HUF_decompress4X1_usingDTable_internal_body( if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); @@ -679,8 +683,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm( const BYTE* const iend = (const BYTE*)cSrc + 6; BYTE* const oend = (BYTE*)dst + dstSize; HUF_DecompressAsmArgs args; - { - size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + { size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); FORWARD_IF_ERROR(ret, "Failed to init asm args"); if (ret != 0) return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); @@ -700,8 +703,7 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm( (void)iend; /* finish bit streams one by one.
*/ - { - size_t const segmentSize = (dstSize+3) / 4; + { size_t const segmentSize = (dstSize+3) / 4; BYTE* segmentEnd = (BYTE*)dst; int i; for (i = 0; i < 4; ++i) { @@ -1246,6 +1248,11 @@ HUF_decompress1X2_usingDTable_internal_body( /* decoded size */ return dstSize; } + +/* HUF_decompress4X2_usingDTable_internal_body(): + * Conditions: + * @dstSize >= 6 + */ FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, @@ -1286,8 +1293,9 @@ HUF_decompress4X2_usingDTable_internal_body( DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S index 3f0e5c26c..8d7c7a644 100644 --- a/lib/decompress/huf_decompress_amd64.S +++ b/lib/decompress/huf_decompress_amd64.S @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c index 6ffa35f6e..3647ad0a7 100644 --- a/lib/decompress/zstd_ddict.c +++ b/lib/decompress/zstd_ddict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_ddict.h b/lib/decompress/zstd_ddict.h index bd03268b5..c4ca8877a 100644 --- a/lib/decompress/zstd_ddict.h +++ b/lib/decompress/zstd_ddict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index e95b8822f..f00ef3a67 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -785,7 +785,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) ? zfh.frameContentSize - : nbBlocks * zfh.blockSizeMax; + : (unsigned long long)nbBlocks * zfh.blockSizeMax; return frameSizeInfo; } } diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index e1ff21582..853ddeae1 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -166,6 +166,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + if (!singleStream) + RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, + "Not enough literals (%zu) for the 4-streams mode (min %u)", + litSize, MIN_LITERALS_FOR_4_STREAMS); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); @@ -181,6 +185,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); } else { + assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); hufSuccess = HUF_decompress4X_usingDTable_bmi2( dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); @@ -509,7 +514,8 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, for (i = 8; i < n; i += 8) { MEM_write64(spread + pos + i, sv); } - pos += n; + assert(n>=0); + pos += (size_t)n; } } /* Now we spread those positions across the table. @@ -2010,12 +2016,20 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. * We don't expect that to be the case in 64-bit mode. * In block mode, window size is not known, so we have to be conservative. 
- * (note: but it could be evaluated from current-lowLimit) + * (note: it could possibly be evaluated from current-lowLimit) */ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + /* Note : the wording of the specification + * allows a compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX. + * This generally does not happen, as it makes little sense, + * since an uncompressed block would feature same size and have no decompression cost. + * Also, note that the decoder from reference libzstd before v1.5.4 + * would consider this edge case as an error. + * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX + * for broader compatibility with the deployed ecosystem of zstd decoders */ + RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h index c61a9d0c4..67791dbc3 100644 --- a/lib/decompress/zstd_decompress_block.h +++ b/lib/decompress/zstd_decompress_block.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h index 91e9dceb5..32685f2d3 100644 --- a/lib/decompress/zstd_decompress_internal.h +++ b/lib/decompress/zstd_decompress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved.
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h index b83ea0fed..a968245b3 100644 --- a/lib/deprecated/zbuff.h +++ b/lib/deprecated/zbuff.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff_common.c b/lib/deprecated/zbuff_common.c index e7d01a081..5a2f2db35 100644 --- a/lib/deprecated/zbuff_common.c +++ b/lib/deprecated/zbuff_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff_compress.c b/lib/deprecated/zbuff_compress.c index 51cf158c4..1d8682150 100644 --- a/lib/deprecated/zbuff_compress.c +++ b/lib/deprecated/zbuff_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/deprecated/zbuff_decompress.c b/lib/deprecated/zbuff_decompress.c index d73c0f35f..51159ef53 100644 --- a/lib/deprecated/zbuff_decompress.c +++ b/lib/deprecated/zbuff_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 724675d30..18c8ec538 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -647,7 +647,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) { - const double ratio = (double)nbDmers / maxDictSize; + const double ratio = (double)nbDmers / (double)maxDictSize; if (ratio >= 10) { return; } @@ -1040,7 +1040,7 @@ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBuffe return COVER_dictSelectionError(totalCompressedSize); } - if (totalCompressedSize <= largestCompressed * regressionTolerance) { + if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) { COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; free(largestDictbuffer); return selection; diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h index 1aacdddd6..252624bde 100644 --- a/lib/dictBuilder/cover.h +++ b/lib/dictBuilder/cover.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index 63a2cee72..46bba0120 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index a932276ff..f41d505f8 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -373,7 +373,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const elt = table[u]; /* sort : improve rank */ while ((u>1) && (table[u-1].savings < elt.savings)) - table[u] = table[u-1], u--; + table[u] = table[u-1], u--; table[u] = elt; return u; } } @@ -524,7 +524,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, if (solution.length==0) { cursor++; continue; } ZDICT_insertDictItem(dictList, dictListSize, solution, buffer); cursor += solution.length; - DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100); + DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0); } } _cleanup: diff --git a/lib/dll/example/Makefile b/lib/dll/example/Makefile index 03b034dd5..86cf6906e 100644 --- a/lib/dll/example/Makefile +++ b/lib/dll/example/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h index a6f1174b8..9f53d4cbd 100644 --- a/lib/legacy/zstd_legacy.h +++ b/lib/legacy/zstd_legacy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index c9e4f632f..8c504c792 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h index f777eb6e4..6ac876954 100644 --- a/lib/legacy/zstd_v01.h +++ b/lib/legacy/zstd_v01.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index d80481784..dfaed7bb8 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h index 1b371953b..dab0260ee 100644 --- a/lib/legacy/zstd_v02.h +++ b/lib/legacy/zstd_v02.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index 20ca5681a..5b1fd7175 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h index 7a00d4304..9bf3cce64 100644 --- a/lib/legacy/zstd_v03.h +++ b/lib/legacy/zstd_v03.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index 4f7eb9277..23735443a 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h index 66b97ab8e..640240d62 100644 --- a/lib/legacy/zstd_v04.h +++ b/lib/legacy/zstd_v04.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 9e73a5daa..5d2ebb2b5 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h index bd423bfc1..2dcffc923 100644 --- a/lib/legacy/zstd_v05.h +++ b/lib/legacy/zstd_v05.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 3839a5f63..53aaec468 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h index 9e32b76e0..633891010 100644 --- a/lib/legacy/zstd_v06.h +++ b/lib/legacy/zstd_v06.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index efe6fbefe..c2b88eb3e 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h index bc35cfa6a..1ff39041f 100644 --- a/lib/legacy/zstd_v07.h +++ b/lib/legacy/zstd_v07.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/lib/libzstd.mk b/lib/libzstd.mk index c97db5628..5e11d5d29 100644 --- a/lib/libzstd.mk +++ b/lib/libzstd.mk @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -15,17 +15,34 @@ # Zstd lib directory LIBZSTD ?= ./ +# ZSTD_LIB_MINIFY is a helper variable that +# configures a bunch of other variables to space-optimized defaults. 
+ZSTD_LIB_MINIFY ?= 0 + # Legacy support -ZSTD_LEGACY_SUPPORT ?= 5 +ifneq ($(ZSTD_LIB_MINIFY), 0) + ZSTD_LEGACY_SUPPORT ?= 0 +else + ZSTD_LEGACY_SUPPORT ?= 5 +endif ZSTD_LEGACY_MULTITHREADED_API ?= 0 # Build size optimizations -HUF_FORCE_DECOMPRESS_X1 ?= 0 -HUF_FORCE_DECOMPRESS_X2 ?= 0 -ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 0 -ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0 -ZSTD_NO_INLINE ?= 0 -ZSTD_STRIP_ERROR_STRINGS ?= 0 +ifneq ($(ZSTD_LIB_MINIFY), 0) + HUF_FORCE_DECOMPRESS_X1 ?= 1 + HUF_FORCE_DECOMPRESS_X2 ?= 0 + ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 1 + ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0 + ZSTD_NO_INLINE ?= 1 + ZSTD_STRIP_ERROR_STRINGS ?= 1 +else + HUF_FORCE_DECOMPRESS_X1 ?= 0 + HUF_FORCE_DECOMPRESS_X2 ?= 0 + ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 0 + ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0 + ZSTD_NO_INLINE ?= 0 + ZSTD_STRIP_ERROR_STRINGS ?= 0 +endif # Assembly support ZSTD_NO_ASM ?= 0 @@ -61,17 +78,8 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT)) CCVER := $(shell $(CC) --version) ZSTD_VERSION?= $(LIBVER) -# ZSTD_LIB_MINIFY is a helper variable that -# configures a bunch of other variables to space-optimized defaults. -ZSTD_LIB_MINIFY ?= 0 ifneq ($(ZSTD_LIB_MINIFY), 0) HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0) - ZSTD_LEGACY_SUPPORT ?= 0 - ZSTD_LIB_DEPRECATED ?= 0 - HUF_FORCE_DECOMPRESS_X1 ?= 1 - ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 1 - ZSTD_NO_INLINE ?= 1 - ZSTD_STRIP_ERROR_STRINGS ?= 1 ifneq ($(HAVE_CC_OZ), 0) # Some compilers (clang) support an even more space-optimized setting. CFLAGS += -Oz diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in index 33184297c..d5cc0270c 100644 --- a/lib/libzstd.pc.in +++ b/lib/libzstd.pc.in @@ -1,5 +1,5 @@ # ZSTD - standard compression algorithm -# Copyright (C) 2014-2016, Yann Collet, Facebook +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php) prefix=@PREFIX@ diff --git a/lib/module.modulemap b/lib/module.modulemap index bbb939782..eff98dfac 100644 --- a/lib/module.modulemap +++ b/lib/module.modulemap @@ -1,17 +1,27 @@ module libzstd [extern_c] { header "zstd.h" export * - config_macros [exhaustive] /* zstd.h */ \ + config_macros [exhaustive] \ + /* zstd.h */ \ ZSTD_STATIC_LINKING_ONLY, \ + ZSTDLIB_VISIBILITY, \ ZSTDLIB_VISIBLE, \ + ZSTDLIB_HIDDEN, \ ZSTD_DLL_EXPORT, \ ZSTDLIB_STATIC_API, \ ZSTD_DISABLE_DEPRECATE_WARNINGS, \ ZSTD_CLEVEL_DEFAULT, \ - /* zdict.h */ ZDICT_STATIC_LINKING_ONLY, \ + /* zdict.h */ \ + ZDICT_STATIC_LINKING_ONLY, \ + ZDICTLIB_VISIBLE, \ + ZDICTLIB_HIDDEN, \ ZDICTLIB_VISIBILITY, \ + ZDICTLIB_STATIC_API, \ ZDICT_DISABLE_DEPRECATE_WARNINGS, \ - /* zstd_errors.h */ ZSTDERRORLIB_VISIBILITY + /* zstd_errors.h */ \ + ZSTDERRORLIB_VISIBLE, \ + ZSTDERRORLIB_HIDDEN, \ + ZSTDERRORLIB_VISIBILITY module dictbuilder [extern_c] { header "zdict.h" diff --git a/lib/zdict.h b/lib/zdict.h index 8e21ba0f7..2268f948a 100644 --- a/lib/zdict.h +++ b/lib/zdict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8,32 +8,43 @@ * You may select, at your option, one of the above-listed licenses. 
*/ -#ifndef DICTBUILDER_H_001 -#define DICTBUILDER_H_001 - #if defined (__cplusplus) extern "C" { #endif +#ifndef ZSTD_ZDICT_H +#define ZSTD_ZDICT_H /*====== Dependencies ======*/ #include /* size_t */ /* ===== ZDICTLIB_API : control library symbols visibility ===== */ -#ifndef ZDICTLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) +#ifndef ZDICTLIB_VISIBLE + /* Backwards compatibility with old macro name */ +# ifdef ZDICTLIB_VISIBILITY +# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY +# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default"))) # else -# define ZDICTLIB_VISIBILITY +# define ZDICTLIB_VISIBLE # endif #endif + +#ifndef ZDICTLIB_HIDDEN +# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else +# define ZDICTLIB_HIDDEN +# endif +#endif + #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY +# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else -# define ZDICTLIB_API ZDICTLIB_VISIBILITY +# define ZDICTLIB_API ZDICTLIB_VISIBLE #endif /******************************************************************************* @@ -201,9 +212,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap const size_t* samplesSizes, unsigned nbSamples); typedef struct { - int compressionLevel; /*< optimize for a 
specific zstd compression level; 0 means default */ - unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ - unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) + int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /**< force dictID value; 0 means auto mode (32-bits random value) * NOTE: The zstd format reserves some dictionary IDs for future use. * You may use them in private settings, but be warned that they * may be used by zstd in a public dictionary registry in the future. @@ -260,9 +271,21 @@ ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictS ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); +#endif /* ZSTD_ZDICT_H */ +#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC) +#define ZSTD_ZDICT_H_STATIC -#ifdef ZDICT_STATIC_LINKING_ONLY +/* This can be overridden externally to hide static symbols. */ +#ifndef ZDICTLIB_STATIC_API +# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE +# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE +# else +# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE +# endif +#endif /* ==================================================================================== * The definitions in this section are considered experimental. @@ -318,7 +341,7 @@ typedef struct { * In general, it's recommended to provide a few thousands samples, though this can vary a lot. * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. 
*/ -ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( +ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover( void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters); @@ -340,7 +363,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( * See ZDICT_trainFromBuffer() for details on failure modes. * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. */ -ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( +ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover( void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_cover_params_t* parameters); @@ -361,7 +384,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( * In general, it's recommended to provide a few thousands samples, though this can vary a lot. * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. */ -ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, +ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters); @@ -384,7 +407,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, * See ZDICT_trainFromBuffer() for details on failure modes. * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. 
*/ -ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, +ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t* parameters); @@ -409,7 +432,7 @@ typedef struct { * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. */ -ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( +ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy( void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters); @@ -421,32 +444,31 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( or _CRT_SECURE_NO_WARNINGS in Visual. Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS -# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */ #else # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ -# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) -# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message))) # elif (ZDICT_GCC_VERSION >= 301) -# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# define ZDICT_DEPRECATED(message) __attribute__((deprecated)) # elif defined(_MSC_VER) -# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) 
+# define ZDICT_DEPRECATED(message) __declspec(deprecated(message)) # else # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") -# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# define ZDICT_DEPRECATED(message) # endif #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +ZDICTLIB_STATIC_API size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); -#endif /* ZDICT_STATIC_LINKING_ONLY */ +#endif /* ZSTD_ZDICT_H_STATIC */ #if defined (__cplusplus) } #endif - -#endif /* DICTBUILDER_H_001 */ diff --git a/lib/zstd.h b/lib/zstd.h index 1dff31b4e..c2dfda7bb 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -21,14 +21,24 @@ extern "C" { /* ===== ZSTDLIB_API : control library symbols visibility ===== */ #ifndef ZSTDLIB_VISIBLE -# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) + /* Backwards compatibility with old macro name */ +# ifdef ZSTDLIB_VISIBILITY +# define ZSTDLIB_VISIBLE ZSTDLIB_VISIBILITY +# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) # define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) -# define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) # else # define ZSTDLIB_VISIBLE +# endif +#endif + +#ifndef ZSTDLIB_HIDDEN +# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else # define ZSTDLIB_HIDDEN # endif #endif + #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBLE #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) @@ -201,8 +211,30 @@ 
ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) /*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +/* ZSTD_compressBound() : + * maximum compressed size in worst case single-pass scenario. + * When invoking `ZSTD_compress()` or any other one-pass compression function, + * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) + * as it eliminates one potential failure scenario, + * aka not enough room in dst buffer to write the compressed frame. + * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE . + * In which case, ZSTD_compressBound() will return an error code + * which can be tested using ZSTD_isError(). + * + * ZSTD_COMPRESSBOUND() : + * same as ZSTD_compressBound(), but as a macro. + * It can be used to produce constants, which can be useful for static allocation, + * for example to size a static array on stack. + * Will produce constant value 0 if srcSize too large. + */ +#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U) +#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +/* ZSTD_isError() : + * Most ZSTD_* functions returning a size_t value can be tested for error, + * using ZSTD_isError(). 
+ * @return 1 if error, 0 otherwise + */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ @@ -446,6 +478,8 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables + * ZSTD_c_enableMatchFinderFallback + * ZSTD_c_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -465,7 +499,10 @@ typedef enum { ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam15=1012, - ZSTD_c_experimentalParam16=1013 + ZSTD_c_experimentalParam16=1013, + ZSTD_c_experimentalParam17=1014, + ZSTD_c_experimentalParam18=1015, + } ZSTD_cParameter; typedef struct { @@ -528,7 +565,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. + * This also removes any reference to any dictionary or external matchfinder. * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -990,8 +1027,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, * meaning "return to no-dictionary mode". 
- * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, + * until parameters are reset, a new dictionary is loaded, or the dictionary + * is explicitly invalidated by loading a NULL dictionary. * Note 2 : Loading a dictionary involves building tables. * It's also a CPU consuming operation, with non-negligible impact on latency. * Tables are dependent on compression parameters, and for this reason, @@ -1004,7 +1042,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ - * Reference a prepared dictionary, to be used for all next compressed frames. + * Reference a prepared dictionary, to be used for all future compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. @@ -1039,9 +1077,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. + * Create an internal DDict from dict buffer, to be used to decompress all future frames. + * The dictionary remains valid for all future frames, until explicitly invalidated, or + * a new dictionary is loaded. * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
* Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, * meaning "return to no-dictionary mode". @@ -1065,9 +1103,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s * The memory for the table is allocated on the first call to refDDict, and can be * freed with ZSTD_freeDCtx(). * + * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary + * will be managed, and referencing a dictionary effectively "discards" any previous one. + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. * Special: referencing a NULL DDict means "return to no-dictionary mode". * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. */ @@ -1706,6 +1745,13 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); * This function never fails (wide contract) */ ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/*! ZSTD_CCtx_setCParams() : + * Set all parameters provided within @cparams into the working @cctx. + * Note : when parameters are modified during compression (MT mode only), + * changes to the .windowLog parameter will be ignored. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); + /*! ZSTD_compress_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
@@ -1713,10 +1759,10 @@ ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressio ZSTD_DEPRECATED("use ZSTD_compress2") ZSTDLIB_STATIC_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : * Note : this function is now DEPRECATED. @@ -2010,6 +2056,27 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 +/* ZSTD_c_enableMatchFinderFallback + * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. + * + * Controls whether zstd will fall back to an internal matchfinder if an + * external matchfinder is registered and returns an error code. This fallback is + * block-by-block: the internal matchfinder will only be called for blocks where + * the external matchfinder returns an error code. Fallback compression will + * follow any other cParam settings, such as compression level, the same as in a + * normal (fully-internal) compression operation. + * + * The user is strongly encouraged to read the full external matchfinder API + * documentation (below) before setting this parameter. */ +#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 + +/* ZSTD_c_maxBlockSize + * + * Default is ZSTD_BLOCKSIZE_MAX. + * + */ +#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. 
@@ -2476,8 +2543,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLev ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_STATIC_API ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.") +ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); @@ -2501,8 +2568,8 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. Data fragment must be large enough to ensure successful decoding. `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least result bytes on next attempt. errorCode, which can be tested using ZSTD_isError(). It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, @@ -2521,7 +2588,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ The most memory efficient way is to use a round buffer of sufficient size. 
Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). + which can return an error code if required value is too large for current system (in 32-bits mode). In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, up to the moment there is not enough room left in the buffer to guarantee decoding another full block, which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. @@ -2541,7 +2608,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. It can also be an error code, which can be tested with ZSTD_isError(). @@ -2642,6 +2709,142 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ +/* ********************** EXTERNAL MATCHFINDER API ********************** + * + * *** OVERVIEW *** + * This API allows users to replace the zstd internal block-level matchfinder + * with an external matchfinder function. Potential applications of the API + * include hardware-accelerated matchfinders and matchfinders specialized to + * particular types of data. 
+ * + * See contrib/externalMatchfinder for an example program employing the + * external matchfinder API. + * + * *** USAGE *** + * The user is responsible for implementing a function of type + * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following + * arguments to the user-provided function: + * + * - externalMatchState: a pointer to a user-managed state for the external + * matchfinder. + * + * - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the + * external matchfinder. outSeqsCapacity is guaranteed >= + * ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by + * the CCtx. + * + * - src, srcSize: an input buffer which the external matchfinder must parse + * into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. + * + * - dict, dictSize: a history buffer, which may be empty, which the external + * matchfinder may reference as it produces sequences for the src buffer. + * Currently, zstd will always pass dictSize == 0 into external matchfinders, + * but this will change in the future. + * + * - compressionLevel: a signed integer representing the zstd compression level + * set by the user for the current operation. The external matchfinder may + * choose to use this information to change its compression strategy and + * speed/ratio tradeoff. Note: The compression level does not reflect zstd + * parameters set through the advanced API. + * + * - windowSize: a size_t representing the maximum allowed offset for external + * sequences. Note that sequence offsets are sometimes allowed to exceed the + * windowSize if a dictionary is present, see doc/zstd_compression_format.md + * for details. + * + * The user-provided function shall return a size_t representing the number of + * sequences written to outSeqs. This return value will be treated as an error + * code if it is greater than outSeqsCapacity. The return value must be non-zero + * if srcSize is non-zero. 
The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided + * for convenience, but any value greater than outSeqsCapacity will be treated as + * an error code. + * + * If the user-provided function does not return an error code, the sequences + * written to outSeqs must be a valid parse of the src buffer. Data corruption may + * occur if the parse is not valid. A parse is defined to be valid if the + * following conditions hold: + * - The sum of matchLengths and literalLengths is equal to srcSize. + * - All sequences in the parse have matchLength != 0, except for the final + * sequence. matchLength is not constrained for the final sequence. + * - All offsets respect the windowSize parameter as specified in + * doc/zstd_compression_format.md. + * + * zstd will only validate these conditions (and fail compression if they do not + * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence + * validation has a performance cost. + * + * If the user-provided function returns an error, zstd will either fall back + * to an internal matchfinder or fail the compression operation. The user can + * choose between the two behaviors by setting the + * ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any + * other cParam settings, such as compression level, the same as in a normal + * compression operation. + * + * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F + * function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState, + * externalMatchFinder). This setting will persist until the next parameter reset + * of the CCtx. + * + * The externalMatchState must be initialized by the user before calling + * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the + * externalMatchState. + * + * *** LIMITATIONS *** + * External matchfinders are compatible with all zstd compression APIs. There are + * only two limitations. 
+ * + * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported. + * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an + * external matchfinder. + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in + * some cases (see its documentation for details). Users must explicitly set + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an + * external matchfinder is registered. + * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default + * whenever the window size (1 << ZSTD_c_windowLog) is < 128MB, but that's subject to change. Users should + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the external + * matchfinder API is used in conjunction with advanced settings (like windowLog). + * + * Second, history buffers are not supported. Concretely, zstd will always pass + * dictSize == 0 to the external matchfinder (for now). This has two implications: + * - Dictionaries are not supported. Compression will *not* fail if the user + * references a dictionary, but the dictionary won't have any effect. + * - Stream history is not supported. All compression APIs, including streaming + * APIs, work with the external matchfinder, but the external matchfinder won't + * receive any history from the previous block. Each block is an independent chunk. + * + * Long-term, we plan to overcome both limitations. There is no technical blocker to + * overcoming them. It is purely a question of engineering effort. + */ + +#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) + +typedef size_t ZSTD_externalMatchFinder_F ( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +); + +/*! ZSTD_registerExternalMatchFinder() : + * Instruct zstd to use an external matchfinder function.
+ * + * The externalMatchState must be initialized by the caller, and the caller is + * responsible for managing its lifetime. This parameter is sticky across + * compressions. It will remain set until the user explicitly resets compression + * parameters. + * + * The user is strongly encouraged to read the full API documentation (above) + * before calling this function. */ +ZSTDLIB_STATIC_API void +ZSTD_registerExternalMatchFinder( + ZSTD_CCtx* cctx, + void* externalMatchState, + ZSTD_externalMatchFinder_F* externalMatchFinder +); + #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h index 2ec0b0ab1..bd6dbee5f 100644 --- a/lib/zstd_errors.h +++ b/lib/zstd_errors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -20,19 +20,31 @@ extern "C" { /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDERRORLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +#ifndef ZSTDERRORLIB_VISIBLE + /* Backwards compatibility with old macro name */ +# ifdef ZSTDERRORLIB_VISIBILITY +# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY +# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default"))) # else -# define ZSTDERRORLIB_VISIBILITY +# define ZSTDERRORLIB_VISIBLE # endif #endif + +#ifndef ZSTDERRORLIB_HIDDEN +# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else +# define ZSTDERRORLIB_HIDDEN +# endif +#endif + #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +# define 
ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else -# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE #endif /*-********************************************* @@ -58,10 +70,12 @@ typedef enum { ZSTD_error_frameParameter_windowTooLarge = 16, ZSTD_error_corruption_detected = 20, ZSTD_error_checksum_wrong = 22, + ZSTD_error_literals_headerWrong = 24, ZSTD_error_dictionary_corrupted = 30, ZSTD_error_dictionary_wrong = 32, ZSTD_error_dictionaryCreation_failed = 34, ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_combination_unsupported = 41, ZSTD_error_parameter_outOfBound = 42, ZSTD_error_tableLog_tooLarge = 44, ZSTD_error_maxSymbolValue_tooLarge = 46, @@ -79,6 +93,7 @@ typedef enum { ZSTD_error_seekableIO = 102, ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_externalMatchFinder_failed = 106, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; diff --git a/programs/Makefile b/programs/Makefile index 1cdec4a64..b79b60b4f 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the diff --git a/programs/benchfn.c b/programs/benchfn.c index 1aadbdd91..8e6726f8d 100644 --- a/programs/benchfn.c +++ b/programs/benchfn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -229,9 +229,9 @@ BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, cont->timeSpent_ns += (unsigned long long)loopDuration_ns; /* estimate nbLoops for next run to last approximately 1 second */ - if (loopDuration_ns > (runBudget_ns / 50)) { + if (loopDuration_ns > ((double)runBudget_ns / 50)) { double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); - cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; + cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1; } else { /* previous run was too short : blindly increase workload by x multiplier */ const unsigned multiplier = 10; @@ -239,7 +239,7 @@ BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, cont->nbLoops *= multiplier; } - if(loopDuration_ns < runTimeMin_ns) { + if(loopDuration_ns < (double)runTimeMin_ns) { /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ assert(completed == 0); continue; diff --git a/programs/benchfn.h b/programs/benchfn.h index 99d13ac47..1bd93d135 100644 --- a/programs/benchfn.h +++ b/programs/benchfn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/benchzstd.c b/programs/benchzstd.c index 6ceca020c..285e401ef 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. 
+ * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/benchzstd.h b/programs/benchzstd.h index 11ac85da7..aa683dfc2 100644 --- a/programs/benchzstd.h +++ b/programs/benchzstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/datagen.c b/programs/datagen.c index 3b4f9e5c7..ddc690bb1 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/datagen.h b/programs/datagen.h index b76ae2a22..ca7270006 100644 --- a/programs/datagen.h +++ b/programs/datagen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/dibio.c b/programs/dibio.c index 8643bc378..b21338cd8 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/dibio.h b/programs/dibio.h index 666c1e661..a96104c36 100644 --- a/programs/dibio.h +++ b/programs/dibio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio.c b/programs/fileio.c index 313a530ce..6bfa559e9 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -1059,12 +1059,12 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no DISPLAYUPDATE_PROGRESS( "\rRead : %u MB ==> %.2f%% ", (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + (double)outFileSize/(double)inFileSize*100) } else { DISPLAYUPDATE_PROGRESS( "\rRead : %u / %u MB ==> %.2f%% ", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); + (double)outFileSize/(double)inFileSize*100); } } while (1) { @@ -1157,11 +1157,11 @@ FIO_compressLzmaFrame(cRess_t* ress, if (srcFileSize == UTIL_FILESIZE_UNKNOWN) DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + (double)outFileSize/(double)inFileSize*100) else DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); + (double)outFileSize/(double)inFileSize*100); if (ret == LZMA_STREAM_END) break; } @@ -1241,11 +1241,11 @@ FIO_compressLz4Frame(cRess_t* ress, if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + (double)outFileSize/(double)inFileSize*100) } else { DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); + (double)outFileSize/(double)inFileSize*100); } /* Write Block */ diff --git a/programs/fileio.h b/programs/fileio.h index 
b848934bc..e37398ded 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio_asyncio.c b/programs/fileio_asyncio.c index 92c9a5b1d..8f12fe1f9 100644 --- a/programs/fileio_asyncio.c +++ b/programs/fileio_asyncio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio_asyncio.h b/programs/fileio_asyncio.h index 30db44b6e..34dad6f4d 100644 --- a/programs/fileio_asyncio.h +++ b/programs/fileio_asyncio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio_common.h b/programs/fileio_common.h index aec2e8d56..827a5a06b 100644 --- a/programs/fileio_common.h +++ b/programs/fileio_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio_types.h b/programs/fileio_types.h index a1fac2ca7..c1f42f1ad 100644 --- a/programs/fileio_types.h +++ b/programs/fileio_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/platform.h b/programs/platform.h index 7006f97f2..18a3587bf 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/timefn.c b/programs/timefn.c index 64577b0e9..7005b2696 100644 --- a/programs/timefn.c +++ b/programs/timefn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/timefn.h b/programs/timefn.h index 8ba8ed787..438a8de07 100644 --- a/programs/timefn.h +++ b/programs/timefn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/util.c b/programs/util.c index bdb651074..21d7d3b46 100644 --- a/programs/util.c +++ b/programs/util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -66,6 +66,27 @@ extern "C" { #define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } } +static int g_traceDepth = 0; +int g_traceFileStat = 0; + +#define UTIL_TRACE_CALL(...) 
\ + { \ + if (g_traceFileStat) { \ + UTIL_DISPLAY("Trace:FileStat: %*s> ", g_traceDepth, ""); \ + UTIL_DISPLAY(__VA_ARGS__); \ + UTIL_DISPLAY("\n"); \ + ++g_traceDepth; \ + } \ + } + +#define UTIL_TRACE_RET(ret) \ + { \ + if (g_traceFileStat) { \ + --g_traceDepth; \ + UTIL_DISPLAY("Trace:FileStat: %*s< %d\n", g_traceDepth, "", (ret)); \ + } \ + } + /* A modified version of realloc(). * If UTIL_realloc() fails the original block is freed. */ @@ -121,21 +142,34 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, * Functions ***************************************/ +void UTIL_traceFileStat(void) +{ + g_traceFileStat = 1; +} + int UTIL_stat(const char* filename, stat_t* statbuf) { + int ret; + UTIL_TRACE_CALL("UTIL_stat(%s)", filename); #if defined(_MSC_VER) - return !_stat64(filename, statbuf); + ret = !_stat64(filename, statbuf); #elif defined(__MINGW32__) && defined (__MSVCRT__) - return !_stati64(filename, statbuf); + ret = !_stati64(filename, statbuf); #else - return !stat(filename, statbuf); + ret = !stat(filename, statbuf); #endif + UTIL_TRACE_RET(ret); + return ret; } int UTIL_isRegularFile(const char* infilename) { stat_t statbuf; - return UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf); + int ret; + UTIL_TRACE_CALL("UTIL_isRegularFile(%s)", infilename); + ret = UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf); + UTIL_TRACE_RET(ret); + return ret; } int UTIL_isRegularFileStat(const stat_t* statbuf) @@ -151,44 +185,66 @@ int UTIL_isRegularFileStat(const stat_t* statbuf) int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions) { stat_t localStatBuf; + UTIL_TRACE_CALL("UTIL_chmod(%s, %u)", filename, (unsigned)permissions); if (statbuf == NULL) { - if (!UTIL_stat(filename, &localStatBuf)) return 0; + if (!UTIL_stat(filename, &localStatBuf)) { + UTIL_TRACE_RET(0); + return 0; + } statbuf = &localStatBuf; } - if (!UTIL_isRegularFileStat(statbuf)) return 0; /* pretend success, but 
don't change anything */ - return chmod(filename, permissions); + if (!UTIL_isRegularFileStat(statbuf)) { + UTIL_TRACE_RET(0); + return 0; /* pretend success, but don't change anything */ + } + UTIL_TRACE_CALL("chmod"); + { + int const ret = chmod(filename, permissions); + UTIL_TRACE_RET(ret); + UTIL_TRACE_RET(ret); + return ret; + } } /* set access and modification times */ int UTIL_utime(const char* filename, const stat_t *statbuf) { int ret; + UTIL_TRACE_CALL("UTIL_utime(%s)", filename); /* We check that st_mtime is a macro here in order to give us confidence * that struct stat has a struct timespec st_mtim member. We need this * check because there are some platforms that claim to be POSIX 2008 * compliant but which do not have st_mtim... */ #if (PLATFORM_POSIX_VERSION >= 200809L) && defined(st_mtime) - /* (atime, mtime) */ - struct timespec timebuf[2] = { {0, UTIME_NOW} }; - timebuf[1] = statbuf->st_mtim; - ret = utimensat(AT_FDCWD, filename, timebuf, 0); + { + /* (atime, mtime) */ + struct timespec timebuf[2] = { {0, UTIME_NOW} }; + timebuf[1] = statbuf->st_mtim; + ret = utimensat(AT_FDCWD, filename, timebuf, 0); + } #else - struct utimbuf timebuf; - timebuf.actime = time(NULL); - timebuf.modtime = statbuf->st_mtime; - ret = utime(filename, &timebuf); + { + struct utimbuf timebuf; + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + ret = utime(filename, &timebuf); + } #endif errno = 0; + UTIL_TRACE_RET(ret); return ret; } int UTIL_setFileStat(const char *filename, const stat_t *statbuf) { int res = 0; - stat_t curStatBuf; - if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) + UTIL_TRACE_CALL("UTIL_setFileStat(%s)", filename); + + if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) { + UTIL_TRACE_RET(-1); return -1; + } /* set access and modification times */ res += UTIL_utime(filename, statbuf); @@ -200,13 +256,18 @@ int UTIL_setFileStat(const char *filename, const stat_t *statbuf) res 
+= UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 07777); /* Copy file permissions */ errno = 0; + UTIL_TRACE_RET(-res); return -res; /* number of errors is returned */ } int UTIL_isDirectory(const char* infilename) { stat_t statbuf; - return UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf); + int ret; + UTIL_TRACE_CALL("UTIL_isDirectory(%s)", infilename); + ret = UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf); + UTIL_TRACE_RET(ret); + return ret; } int UTIL_isDirectoryStat(const stat_t* statbuf) @@ -224,33 +285,44 @@ int UTIL_compareStr(const void *p1, const void *p2) { int UTIL_isSameFile(const char* fName1, const char* fName2) { + int ret; assert(fName1 != NULL); assert(fName2 != NULL); + UTIL_TRACE_CALL("UTIL_isSameFile(%s, %s)", fName1, fName2); #if defined(_MSC_VER) || defined(_WIN32) /* note : Visual does not support file identification by inode. * inode does not work on Windows, even with a posix layer, like msys2. * The following work-around is limited to detecting exact name repetition only, * aka `filename` is considered different from `subdir/../filename` */ - return !strcmp(fName1, fName2); + ret = !strcmp(fName1, fName2); #else { stat_t file1Stat; stat_t file2Stat; - return UTIL_stat(fName1, &file1Stat) + ret = UTIL_stat(fName1, &file1Stat) && UTIL_stat(fName2, &file2Stat) && (file1Stat.st_dev == file2Stat.st_dev) && (file1Stat.st_ino == file2Stat.st_ino); } #endif + UTIL_TRACE_RET(ret); + return ret; } /* UTIL_isFIFO : distinguish named pipes */ int UTIL_isFIFO(const char* infilename) { + UTIL_TRACE_CALL("UTIL_isFIFO(%s)", infilename); /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L - stat_t statbuf; - if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) return 1; + { + stat_t statbuf; + if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) { + UTIL_TRACE_RET(1); + return 1; + } + } #endif (void)infilename; + 
UTIL_TRACE_RET(0); return 0; } @@ -278,13 +350,20 @@ int UTIL_isBlockDevStat(const stat_t* statbuf) int UTIL_isLink(const char* infilename) { + UTIL_TRACE_CALL("UTIL_isLink(%s)", infilename); /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L - stat_t statbuf; - int const r = lstat(infilename, &statbuf); - if (!r && S_ISLNK(statbuf.st_mode)) return 1; + { + stat_t statbuf; + int const r = lstat(infilename, &statbuf); + if (!r && S_ISLNK(statbuf.st_mode)) { + UTIL_TRACE_RET(1); + return 1; + } + } #endif (void)infilename; + UTIL_TRACE_RET(0); return 0; } @@ -294,13 +373,18 @@ static int g_fakeStdoutIsConsole = 0; int UTIL_isConsole(FILE* file) { + int ret; + UTIL_TRACE_CALL("UTIL_isConsole(%d)", fileno(file)); if (file == stdin && g_fakeStdinIsConsole) - return 1; - if (file == stderr && g_fakeStderrIsConsole) - return 1; - if (file == stdout && g_fakeStdoutIsConsole) - return 1; - return IS_CONSOLE(file); + ret = 1; + else if (file == stderr && g_fakeStderrIsConsole) + ret = 1; + else if (file == stdout && g_fakeStdoutIsConsole) + ret = 1; + else + ret = IS_CONSOLE(file); + UTIL_TRACE_RET(ret); + return ret; } void UTIL_fakeStdinIsConsole(void) @@ -319,8 +403,16 @@ void UTIL_fakeStderrIsConsole(void) U64 UTIL_getFileSize(const char* infilename) { stat_t statbuf; - if (!UTIL_stat(infilename, &statbuf)) return UTIL_FILESIZE_UNKNOWN; - return UTIL_getFileSizeStat(&statbuf); + UTIL_TRACE_CALL("UTIL_getFileSize(%s)", infilename); + if (!UTIL_stat(infilename, &statbuf)) { + UTIL_TRACE_RET(-1); + return UTIL_FILESIZE_UNKNOWN; + } + { + U64 const size = UTIL_getFileSizeStat(&statbuf); + UTIL_TRACE_RET((int)size); + return size; + } } U64 UTIL_getFileSizeStat(const stat_t* statbuf) @@ -397,11 +489,16 @@ U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles) { U64 total = 0; unsigned n; + UTIL_TRACE_CALL("UTIL_getTotalFileSize(%u)", nbFiles); for (n=0; n 200 MB/s per core, to strong modes with 
excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\. +\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip\fR(1) and \fBxz\fR(1)\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\. . .P -\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences : +\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences: . .IP "\(bu" 4 Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\. @@ -34,10 +34,13 @@ When compressing a single file, \fBzstd\fR displays progress notifications and r .IP "\(bu" 4 \fBzstd\fR does not accept input from console, though it does accept \fBstdin\fR when it\'s not the console\. . +.IP "\(bu" 4 +\fBzstd\fR does not store the input\'s filename or attributes, only its contents\. +. .IP "" 0 . .P -\fBzstd\fR processes each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal : it will display an error message and skip the \fIfile\fR\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\. +\fBzstd\fR processes each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. 
\fBzstd\fR will refuse to write compressed data to standard output if it is a terminal: it will display an error message and skip the file\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\. . .P Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name: @@ -50,12 +53,12 @@ When decompressing, the \fB\.zst\fR suffix is removed from the source filename t . .IP "" 0 . -.SS "Concatenation with \.zst files" +.SS "Concatenation with \.zst Files" It is possible to concatenate multiple \fB\.zst\fR files\. \fBzstd\fR will decompress such agglomerated file as if it was a single \fB\.zst\fR file\. . .SH "OPTIONS" . -.SS "Integer suffixes and special values" +.SS "Integer Suffixes and Special Values" In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\. . .TP @@ -66,7 +69,7 @@ Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accep \fBMiB\fR Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\. . -.SS "Operation mode" +.SS "Operation Mode" If multiple operation mode options are given, the last one takes effect\. . .TP @@ -83,20 +86,20 @@ Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\ . .TP \fB\-b#\fR -Benchmark file(s) using compression level # +Benchmark file(s) using compression level \fI#\fR\. See \fIBENCHMARK\fR below for a description of this operation\. . .TP -\fB\-\-train FILEs\fR -Use FILEs as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\. +\fB\-\-train FILES\fR +Use \fIFILES\fR as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\. 
See \fIDICTIONARY BUILDER\fR below for a description of this operation\. . .TP \fB\-l\fR, \fB\-\-list\fR Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command\'s output can be augmented with the \fB\-v\fR modifier\. . -.SS "Operation modifiers" +.SS "Operation Modifiers" . .IP "\(bu" 4 -\fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3) +\fB\-#\fR: selects \fB#\fR compression level [1\-19] (default: 3) . .IP "\(bu" 4 \fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\. @@ -108,13 +111,22 @@ Display information related to a zstd compressed file, such as size, ratio, and \fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. . .IP "\(bu" 4 -\fB\-\-single\-thread\fR: Use a single thread for both I/O and compression\. As compression is serialized with I/O, this can be slightly slower\. Single\-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32\-bit systems\. Note 1 : this mode is the only available one when multithread support is disabled\. Note 2 : this mode is different from \fB\-T1\fR, which spawns 1 compression thread in parallel with I/O\. Final compressed result is also slightly different from \fB\-T1\fR\. +\fB\-\-single\-thread\fR: Use a single thread for both I/O and compression\. As compression is serialized with I/O, this can be slightly slower\. 
Single\-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32\-bit systems\. +. +.IP +Note 1: this mode is the only available one when multithread support is disabled\. +. +.IP +Note 2: this mode is different from \fB\-T1\fR, which spawns 1 compression thread in parallel with I/O\. Final compressed result is also slightly different from \fB\-T1\fR\. . .IP "\(bu" 4 \fB\-\-auto\-threads={physical,logical} (default: physical)\fR: When using a default amount of threads via \fB\-T0\fR, choose the default based on the number of detected physical or logical cores\. . .IP "\(bu" 4 -\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\. +\fB\-\-adapt[=min=#,max=#]\fR: \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MiB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. +. 
+.IP +\fINote\fR: at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\. . .IP "\(bu" 4 \fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\. @@ -126,40 +138,49 @@ Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \ \fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s) . .IP "\(bu" 4 -\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize\. +\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that \fIwindowSize\fR > \fIsrcSize\fR\. . .IP -Note: cannot use both this and \-D together Note: \fB\-\-long\fR mode will be automatically activated if chainLog < fileLog (fileLog being the windowLog required to cover the whole file)\. You can also manually force it\. Note: for all levels, you can use \-\-patch\-from in \-\-single\-thread mode to improve compression ratio at the cost of speed Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR +Note: cannot use both this and \fB\-D\fR together\. +. +.IP +Note: \fB\-\-long\fR mode will be automatically activated if \fIchainLog\fR < \fIfileLog\fR (\fIfileLog\fR being the \fIwindowLog\fR required to cover the whole file)\. You can also manually force it\. +. 
+.IP +Note: for all levels, you can use \fB\-\-patch\-from\fR in \fB\-\-single\-thread\fR mode to improve compression ratio at the cost of speed\. +. +.IP +Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (e\.g\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR\. . .IP "\(bu" 4 -\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\. +\fB\-\-rsyncable\fR: \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\. . .IP "\(bu" 4 \fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled) . .IP "\(bu" 4 -\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \-\-content\-size (meaning that the original size will be placed in the header)\. +\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \fB\-\-content\-size\fR (meaning that the original size will be placed in the header)\. .
.IP "\(bu" 4 \fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\. . .IP "\(bu" 4 -\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\. +\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MiB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\. . .IP -This is also used during compression when using with \-\-patch\-from=\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MB)\. +This is also used during compression when using with \fB\-\-patch\-from=\fR\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MiB)\. . .IP -Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GB\. zstd will load training samples up to the memory limit and ignore the rest\. +Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GiB\. zstd will load training samples up to the memory limit and ignore the rest\. . .IP "\(bu" 4 -\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\. 
+\fB\-\-stream\-size=#\fR: Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\. . .IP "\(bu" 4 \fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\. . .IP "\(bu" 4 -\fB\-o FILE\fR: save result into \fBFILE\fR +\fB\-o FILE\fR: save result into \fBFILE\fR\. . .IP "\(bu" 4 \fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\. During decompression and when the output destination is stdout, pass\-through unrecognized formats as\-is\. @@ -171,10 +192,10 @@ Additionally, this can be used to limit memory for dictionary training\. This pa \fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\. . .IP "\(bu" 4 -\fB\-\-[no\-]pass\-through\fR enable / disable passing through uncompressed files as\-is\. 
During decompression when pass\-through is enabled, unrecognized formats will be copied as\-is from the input to the output\. By default, pass\-through will occur when the output destination is stdout and the force (\-f) option is set\. +\fB\-\-[no\-]pass\-through\fR enable / disable passing through uncompressed files as\-is\. During decompression when pass\-through is enabled, unrecognized formats will be copied as\-is from the input to the output\. By default, pass\-through will occur when the output destination is stdout and the force (\fB\-f\fR) option is set\. . .IP "\(bu" 4 -\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \-o, will trigger a confirmation prompt (which can be silenced with \-f), as this is a destructive operation\. +\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \fB\-o\fR, will trigger a confirmation prompt (which can be silenced with \fB\-f\fR), as this is a destructive operation\. . .IP "\(bu" 4 \fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\. @@ -201,7 +222,7 @@ If input directory contains "\.\.", the files in this directory will be ignored\ \fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit . .IP "\(bu" 4 -\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\. +\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced: \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\. . 
.IP "\(bu" 4 \fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information @@ -213,14 +234,14 @@ If input directory contains "\.\.", the files in this directory will be ignored\ \fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\. . .IP "\(bu" 4 -\fB\-\-show\-default\-cparams\fR: Shows the default compression parameters that will be used for a particular src file\. If the provided src file is not a regular file (e\.g\. named pipe), the cli will just output the default parameters\. That is, the parameters that are used when the src size is unknown\. +\fB\-\-show\-default\-cparams\fR: shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size\. If the provided file is not a regular file (e\.g\. a pipe), this flag will output the parameters used for inputs of unknown size\. . .IP "\(bu" 4 \fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files . .IP "" 0 . -.SS "gzip Operation modifiers" +.SS "gzip Operation Modifiers" When invoked via a \fBgzip\fR symlink, \fBzstd\fR will support further options that intend to mimic the \fBgzip\fR behavior: . .TP @@ -231,7 +252,7 @@ do not store the original filename and timestamps when compressing a file\. This \fB\-\-best\fR alias to the option \fB\-9\fR\. . -.SS "Interactions with Environment Variables" +.SS "Environment Variables" Employing environment variables to set parameters has security implications\. Therefore, this avenue is intentionally limited\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\. . .P @@ -251,7 +272,7 @@ They can both be overridden by corresponding command line arguments: \fB\-#\fR f Use FILEs as training set to create a dictionary\. 
The training set should ideally contain a lot of samples (> 100), and weight typically 100x the target dictionary size (for example, ~10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\. . .IP -Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KB of these samples will be used for training\. +Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KiB of these samples will be used for training\. . .IP \fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional advanced parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\. @@ -281,7 +302,10 @@ In situations where the training set is larger than maximum memory, the CLI will . .TP \fB\-\-dictID=#\fR -A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. +A dictionary ID is a locally unique ID\. 
The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. +. +.IP +Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2^31, so they should not be used in public\. . .TP \fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR @@ -366,7 +390,7 @@ cut file(s) into independent chunks of size # (default: no chunking) set process priority to real\-time . .P -\fBOutput Format:\fR CompressionLevel#Filename : InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed +\fBOutput Format:\fR CompressionLevel#Filename: InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed . .P \fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\. @@ -377,14 +401,14 @@ set process priority to real\-time Specify the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. 
Different job sizes will lead to non\-identical compressed frames\. . .SS "\-\-zstd[=options]:" -\fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR: +\fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor\. (You can see the result of this translation with \fB\-\-show\-default\-cparams\fR\.) These specific parameters can be overridden with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR: . .TP \fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR Specify a strategy used by a match finder\. . .IP -There are 9 strategies numbered from 1 to 9, from faster to stronger: 1=ZSTD_fast, 2=ZSTD_dfast, 3=ZSTD_greedy, 4=ZSTD_lazy, 5=ZSTD_lazy2, 6=ZSTD_btlazy2, 7=ZSTD_btopt, 8=ZSTD_btultra, 9=ZSTD_btultra2\. +There are 9 strategies numbered from 1 to 9, from fastest to strongest: 1=\fBZSTD_fast\fR, 2=\fBZSTD_dfast\fR, 3=\fBZSTD_greedy\fR, 4=\fBZSTD_lazy\fR, 5=\fBZSTD_lazy2\fR, 6=\fBZSTD_btlazy2\fR, 7=\fBZSTD_btopt\fR, 8=\fBZSTD_btultra\fR, 9=\fBZSTD_btultra2\fR\. . .TP \fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR @@ -404,17 +428,17 @@ Specify the maximum number of bits for a hash table\. Bigger hash tables cause fewer collisions which usually makes compression faster, but requires more memory during compression\. . 
.IP -The minimum \fIhlog\fR is 6 (64 B) and the maximum is 30 (1 GiB)\. +The minimum \fIhlog\fR is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB)\. . .TP \fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR -Specify the maximum number of bits for a hash chain or a binary tree\. +Specify the maximum number of bits for the secondary search structure, whose form depends on the selected \fBstrategy\fR\. . .IP -Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the ZSTD_fast strategy\. +Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the \fBZSTD_fast\fR \fBstrategy\fR, which only has the primary hash table\. . .IP -The minimum \fIclog\fR is 6 (64 B) and the maximum is 29 (524 Mib) on 32\-bit platforms and 30 (1 Gib) on 64\-bit platforms\. +The minimum \fIclog\fR is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32\-bit platforms and 30 (1B entries / 4 GiB) on 64\-bit platforms\. . .TP \fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR @@ -441,20 +465,23 @@ The minimum \fImml\fR is 3 and the maximum is 7\. The impact of this field vary depending on selected strategy\. . .IP -For ZSTD_btopt, ZSTD_btultra and ZSTD_btultra2, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\. t For ZSTD_fast, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed : a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\. 
+For \fBZSTD_btopt\fR, \fBZSTD_btultra\fR and \fBZSTD_btultra2\fR, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\. +. +.IP +For \fBZSTD_fast\fR, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed: a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\. . .IP For all other strategies, this field has no impact\. . .IP -The minimum \fItlen\fR is 0 and the maximum is 128 Kib\. +The minimum \fItlen\fR is 0 and the maximum is 128 KiB\. . .TP \fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\. . .IP -The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default" : \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\. +The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default": \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\. . 
.TP \fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR @@ -514,6 +541,12 @@ The following parameters sets advanced compression options to something similar .P \fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6 . +.SH "SEE ALSO" +\fBzstdgrep\fR(1), \fBzstdless\fR(1), \fBgzip\fR(1), \fBxz\fR(1) +. +.P +The \fIzstandard\fR format is specified in Y\. Collet, "Zstandard Compression and the \'application/zstd\' Media Type", https://www\.ietf\.org/rfc/rfc8878\.txt, Internet RFC 8878 (February 2021)\. +. .SH "BUGS" Report bugs at: https://github\.com/facebook/zstd/issues . diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 37c2ba187..45a88a347 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -4,7 +4,7 @@ zstd(1) -- zstd, zstdmt, unzstd, zstdcat - Compress or decompress .zst files SYNOPSIS -------- -`zstd` [*OPTIONS*] [-|_INPUT-FILE_] [-o _OUTPUT-FILE_] +`zstd` [<OPTIONS>] [-|<INPUT-FILE>] [-o <OUTPUT-FILE>] `zstdmt` is equivalent to `zstd -T0` @@ -16,7 +16,7 @@ SYNOPSIS DESCRIPTION ----------- `zstd` is a fast lossless compression algorithm and data compression tool, -with command line syntax similar to `gzip (1)` and `xz (1)`. +with command line syntax similar to `gzip`(1) and `xz`(1). It is based on the **LZ77** family, with further FSE & huff0 entropy stages. `zstd` offers highly configurable compression speed, from fast modes at > 200 MB/s per core, @@ -24,7 +24,7 @@ to strong modes with excellent compression ratios. It also features a very fast decoder, with speeds > 500 MB/s per core. `zstd` command line syntax is generally similar to gzip, -but features the following differences : +but features the following differences: - Source files are preserved by default. It's possible to remove them automatically by using the `--rm` command. @@ -35,12 +35,13 @@ but features the following differences : Use `-q` to turn it off. - `zstd` does not accept input from console, though it does accept `stdin` when it's not the console.
+ - `zstd` does not store the input's filename or attributes, only its contents. `zstd` processes each _file_ according to the selected operation mode. If no _files_ are given or _file_ is `-`, `zstd` reads from standard input and writes the processed data to standard output. `zstd` will refuse to write compressed data to standard output -if it is a terminal : it will display an error message and skip the _file_. +if it is a terminal: it will display an error message and skip the file. Similarly, `zstd` will refuse to read compressed data from standard input if it is a terminal. @@ -52,14 +53,15 @@ whose name is derived from the source _file_ name: * When decompressing, the `.zst` suffix is removed from the source filename to get the target filename -### Concatenation with .zst files +### Concatenation with .zst Files It is possible to concatenate multiple `.zst` files. `zstd` will decompress such agglomerated file as if it was a single `.zst` file. OPTIONS ------- -### Integer suffixes and special values +### Integer Suffixes and Special Values + In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers. There must be no space between the integer and the suffix. @@ -71,7 +73,8 @@ There must be no space between the integer and the suffix. Multiply the integer by 1,048,576 (2\^20). `Mi`, `M`, and `MB` are accepted as synonyms for `MiB`. -### Operation mode +### Operation Mode + If multiple operation mode options are given, the last one takes effect. @@ -88,19 +91,21 @@ the last one takes effect. decompressed data is discarded and checksummed for errors. No files are created or removed. * `-b#`: - Benchmark file(s) using compression level # -* `--train FILEs`: - Use FILEs as a training set to create a dictionary. + Benchmark file(s) using compression level _#_. + See _BENCHMARK_ below for a description of this operation. +* `--train FILES`: + Use _FILES_ as a training set to create a dictionary. 
The training set should contain a lot of small files (> 100). + See _DICTIONARY BUILDER_ below for a description of this operation. * `-l`, `--list`: Display information related to a zstd compressed file, such as size, ratio, and checksum. Some of these fields may not be available. This command's output can be augmented with the `-v` modifier. -### Operation modifiers +### Operation Modifiers * `-#`: - `#` compression level \[1-19] (default: 3) + selects `#` compression level \[1-19\] (default: 3) * `--ultra`: unlocks high compression levels 20+ (maximum 22), using a lot more memory. Note that decompression will also require more memory when using these levels. @@ -122,21 +127,24 @@ the last one takes effect. As compression is serialized with I/O, this can be slightly slower. Single-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32-bit systems. - Note 1 : this mode is the only available one when multithread support is disabled. - Note 2 : this mode is different from `-T1`, which spawns 1 compression thread in parallel with I/O. + + Note 1: this mode is the only available one when multithread support is disabled. + + Note 2: this mode is different from `-T1`, which spawns 1 compression thread in parallel with I/O. Final compressed result is also slightly different from `-T1`. * `--auto-threads={physical,logical} (default: physical)`: When using a default amount of threads via `-T0`, choose the default based on the number of detected physical or logical cores. -* `--adapt[=min=#,max=#]` : +* `--adapt[=min=#,max=#]`: `zstd` will dynamically adapt compression level to perceived I/O conditions. Compression level adaptation can be observed live by using command `-v`. Adaptation can be constrained between supplied `min` and `max` levels. The feature works when combined with multi-threading and `--long` mode. It does not work with `--single-thread`. 
- It sets window size to 8 MB by default (can be changed manually, see `wlog`). + It sets window size to 8 MiB by default (can be changed manually, see `wlog`). Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible. - _note_ : at the time of this writing, `--adapt` can remain stuck at low speed + + _Note_: at the time of this writing, `--adapt` can remain stuck at low speed when combined with multiple worker threads (>=2). * `--long[=#]`: enables long distance matching with `#` `windowLog`, if `#` is not @@ -153,18 +161,21 @@ the last one takes effect. * `--patch-from FILE`: Specify the file to be used as a reference point for zstd's diff engine. This is effectively dictionary compression with some convenient parameter - selection, namely that windowSize > srcSize. + selection, namely that _windowSize_ > _srcSize_. - Note: cannot use both this and -D together - Note: `--long` mode will be automatically activated if chainLog < fileLog - (fileLog being the windowLog required to cover the whole file). You + Note: cannot use both this and `-D` together. + + Note: `--long` mode will be automatically activated if _chainLog_ < _fileLog_ + (_fileLog_ being the _windowLog_ required to cover the whole file). You can also manually force it. - Note: for all levels, you can use --patch-from in --single-thread mode - to improve compression ratio at the cost of speed + + Note: for all levels, you can use `--patch-from` in `--single-thread` mode + to improve compression ratio at the cost of speed. + Note: for level 19, you can get increased compression ratio at the cost of speed by specifying `--zstd=targetLength=` to be something large - (i.e. 4096), and by setting a large `--zstd=chainLog=` -* `--rsyncable` : + (i.e. 4096), and by setting a large `--zstd=chainLog=`. +* `--rsyncable`: `zstd` will periodically synchronize the compression state to make the compressed file more rsync-friendly. 
There is a negligible impact to compression ratio, and the faster compression levels will see a small @@ -177,24 +188,24 @@ the last one takes effect. * `--[no-]content-size`: enable / disable whether or not the original size of the file is placed in the header of the compressed file. The default option is - --content-size (meaning that the original size will be placed in the header). + `--content-size` (meaning that the original size will be placed in the header). * `--no-dictID`: do not store dictionary ID within frame header (dictionary compression). The decoder will have to rely on implicit knowledge about which dictionary to use, it won't be able to check if it's correct. * `-M#`, `--memory=#`: - Set a memory usage limit. By default, `zstd` uses 128 MB for decompression + Set a memory usage limit. By default, `zstd` uses 128 MiB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i.e. you can increase or decrease it). - This is also used during compression when using with --patch-from=. In this case, - this parameter overrides that maximum size allowed for a dictionary. (128 MB). + This is also used during compression when using with `--patch-from=`. In this case, + this parameter overrides that maximum size allowed for a dictionary. (128 MiB). Additionally, this can be used to limit memory for dictionary training. This parameter - overrides the default limit of 2 GB. zstd will load training samples up to the memory limit + overrides the default limit of 2 GiB. zstd will load training samples up to the memory limit and ignore the rest. -* `--stream-size=#` : +* `--stream-size=#`: Sets the pledged source size of input coming from a stream. This value must be exact, as it will be included in the produced frame header. Incorrect stream sizes will cause an error. 
This information will be used to better optimize compression parameters, resulting in @@ -207,7 +218,7 @@ the last one takes effect. Exact guesses result in better compression ratios. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation. * `-o FILE`: - save result into `FILE` + save result into `FILE`. * `-f`, `--force`: disable input and output checks. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc. @@ -227,11 +238,11 @@ the last one takes effect. enable / disable passing through uncompressed files as-is. During decompression when pass-through is enabled, unrecognized formats will be copied as-is from the input to the output. By default, pass-through will - occur when the output destination is stdout and the force (-f) option is + occur when the output destination is stdout and the force (`-f`) option is set. * `--rm`: remove source file(s) after successful compression or decompression. If used in combination with - -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation. + `-o`, will trigger a confirmation prompt (which can be silenced with `-f`), as this is a destructive operation. * `-k`, `--keep`: keep source file(s) after successful compression or decompression. This is the default behavior. @@ -270,7 +281,7 @@ the last one takes effect. display help/long help and exit * `-V`, `--version`: display version number and exit. - Advanced : `-vV` also displays supported formats. + Advanced: `-vV` also displays supported formats. `-vvV` also displays POSIX support. `-q` will only display the version number, suitable for machine reading. * `-v`, `--verbose`: @@ -281,15 +292,13 @@ the last one takes effect. * `--no-progress`: do not display the progress bar, but keep all other messages. 
* `--show-default-cparams`: - Shows the default compression parameters that will be used for a - particular src file. If the provided src file is not a regular file - (e.g. named pipe), the cli will just output the default parameters. - That is, the parameters that are used when the src size is unknown. + shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size. + If the provided file is not a regular file (e.g. a pipe), this flag will output the parameters used for inputs of unknown size. * `--`: All arguments after `--` are treated as files -### gzip Operation modifiers +### gzip Operation Modifiers When invoked via a `gzip` symlink, `zstd` will support further options that intend to mimic the `gzip` behavior: @@ -300,7 +309,7 @@ options that intend to mimic the `gzip` behavior: alias to the option `-9`. -### Interactions with Environment Variables +### Environment Variables Employing environment variables to set parameters has security implications. Therefore, this avenue is intentionally limited. @@ -341,7 +350,7 @@ Compression of small files similar to the sample set will be greatly improved. Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files. In the case where some samples happen to be large, - only the first 128 KB of these samples will be used for training. + only the first 128 KiB of these samples will be used for training. `--train` supports multithreading if `zstd` is compiled with threading support (default). Additional advanced parameters can be specified with `--train-fastcover`. @@ -389,11 +398,13 @@ Compression of small files similar to the sample set will be greatly improved. It's possible to provide an explicit number ID instead. It's up to the dictionary manager to not assign twice the same ID to 2 different dictionaries. 
- Note that short numbers have an advantage : + Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes. This compares favorably to 4 bytes default. + Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2\^31, so they should not be used in public. + * `--train-cover[=k#,d=#,steps=#,split=#,shrink[=#]]`: Select parameters for the default dictionary builder algorithm named cover. If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8. @@ -482,7 +493,7 @@ BENCHMARK * `--priority=rt`: set process priority to real-time -**Output Format:** CompressionLevel#Filename : InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed +**Output Format:** CompressionLevel#Filename: InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed **Methodology:** For both compression and decompression speed, the entire input is compressed/decompressed in-memory to measure speed. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy. @@ -499,9 +510,10 @@ This minimum is either 512 KB, or `overlapSize`, whichever is largest. Different job sizes will lead to non-identical compressed frames. ### --zstd[=options]: -`zstd` provides 22 predefined compression levels. -The selected or default predefined compression level can be changed with -advanced compression options. +`zstd` provides 22 predefined regular compression levels plus the fast levels. +This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor. +(You can see the result of this translation with `--show-default-cparams`.) +These specific parameters can be overridden with advanced compression options. The _options_ are provided as a comma-separated list. 
You may specify only the options you want to change and the rest will be taken from the selected or default compression level. @@ -510,10 +522,10 @@ The list of available _options_: - `strategy`=_strat_, `strat`=_strat_: Specify a strategy used by a match finder. - There are 9 strategies numbered from 1 to 9, from faster to stronger: - 1=ZSTD\_fast, 2=ZSTD\_dfast, 3=ZSTD\_greedy, - 4=ZSTD\_lazy, 5=ZSTD\_lazy2, 6=ZSTD\_btlazy2, - 7=ZSTD\_btopt, 8=ZSTD\_btultra, 9=ZSTD\_btultra2. + There are 9 strategies numbered from 1 to 9, from fastest to strongest: + 1=`ZSTD_fast`, 2=`ZSTD_dfast`, 3=`ZSTD_greedy`, + 4=`ZSTD_lazy`, 5=`ZSTD_lazy2`, 6=`ZSTD_btlazy2`, + 7=`ZSTD_btopt`, 8=`ZSTD_btultra`, 9=`ZSTD_btultra2`. - `windowLog`=_wlog_, `wlog`=_wlog_: Specify the maximum number of bits for a match distance. @@ -533,19 +545,20 @@ The list of available _options_: Bigger hash tables cause fewer collisions which usually makes compression faster, but requires more memory during compression. - The minimum _hlog_ is 6 (64 B) and the maximum is 30 (1 GiB). + The minimum _hlog_ is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB). - `chainLog`=_clog_, `clog`=_clog_: - Specify the maximum number of bits for a hash chain or a binary tree. + Specify the maximum number of bits for the secondary search structure, + whose form depends on the selected `strategy`. Higher numbers of bits increases the chance to find a match which usually improves compression ratio. It also slows down compression speed and increases memory requirements for compression. - This option is ignored for the ZSTD_fast strategy. + This option is ignored for the `ZSTD_fast` `strategy`, which only has the primary hash table. - The minimum _clog_ is 6 (64 B) and the maximum is 29 (524 Mib) on 32-bit platforms - and 30 (1 Gib) on 64-bit platforms. + The minimum _clog_ is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32-bit platforms + and 30 (1B entries / 4 GiB) on 64-bit platforms. 
- `searchLog`=_slog_, `slog`=_slog_: Specify the maximum number of searches in a hash chain or a binary tree @@ -567,19 +580,19 @@ The list of available _options_: - `targetLength`=_tlen_, `tlen`=_tlen_: The impact of this field vary depending on selected strategy. - For ZSTD\_btopt, ZSTD\_btultra and ZSTD\_btultra2, it specifies + For `ZSTD_btopt`, `ZSTD_btultra` and `ZSTD_btultra2`, it specifies the minimum match length that causes match finder to stop searching. A larger `targetLength` usually improves compression ratio but decreases compression speed. -t - For ZSTD\_fast, it triggers ultra-fast mode when > 0. + + For `ZSTD_fast`, it triggers ultra-fast mode when > 0. The value represents the amount of data skipped between match sampling. - Impact is reversed : a larger `targetLength` increases compression speed + Impact is reversed: a larger `targetLength` increases compression speed but decreases compression ratio. For all other strategies, this field has no impact. - The minimum _tlen_ is 0 and the maximum is 128 Kib. + The minimum _tlen_ is 0 and the maximum is 128 KiB. - `overlapLog`=_ovlog_, `ovlog`=_ovlog_: Determine `overlapSize`, amount of data reloaded from previous job. @@ -591,7 +604,7 @@ t 9 means "full overlap", meaning up to `windowSize` is reloaded from previous job. Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2. For example, 8 means "windowSize/2", and 6 means "windowSize/8". - Value 0 is special and means "default" : _ovlog_ is automatically determined by `zstd`. + Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`. In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_. - `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_: @@ -641,6 +654,11 @@ similar to predefined level 19 for files bigger than 256 KB: `--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6 +SEE ALSO +-------- +`zstdgrep`(1), `zstdless`(1), `gzip`(1), `xz`(1) + +The <zstandard> format is specified in Y.
Collet, "Zstandard Compression and the 'application/zstd' Media Type", https://www.ietf.org/rfc/rfc8878.txt, Internet RFC 8878 (February 2021). BUGS ---- diff --git a/programs/zstdcli.c b/programs/zstdcli.c index cc6bbb935..9937a9c97 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -52,12 +52,12 @@ /*-************************************ * Constants **************************************/ -#define COMPRESSOR_NAME "zstd command line interface" +#define COMPRESSOR_NAME "Zstandard CLI" #ifndef ZSTD_VERSION # define ZSTD_VERSION "v" ZSTD_VERSION_STRING #endif #define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR +#define WELCOME_MESSAGE "*** %s (%i-bit) %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR #define ZSTD_ZSTDMT "zstdmt" #define ZSTD_UNZSTD "unzstd" @@ -143,160 +143,174 @@ static int exeNameMatch(const char* exeName, const char* test) */ static void usage(FILE* f, const char* programName) { - DISPLAY_F(f, "Usage: %s [OPTION]... [FILE]... [-o file]\n", programName); - DISPLAY_F(f, "Compress or uncompress FILEs (with no FILE or when FILE is `-`, read from standard input).\n\n"); - DISPLAY_F(f, " -o file result stored into `file` (only 1 output file)\n"); -#ifndef ZSTD_NOCOMPRESS - DISPLAY_F(f, " -1 .. -%d compression level (faster .. better; default: %d)\n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT); -#endif -#ifndef ZSTD_NODECOMPRESS - DISPLAY_F(f, " -d, --decompress decompression\n"); -#endif - DISPLAY_F(f, " -f, --force disable input and output checks. 
Allows overwriting existing files,\n"); - DISPLAY_F(f, " input from console, output to stdout, operating on links,\n"); - DISPLAY_F(f, " block devices, etc. During decompression and when the output\n"); - DISPLAY_F(f, " destination is stdout, pass-through unrecognized formats as-is.\n"); - DISPLAY_F(f, " --rm remove source file(s) after successful de/compression\n"); - DISPLAY_F(f, " -k, --keep preserve source file(s) (default) \n"); + DISPLAY_F(f, "Compress or decompress the INPUT file(s); reads from STDIN if INPUT is `-` or not provided.\n\n"); + DISPLAY_F(f, "Usage: %s [OPTIONS...] [INPUT... | -] [-o OUTPUT]\n\n", programName); + DISPLAY_F(f, "Options:\n"); + DISPLAY_F(f, " -o OUTPUT Write output to a single file, OUTPUT.\n"); + DISPLAY_F(f, " -k, --keep Preserve INPUT file(s). [Default] \n"); + DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression.\n"); #ifdef ZSTD_GZCOMPRESS if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ - DISPLAY_F(f, " -n, --no-name do not store original filename when compressing\n"); + DISPLAY_F(f, " -n, --no-name Do not store original filename when compressing.\n\n"); } #endif - DISPLAY_F(f, " -D DICT use DICT as Dictionary for compression or decompression\n"); - DISPLAY_F(f, " -h display usage and exit\n"); - DISPLAY_F(f, " -H,--help display long help and exit\n"); + DISPLAY_F(f, "\n"); +#ifndef ZSTD_NOCOMPRESS + DISPLAY_F(f, " -# Desired compression level, where `#` is a number between 1 and %d;\n", ZSTDCLI_CLEVEL_MAX); + DISPLAY_F(f, " lower numbers provide faster compression, higher numbers yield\n"); + DISPLAY_F(f, " better compression ratios. [Default: %d]\n\n", ZSTDCLI_CLEVEL_DEFAULT); +#endif +#ifndef ZSTD_NODECOMPRESS + DISPLAY_F(f, " -d, --decompress Perform decompression.\n"); +#endif + DISPLAY_F(f, " -D DICT Use DICT as the dictionary for compression or decompression.\n\n"); + DISPLAY_F(f, " -f, --force Disable input and output checks. 
Allows overwriting existing files,\n"); + DISPLAY_F(f, " receiving input from the console, printing output to STDOUT, and\n"); + DISPLAY_F(f, " operating on links, block devices, etc. Unrecognized formats will be\n"); + DISPLAY_F(f, " passed through as-is.\n\n"); + + DISPLAY_F(f, " -h Display short usage and exit.\n"); + DISPLAY_F(f, " -H, --help Display full help and exit.\n"); + DISPLAY_F(f, " -V, --version Display the program version and exit.\n"); DISPLAY_F(f, "\n"); } static void usage_advanced(const char* programName) { DISPLAYOUT(WELCOME_MESSAGE); + DISPLAYOUT("\n"); usage(stdout, programName); - DISPLAYOUT("Advanced options :\n"); - DISPLAYOUT(" -V, --version display Version number and exit\n"); + DISPLAYOUT("Advanced options:\n"); + DISPLAYOUT(" -c, --stdout Write to STDOUT (even if it is a console) and keep the INPUT file(s).\n\n"); - DISPLAYOUT(" -c, --stdout write to standard output (even if it is the console), keep original file\n"); - - DISPLAYOUT(" -v, --verbose verbose mode; specify multiple times to increase verbosity\n"); - DISPLAYOUT(" -q, --quiet suppress warnings; specify twice to suppress errors too\n"); - DISPLAYOUT(" --[no-]progress forcibly display, or never display the progress counter\n"); - DISPLAYOUT(" note: any (de)compressed output to terminal will mix with progress counter text\n"); + DISPLAYOUT(" -v, --verbose Enable verbose output; pass multiple times to increase verbosity.\n"); + DISPLAYOUT(" -q, --quiet Suppress warnings; pass twice to suppress errors.\n"); +#ifndef ZSTD_NOTRACE + DISPLAYOUT(" --trace LOG Log tracing information to LOG.\n"); +#endif + DISPLAYOUT("\n"); + DISPLAYOUT(" --[no-]progress Forcibly show/hide the progress counter.
NOTE: Any (de)compressed\n"); + DISPLAYOUT(" output to terminal will mix with progress counter text.\n\n"); #ifdef UTIL_HAS_CREATEFILELIST - DISPLAYOUT(" -r operate recursively on directories\n"); - DISPLAYOUT(" --filelist FILE read list of files to operate upon from FILE\n"); - DISPLAYOUT(" --output-dir-flat DIR : processed files are stored into DIR\n"); + DISPLAYOUT(" -r Operate recursively on directories.\n"); + DISPLAYOUT(" --filelist LIST Read a list of files to operate on from LIST.\n"); + DISPLAYOUT(" --output-dir-flat DIR Store processed files in DIR.\n"); #endif #ifdef UTIL_HAS_MIRRORFILELIST - DISPLAYOUT(" --output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure\n"); + DISPLAYOUT(" --output-dir-mirror DIR Store processed files in DIR, respecting original directory structure.\n"); #endif if (AIO_supported()) - DISPLAYOUT(" --[no-]asyncio use asynchronous IO (default: enabled)\n"); + DISPLAYOUT(" --[no-]asyncio Use asynchronous IO. [Default: Enabled]\n"); + DISPLAYOUT("\n"); #ifndef ZSTD_NOCOMPRESS - DISPLAYOUT(" --[no-]check during compression, add XXH64 integrity checksum to frame (default: enabled)\n"); + DISPLAYOUT(" --[no-]check Add XXH64 integrity checksums during compression. [Default: Add, Validate]\n"); #ifndef ZSTD_NODECOMPRESS - DISPLAYOUT(" if specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)\n"); + DISPLAYOUT(" If `-d` is present, ignore/validate checksums during decompression.\n"); #endif #else #ifdef ZSTD_NOCOMPRESS - DISPLAYOUT(" --[no-]check during decompression, ignore/validate checksums in compressed frame (default: validate)"); + DISPLAYOUT(" --[no-]check Ignore/validate checksums during decompression. 
[Default: Validate]"); #endif - DISPLAYOUT("\n"); #endif /* ZSTD_NOCOMPRESS */ -#ifndef ZSTD_NOTRACE - DISPLAYOUT(" --trace FILE log tracing information to FILE\n"); -#endif - DISPLAYOUT(" -- all arguments after \"--\" are treated as files\n"); + DISPLAYOUT("\n"); + DISPLAYOUT(" -- Treat remaining arguments after `--` as files.\n"); #ifndef ZSTD_NOCOMPRESS DISPLAYOUT("\n"); - DISPLAYOUT("Advanced compression options :\n"); - DISPLAYOUT(" --ultra enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); - DISPLAYOUT(" --fast[=#] switch to very fast compression levels (default: %u)\n", 1); + DISPLAYOUT("Advanced compression options:\n"); + DISPLAYOUT(" --ultra Enable levels beyond %i, up to %i; requires more memory.\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); + DISPLAYOUT(" --fast[=#] Switch to very fast compression levels. [Default: %u]\n", 1); #ifdef ZSTD_GZCOMPRESS if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ - DISPLAYOUT(" --best compatibility alias for -9 \n"); - DISPLAYOUT(" --no-name do not store original filename when compressing\n"); + DISPLAYOUT(" --best Compatibility alias for `-9`.\n"); } #endif - DISPLAYOUT(" --long[=#] enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); - DISPLAYOUT(" --patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. \n"); - DISPLAYOUT(" --adapt dynamically adapt compression level to I/O conditions\n"); + DISPLAYOUT(" --adapt Dynamically adapt compression level to I/O conditions.\n"); + DISPLAYOUT(" --long[=#] Enable long distance matching with window log #. [Default: %u]\n", g_defaultMaxWindowLog); + DISPLAYOUT(" --patch-from=REF Use REF as the reference point for Zstandard's diff engine.
\n\n"); # ifdef ZSTD_MULTITHREAD - DISPLAYOUT(" -T# spawn # compression threads (default: 1, 0==# cores) \n"); - DISPLAYOUT(" -B# select size of each job (default: 0==automatic) \n"); - DISPLAYOUT(" --single-thread use a single thread for both I/O and compression (result slightly different than -T1) \n"); - DISPLAYOUT(" --auto-threads={physical,logical} : use either physical cores or logical cores as default when specifying -T0 (default: physical)\n"); - DISPLAYOUT(" --rsyncable compress using a rsync-friendly method (-B sets block size) \n"); + DISPLAYOUT(" -T# Spawn # compression threads. [Default: 1; pass 0 for core count.]\n"); + DISPLAYOUT(" --single-thread Share a single thread for I/O and compression (slightly different than `-T1`).\n"); + DISPLAYOUT(" --auto-threads={physical|logical}\n"); + DISPLAYOUT(" Use physical/logical cores when using `-T0`. [Default: Physical]\n\n"); + DISPLAYOUT(" -B# Set job size to #. [Default: 0 (automatic)]\n"); + DISPLAYOUT(" --rsyncable Compress using a rsync-friendly method (`-B` sets block size). 
\n"); + DISPLAYOUT("\n"); # endif - DISPLAYOUT(" --exclude-compressed : only compress files that are not already compressed \n"); - DISPLAYOUT(" --stream-size=# specify size of streaming input from `stdin` \n"); - DISPLAYOUT(" --size-hint=# optimize compression parameters for streaming input of approximately this size \n"); - DISPLAYOUT(" --target-compressed-block-size=# : generate compressed block of approximately targeted size \n"); - DISPLAYOUT(" --no-dictID don't write dictID into header (dictionary compression only)\n"); - DISPLAYOUT(" --[no-]compress-literals : force (un)compressed literals\n"); - DISPLAYOUT(" --[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies\n"); + DISPLAYOUT(" --exclude-compressed Only compress files that are not already compressed.\n\n"); - DISPLAYOUT(" --format=zstd compress files to the .zst format (default)\n"); + DISPLAYOUT(" --stream-size=# Specify size of streaming input from STDIN.\n"); + DISPLAYOUT(" --size-hint=# Optimize compression parameters for streaming input of approximately size #.\n"); + DISPLAYOUT(" --target-compressed-block-size=#\n"); + DISPLAYOUT(" Generate compressed blocks of approximately # size.\n\n"); + DISPLAYOUT(" --no-dictID Don't write `dictID` into the header (dictionary compression only).\n"); + DISPLAYOUT(" --[no-]compress-literals Force (un)compressed literals.\n"); + DISPLAYOUT(" --[no-]row-match-finder Explicitly enable/disable the fast, row-based matchfinder for\n"); + DISPLAYOUT(" the 'greedy', 'lazy', and 'lazy2' strategies.\n"); + + DISPLAYOUT("\n"); + DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. 
[Default]\n"); #ifdef ZSTD_GZCOMPRESS - DISPLAYOUT(" --format=gzip compress files to the .gz format\n"); + DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n"); #endif #ifdef ZSTD_LZMACOMPRESS - DISPLAYOUT(" --format=xz compress files to the .xz format\n"); - DISPLAYOUT(" --format=lzma compress files to the .lzma format\n"); + DISPLAYOUT(" --format=xz Compress files to the `.xz` format.\n"); + DISPLAYOUT(" --format=lzma Compress files to the `.lzma` format.\n"); #endif #ifdef ZSTD_LZ4COMPRESS - DISPLAYOUT( " --format=lz4 compress files to the .lz4 format\n"); + DISPLAYOUT( " --format=lz4 Compress files to the `.lz4` format.\n"); #endif #endif /* !ZSTD_NOCOMPRESS */ #ifndef ZSTD_NODECOMPRESS DISPLAYOUT("\n"); - DISPLAYOUT("Advanced decompression options :\n"); - DISPLAYOUT(" -l print information about zstd compressed files\n"); - DISPLAYOUT(" --test test compressed file integrity\n"); - DISPLAYOUT(" -M# Set a memory usage limit for decompression\n"); + DISPLAYOUT("Advanced decompression options:\n"); + DISPLAYOUT(" -l Print information about Zstandard-compressed files.\n"); + DISPLAYOUT(" --test Test compressed file integrity.\n"); + DISPLAYOUT(" -M# Set the memory usage limit to # megabytes.\n"); # if ZSTD_SPARSE_DEFAULT - DISPLAYOUT(" --[no-]sparse sparse mode (default: enabled on file, disabled on stdout)\n"); + DISPLAYOUT(" --[no-]sparse Enable sparse mode. [Default: Enabled for files, disabled for STDOUT.]\n"); # else - DISPLAYOUT(" --[no-]sparse sparse mode (default: disabled)\n"); + DISPLAYOUT(" --[no-]sparse Enable sparse mode. 
[Default: Disabled]\n"); # endif { - char const* passThroughDefault = "disabled"; + char const* passThroughDefault = "Disabled"; if (exeNameMatch(programName, ZSTD_CAT) || exeNameMatch(programName, ZSTD_ZCAT) || exeNameMatch(programName, ZSTD_GZCAT)) { - passThroughDefault = "enabled"; + passThroughDefault = "Enabled"; } - DISPLAYOUT(" --[no-]pass-through : passes through uncompressed files as-is (default: %s)\n", passThroughDefault); + DISPLAYOUT(" --[no-]pass-through Pass through uncompressed files as-is. [Default: %s]\n", passThroughDefault); } #endif /* ZSTD_NODECOMPRESS */ #ifndef ZSTD_NODICT DISPLAYOUT("\n"); - DISPLAYOUT("Dictionary builder :\n"); - DISPLAYOUT(" --train ## create a dictionary from a training set of files\n"); - DISPLAYOUT(" --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args\n"); - DISPLAYOUT(" --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args\n"); - DISPLAYOUT(" --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel); - DISPLAYOUT(" -o DICT DICT is dictionary name (default: %s)\n", g_defaultDictName); - DISPLAYOUT(" --maxdict=# limit dictionary to specified size (default: %u)\n", g_defaultMaxDictSize); - DISPLAYOUT(" --dictID=# force dictionary ID to specified value (default: random)\n"); + DISPLAYOUT("Dictionary builder:\n"); + DISPLAYOUT(" --train Create a dictionary from a training set of files.\n\n"); + DISPLAYOUT(" --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]]\n"); + DISPLAYOUT(" Use the cover algorithm (with optional arguments).\n"); + DISPLAYOUT(" --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]]\n"); + DISPLAYOUT(" Use the fast cover algorithm (with optional arguments).\n\n"); + DISPLAYOUT(" --train-legacy[=s=#] Use the legacy algorithm with selectivity #. 
[Default: %u]\n", g_defaultSelectivityLevel); + DISPLAYOUT(" -o NAME Use NAME as dictionary name. [Default: %s]\n", g_defaultDictName); + DISPLAYOUT(" --maxdict=# Limit dictionary to specified size #. [Default: %u]\n", g_defaultMaxDictSize); + DISPLAYOUT(" --dictID=# Force dictionary ID to #. [Default: Random]\n"); #endif #ifndef ZSTD_NOBENCH DISPLAYOUT("\n"); - DISPLAYOUT("Benchmark options : \n"); - DISPLAYOUT(" -b# benchmark file(s), using # compression level (default: %d)\n", ZSTDCLI_CLEVEL_DEFAULT); - DISPLAYOUT(" -e# test all compression levels successively from -b# to -e# (default: 1)\n"); - DISPLAYOUT(" -i# minimum evaluation time in seconds (default: 3s)\n"); - DISPLAYOUT(" -B# cut file into independent chunks of size # (default: no chunking)\n"); - DISPLAYOUT(" -S output one benchmark result per input file (default: consolidated result)\n"); - DISPLAYOUT(" --priority=rt set process priority to real-time\n"); + DISPLAYOUT("Benchmark options:\n"); + DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT); + DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n"); + DISPLAYOUT(" -i# Set the minimum evaluation time to # seconds. [Default: 3]\n"); + DISPLAYOUT(" -B# Cut file into independent chunks of size #. [Default: No chunking]\n"); + DISPLAYOUT(" -S Output one benchmark result per input file.
[Default: Consolidated result]\n"); + DISPLAYOUT(" --priority=rt Set process priority to real-time.\n"); #endif } @@ -990,6 +1004,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--fake-stdin-is-console")) { UTIL_fakeStdinIsConsole(); continue; } if (!strcmp(argument, "--fake-stdout-is-console")) { UTIL_fakeStdoutIsConsole(); continue; } if (!strcmp(argument, "--fake-stderr-is-console")) { UTIL_fakeStderrIsConsole(); continue; } + if (!strcmp(argument, "--trace-file-stat")) { UTIL_traceFileStat(); continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT diff --git a/programs/zstdcli_trace.c b/programs/zstdcli_trace.c index b3b977feb..35075a52c 100644 --- a/programs/zstdcli_trace.c +++ b/programs/zstdcli_trace.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/zstdcli_trace.h b/programs/zstdcli_trace.h index 38c27dc04..9c135d3ca 100644 --- a/programs/zstdcli_trace.h +++ b/programs/zstdcli_trace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/zstdgrep.1.md b/programs/zstdgrep.1.md index 35186a4bf..6370a81c7 100644 --- a/programs/zstdgrep.1.md +++ b/programs/zstdgrep.1.md @@ -4,16 +4,16 @@ zstdgrep(1) -- print lines matching a pattern in zstandard-compressed files SYNOPSIS -------- -`zstdgrep` [*grep-flags*] [--] _pattern_ [_files_ ...] +`zstdgrep` [<grep-flags>] [--] <pattern> [<files> ...] DESCRIPTION ----------- -`zstdgrep` runs `grep (1)` on files, or `stdin` if no files argument is given, after decompressing them with `zstdcat (1)`. +`zstdgrep` runs `grep`(1) on files, or `stdin` if no files argument is given, after decompressing them with `zstdcat`(1).
-The grep-flags and pattern arguments are passed on to `grep (1)`. If an `-e` flag is found in the `grep-flags`, `zstdgrep` will not look for a pattern argument. +The <grep-flags> and <pattern> arguments are passed on to `grep`(1). If an `-e` flag is found in the <grep-flags>, `zstdgrep` will not look for a <pattern> argument. -Note that modern `grep` alternatives such as `ripgrep` (`rg`) support `zstd`-compressed files out of the box, +Note that modern `grep` alternatives such as `ripgrep` (`rg`(1)) support `zstd`-compressed files out of the box, and can prove better alternatives than `zstdgrep` notably for unsupported complex pattern searches. Note though that such alternatives may also feature some minor command line differences. @@ -23,7 +23,7 @@ In case of missing arguments or missing pattern, 1 will be returned, otherwise 0 SEE ALSO -------- -`zstd (1)` +`zstd`(1) AUTHORS ------- diff --git a/programs/zstdless.1.md b/programs/zstdless.1.md index d91d48abc..67c1c7676 100644 --- a/programs/zstdless.1.md +++ b/programs/zstdless.1.md @@ -4,13 +4,13 @@ zstdless(1) -- view zstandard-compressed files SYNOPSIS -------- -`zstdless` [*flags*] [_file_ ...] +`zstdless` [<flags>] [<file> ...] DESCRIPTION ----------- -`zstdless` runs `less (1)` on files or stdin, if no files argument is given, after decompressing them with `zstdcat (1)`. +`zstdless` runs `less`(1) on files or stdin, if no <file> argument is given, after decompressing them with `zstdcat`(1). SEE ALSO -------- -`zstd (1)` +`zstd`(1) diff --git a/tests/DEPRECATED-test-zstd-speed.py b/tests/DEPRECATED-test-zstd-speed.py index 665e0a7f9..71d75b8cc 100755 --- a/tests/DEPRECATED-test-zstd-speed.py +++ b/tests/DEPRECATED-test-zstd-speed.py @@ -2,7 +2,7 @@ # THIS BENCHMARK IS BEING REPLACED BY automated-bencmarking.py # ################################################################ -# Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved.
# # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/Makefile b/tests/Makefile index afea6475a..8226176cc 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,6 +1,6 @@ # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -169,7 +169,7 @@ fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PR $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) CLEAN += zstreamtest zstreamtest32 -ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c +ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c external_matchfinder.c ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES) ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES) zstreamtest32 : CFLAGS += -m32 diff --git a/tests/automated_benchmarking.py b/tests/automated_benchmarking.py index c98391193..153e7db4c 100644 --- a/tests/automated_benchmarking.py +++ b/tests/automated_benchmarking.py @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/bigdict.c b/tests/bigdict.c index fb0892586..ff2bb2d70 100644 --- a/tests/bigdict.c +++ b/tests/bigdict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/checkTag.c b/tests/checkTag.c index f6c5e9766..26871ed0f 100644 --- a/tests/checkTag.c +++ b/tests/checkTag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/check_size.py b/tests/check_size.py new file mode 100755 index 000000000..028b0a9c0 --- /dev/null +++ b/tests/check_size.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# ################################################################ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ + +import os +import subprocess +import sys + +if len(sys.argv) != 3: + print(f"Usage: {sys.argv[0]} FILE SIZE_LIMIT") + sys.exit(1) + +file = sys.argv[1] +limit = int(sys.argv[2]) + +if not os.path.exists(file): + print(f"{file} does not exist") + sys.exit(1) + +size = os.path.getsize(file) + +if size > limit: + print(f"file {file} is {size} bytes, which is greater than the limit of {limit} bytes") + sys.exit(1) diff --git a/tests/cli-tests/README.md b/tests/cli-tests/README.md index cdf9b8e71..7ca07c3e9 100644 --- a/tests/cli-tests/README.md +++ b/tests/cli-tests/README.md @@ -45,6 +45,16 @@ Examples: ./run.py --preserve --verbose basic/help.sh ``` +### Updating exact output + +If a test is failing because a `.stderr.exact` or `.stdout.exact` no longer matches, you can re-run the tests with `--set-exact-output` and the correct output will be written. 
+ +Example: +``` +./run.py --set-exact-output +./run.py basic/help.sh --set-exact-output +``` + ## Writing a test Test cases are arbitrary executables, and can be written in any language, but are generally shell scripts. diff --git a/tests/cli-tests/basic/help.sh.stdout.glob b/tests/cli-tests/basic/help.sh.stdout.glob index bfb0f7753..5580dc669 100644 --- a/tests/cli-tests/basic/help.sh.stdout.glob +++ b/tests/cli-tests/basic/help.sh.stdout.glob @@ -1,25 +1,34 @@ + zstd -h -Usage: zstd *OPTION*... *FILE*... *-o file* -Compress or uncompress FILEs (with no FILE or when FILE is `-`, read from standard input). +Compress or decompress the INPUT file(s); reads from STDIN if INPUT is `-` or not provided. - -o file result stored into `file` (only 1 output file) - -1 .. -19 compression level (faster .. better; default: 3) - -d, --decompress decompression - -f, --force disable input and output checks. Allows overwriting existing files, - input from console, output to stdout, operating on links, - block devices, etc. During decompression and when the output - destination is stdout, pass-through unrecognized formats as-is. - --rm remove source file(s) after successful de/compression - -k, --keep preserve source file(s) (default) - -D DICT use DICT as Dictionary for compression or decompression - -h display usage and exit - -H,--help display long help and exit +Usage: zstd *OPTIONS...* *INPUT... | -* *-o OUTPUT* + +Options: + -o OUTPUT Write output to a single file, OUTPUT. + -k, --keep Preserve INPUT file(s). *Default* + --rm Remove INPUT file(s) after successful (de)compression. + + -# Desired compression level, where `#` is a number between 1 and 19; + lower numbers provide faster compression, higher numbers yield + better compression ratios. *Default: 3* + + -d, --decompress Perform decompression. + -D DICT Use DICT as the dictionary for compression or decompression. + + -f, --force Disable input and output checks. 
Allows overwriting existing files, + receiving input from the console, printing ouput to STDOUT, and + operating on links, block devices, etc. Unrecognized formats will be + passed-through through as-is. + + -h Display short usage and exit. + -H, --help Display full help and exit. + -V, --version Display the program version and exit. + zstd -H ... -Advanced options : +Advanced options: ... + zstd --help ... -Advanced options : +Advanced options: ... diff --git a/tests/cli-tests/basic/version.sh.stdout.glob b/tests/cli-tests/basic/version.sh.stdout.glob index 54968fa41..4cc9fb9b2 100644 --- a/tests/cli-tests/basic/version.sh.stdout.glob +++ b/tests/cli-tests/basic/version.sh.stdout.glob @@ -1,2 +1,2 @@ -*** zstd command line interface *-bits v1.*.*, by Yann Collet *** -*** zstd command line interface *-bits v1.*.*, by Yann Collet *** +*** Zstandard CLI (*-bit) v1.*.*, by Yann Collet *** +*** Zstandard CLI (*-bit) v1.*.*, by Yann Collet *** diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh index 4837790ce..47ac2029f 100755 --- a/tests/cli-tests/compression/levels.sh +++ b/tests/cli-tests/compression/levels.sh @@ -10,11 +10,9 @@ zstd --fast=10 file -o file-f10.zst zstd --fast=1 file -o file-f1.zst zstd -1 file -o file-1.zst zstd -19 file -o file-19.zst -zstd -22 --ultra file -o file-22.zst -zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst +zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst -cmp_size -ne file-19.zst file-22.zst cmp_size -lt file-19.zst file-1.zst cmp_size -lt file-1.zst file-f1.zst cmp_size -lt file-f1.zst file-f10.zst diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact index cb00433e6..5664f854b 100644 --- a/tests/cli-tests/compression/levels.sh.stderr.exact +++ b/tests/cli-tests/compression/levels.sh.stderr.exact @@ -6,11 +6,9 @@ zstd --fast=10 file -o file-f10.zst zstd --fast=1 file -o file-f1.zst zstd -1 file -o 
file-1.zst zstd -19 file -o file-19.zst -zstd -22 --ultra file -o file-22.zst -zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst +zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst -cmp_size -ne file-19.zst file-22.zst cmp_size -lt file-19.zst file-1.zst cmp_size -lt file-1.zst file-f1.zst cmp_size -lt file-f1.zst file-f10.zst diff --git a/tests/cli-tests/file-stat/compress-file-to-file.sh b/tests/cli-tests/file-stat/compress-file-to-file.sh new file mode 100755 index 000000000..949f34ff1 --- /dev/null +++ b/tests/cli-tests/file-stat/compress-file-to-file.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e + +datagen > file + +zstd file -q --trace-file-stat -o file.zst +zstd -tq file.zst diff --git a/tests/cli-tests/file-stat/compress-file-to-file.sh.stderr.exact b/tests/cli-tests/file-stat/compress-file-to-file.sh.stderr.exact new file mode 100644 index 000000000..34bb704c7 --- /dev/null +++ b/tests/cli-tests/file-stat/compress-file-to-file.sh.stderr.exact @@ -0,0 +1,36 @@ +Trace:FileStat: > UTIL_isLink(file) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(2) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_getFileSize(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 65537 +Trace:FileStat: > UTIL_isDirectory(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_isSameFile(file, file.zst) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file.zst) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file.zst) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_getFileSize(file) +Trace:FileStat: > UTIL_stat(file) 
+Trace:FileStat: < 1 +Trace:FileStat: < 65537 +Trace:FileStat: > UTIL_utime(file.zst) +Trace:FileStat: < 0 diff --git a/tests/cli-tests/file-stat/compress-file-to-stdout.sh b/tests/cli-tests/file-stat/compress-file-to-stdout.sh new file mode 100755 index 000000000..99ebfc46f --- /dev/null +++ b/tests/cli-tests/file-stat/compress-file-to-stdout.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e + +datagen > file + +zstd file -cq --trace-file-stat > file.zst +zstd -tq file.zst diff --git a/tests/cli-tests/file-stat/compress-file-to-stdout.sh.stderr.exact b/tests/cli-tests/file-stat/compress-file-to-stdout.sh.stderr.exact new file mode 100644 index 000000000..e01922f84 --- /dev/null +++ b/tests/cli-tests/file-stat/compress-file-to-stdout.sh.stderr.exact @@ -0,0 +1,22 @@ +Trace:FileStat: > UTIL_isLink(file) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(1) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_getFileSize(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 65537 +Trace:FileStat: > UTIL_isDirectory(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_isRegularFile(/*stdout*\) +Trace:FileStat: > UTIL_stat(/*stdout*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_getFileSize(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 65537 diff --git a/tests/cli-tests/file-stat/compress-stdin-to-file.sh b/tests/cli-tests/file-stat/compress-stdin-to-file.sh new file mode 100755 index 000000000..837946198 --- /dev/null +++ b/tests/cli-tests/file-stat/compress-stdin-to-file.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e + +datagen > file + +zstd < file -q --trace-file-stat -o file.zst +zstd -tq file.zst diff --git a/tests/cli-tests/file-stat/compress-stdin-to-file.sh.stderr.exact b/tests/cli-tests/file-stat/compress-stdin-to-file.sh.stderr.exact new file mode 100644 index 000000000..dce9005f4 --- 
/dev/null +++ b/tests/cli-tests/file-stat/compress-stdin-to-file.sh.stderr.exact @@ -0,0 +1,28 @@ +Trace:FileStat: > UTIL_isConsole(0) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(2) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_getFileSize(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < -1 +Trace:FileStat: > UTIL_isDirectory(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isSameFile(/*stdin*\, file.zst) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file.zst) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file.zst) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_getFileSize(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < -1 diff --git a/tests/cli-tests/file-stat/compress-stdin-to-stdout.sh b/tests/cli-tests/file-stat/compress-stdin-to-stdout.sh new file mode 100755 index 000000000..64f4b0362 --- /dev/null +++ b/tests/cli-tests/file-stat/compress-stdin-to-stdout.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -e + +datagen > file + +zstd < file -cq --trace-file-stat > file.zst +zstd -tq file.zst diff --git a/tests/cli-tests/file-stat/compress-stdin-to-stdout.sh.stderr.exact b/tests/cli-tests/file-stat/compress-stdin-to-stdout.sh.stderr.exact new file mode 100644 index 000000000..d47bdcf09 --- /dev/null +++ b/tests/cli-tests/file-stat/compress-stdin-to-stdout.sh.stderr.exact @@ -0,0 +1,20 @@ +Trace:FileStat: > UTIL_isConsole(0) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(1) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_getFileSize(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < -1 +Trace:FileStat: > UTIL_isDirectory(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) 
+Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(/*stdout*\) +Trace:FileStat: > UTIL_stat(/*stdout*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_getFileSize(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < -1 diff --git a/tests/cli-tests/file-stat/decompress-file-to-file.sh b/tests/cli-tests/file-stat/decompress-file-to-file.sh new file mode 100755 index 000000000..3e08c247b --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-file-to-file.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +datagen | zstd -q > file.zst + +zstd -dq --trace-file-stat file.zst diff --git a/tests/cli-tests/file-stat/decompress-file-to-file.sh.stderr.exact b/tests/cli-tests/file-stat/decompress-file-to-file.sh.stderr.exact new file mode 100644 index 000000000..a2d7ad282 --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-file-to-file.sh.stderr.exact @@ -0,0 +1,30 @@ +Trace:FileStat: > UTIL_isLink(file.zst) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(1) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(2) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isDirectory(file.zst) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_isSameFile(file.zst, file) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_utime(file) +Trace:FileStat: < 0 diff --git a/tests/cli-tests/file-stat/decompress-file-to-stdout.sh b/tests/cli-tests/file-stat/decompress-file-to-stdout.sh new file 
mode 100755 index 000000000..518c2a950 --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-file-to-stdout.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +datagen | zstd -q > file.zst + +zstd -dcq --trace-file-stat file.zst > file diff --git a/tests/cli-tests/file-stat/decompress-file-to-stdout.sh.stderr.exact b/tests/cli-tests/file-stat/decompress-file-to-stdout.sh.stderr.exact new file mode 100644 index 000000000..87fc5e97c --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-file-to-stdout.sh.stderr.exact @@ -0,0 +1,14 @@ +Trace:FileStat: > UTIL_isLink(file.zst) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(1) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isDirectory(file.zst) +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_stat(file.zst) +Trace:FileStat: < 1 +Trace:FileStat: > UTIL_isRegularFile(/*stdout*\) +Trace:FileStat: > UTIL_stat(/*stdout*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 diff --git a/tests/cli-tests/file-stat/decompress-stdin-to-file.sh b/tests/cli-tests/file-stat/decompress-stdin-to-file.sh new file mode 100755 index 000000000..135d75524 --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-stdin-to-file.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +datagen | zstd -q > file.zst + +zstd -dcq --trace-file-stat < file.zst -o file diff --git a/tests/cli-tests/file-stat/decompress-stdin-to-file.sh.stderr.exact b/tests/cli-tests/file-stat/decompress-stdin-to-file.sh.stderr.exact new file mode 100644 index 000000000..749fd395a --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-stdin-to-file.sh.stderr.exact @@ -0,0 +1,20 @@ +Trace:FileStat: > UTIL_isConsole(0) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(2) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isDirectory(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isSameFile(/*stdin*\, file) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 
+Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(file) +Trace:FileStat: > UTIL_stat(file) +Trace:FileStat: < 1 +Trace:FileStat: < 1 diff --git a/tests/cli-tests/file-stat/decompress-stdin-to-stdout.sh b/tests/cli-tests/file-stat/decompress-stdin-to-stdout.sh new file mode 100755 index 000000000..495f07b17 --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-stdin-to-stdout.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +datagen | zstd -q > file.zst + +zstd -dcq --trace-file-stat < file.zst > file diff --git a/tests/cli-tests/file-stat/decompress-stdin-to-stdout.sh.stderr.exact b/tests/cli-tests/file-stat/decompress-stdin-to-stdout.sh.stderr.exact new file mode 100644 index 000000000..61487f61e --- /dev/null +++ b/tests/cli-tests/file-stat/decompress-stdin-to-stdout.sh.stderr.exact @@ -0,0 +1,12 @@ +Trace:FileStat: > UTIL_isConsole(0) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isConsole(1) +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isDirectory(/*stdin*\) +Trace:FileStat: > UTIL_stat(/*stdin*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 +Trace:FileStat: > UTIL_isRegularFile(/*stdout*\) +Trace:FileStat: > UTIL_stat(/*stdout*\) +Trace:FileStat: < 0 +Trace:FileStat: < 0 diff --git a/tests/cli-tests/progress/no-progress.sh.stderr.glob b/tests/cli-tests/progress/no-progress.sh.stderr.glob index f07ad3803..b4262517a 100644 --- a/tests/cli-tests/progress/no-progress.sh.stderr.glob +++ b/tests/cli-tests/progress/no-progress.sh.stderr.glob @@ -55,36 +55,36 @@ decompress 2 files args = --no-progress --fake-stderr-is-console -v compress file to file -*zstd* +*Zstandard CLI* hello*hello.zst* compress pipe to pipe -*zstd* +*Zstandard CLI* *stdin*stdout* compress pipe to file -*zstd* +*Zstandard CLI* *stdin*hello.zst* compress file to pipe -*zstd* +*Zstandard CLI* *hello*stdout* compress 2 files -*zstd* +*Zstandard CLI* *hello*hello.zst* 
*world*world.zst* 2 files compressed* decompress file to file -*zstd* +*Zstandard CLI* hello.zst* decompress pipe to pipe -*zstd* +*Zstandard CLI* *stdin* decompress pipe to file -*zstd* +*Zstandard CLI* *stdin* decompress file to pipe -*zstd* +*Zstandard CLI* hello.zst* decompress 2 files -*zstd* +*Zstandard CLI* hello.zst* world.zst* 2 files decompressed* diff --git a/tests/cli-tests/run.py b/tests/cli-tests/run.py index d726fba7f..8b4e21a53 100755 --- a/tests/cli-tests/run.py +++ b/tests/cli-tests/run.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -209,6 +209,7 @@ class Options: preserve: bool, scratch_dir: str, test_dir: str, + set_exact_output: bool, ) -> None: self.env = env self.timeout = timeout @@ -216,6 +217,7 @@ class Options: self.preserve = preserve self.scratch_dir = scratch_dir self.test_dir = test_dir + self.set_exact_output = set_exact_output class TestCase: @@ -335,7 +337,7 @@ class TestCase: self._test_stdin.close() self._test_stdin = None - def _check_output_exact(self, out_name: str, expected: bytes) -> None: + def _check_output_exact(self, out_name: str, expected: bytes, exact_name: str) -> None: """ Check the output named :out_name: for an exact match against the :expected: content. Saves the success and message. @@ -349,6 +351,10 @@ class TestCase: self._success[check_name] = False self._message[check_name] = f"{out_name} does not match!\n> diff expected actual\n{diff(expected, actual)}" + if self._opts.set_exact_output: + with open(exact_name, "wb") as f: + f.write(actual) + def _check_output_glob(self, out_name: str, expected: bytes) -> None: """ Check the output named :out_name: for a glob match against the :expected: glob. 
@@ -386,7 +392,7 @@ class TestCase: ignore_name = f"{self._test_file}.{out_name}.ignore" if os.path.exists(exact_name): - return self._check_output_exact(out_name, read_file(exact_name)) + return self._check_output_exact(out_name, read_file(exact_name), exact_name) elif os.path.exists(glob_name): return self._check_output_glob(out_name, read_file(glob_name)) elif os.path.exists(ignore_name): @@ -394,7 +400,7 @@ class TestCase: self._success[check_name] = True self._message[check_name] = f"{out_name} ignored!" else: - return self._check_output_exact(out_name, bytes()) + return self._check_output_exact(out_name, bytes(), exact_name) def _check_stderr(self) -> None: """Checks the stderr output against the expectation.""" @@ -678,6 +684,11 @@ if __name__ == "__main__": "Scratch directory located in TEST_DIR/scratch/." ) ) + parser.add_argument( + "--set-exact-output", + action="store_true", + help="Set stderr.exact and stdout.exact for all failing tests, unless .ignore or .glob already exists" + ) parser.add_argument( "tests", nargs="*", @@ -714,6 +725,7 @@ if __name__ == "__main__": preserve=args.preserve, test_dir=args.test_dir, scratch_dir=scratch_dir, + set_exact_output=args.set_exact_output, ) if len(args.tests) == 0: diff --git a/tests/datagencli.c b/tests/datagencli.c index 7300fdb76..09ec5e9ae 100644 --- a/tests/datagencli.c +++ b/tests/datagencli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index d2b126f39..20ad5add0 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -136,7 +136,7 @@ static void RAND_genDist(U32* seed, BYTE* dist, double weight) BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ while (i < DISTSIZE) { - size_t states = ((size_t)(weight * statesLeft)) + 1; + size_t states = ((size_t)(weight * (double)statesLeft)) + 1; size_t j; for (j = 0; j < states && i < DISTSIZE; j++, i++) { dist[i] = symb; @@ -662,11 +662,11 @@ generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, * ensure nice numbers */ U32 matchLen = MIN_SEQ_LEN + - ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i))); + ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i))); U32 literalLen = (RAND(seed) & 7) ? ROUND(RAND_exp(seed, - literalsSize / + (double)literalsSize / (double)(numSequences - i))) : 0; /* actual offset, code to send, and point to copy up to when shifting diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c new file mode 100644 index 000000000..8ae76d519 --- /dev/null +++ b/tests/external_matchfinder.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) Yann Collet, Meta Platforms, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "external_matchfinder.h" +#include +#include "zstd_compress_internal.h" + +#define HSIZE 1024 +static U32 const HLOG = 10; +static U32 const MLS = 4; +static U32 const BADIDX = 0xffffffff; + +static size_t simpleExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +) { + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + const BYTE* anchor = istart; + size_t seqCount = 0; + U32 hashTable[HSIZE]; + + (void)externalMatchState; + (void)dict; + (void)dictSize; + (void)outSeqsCapacity; + (void)compressionLevel; + + { int i; + for (i=0; i < HSIZE; i++) { + hashTable[i] = BADIDX; + } } + + while (ip + MLS < iend) { + size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); + U32 const matchIndex = hashTable[hash]; + hashTable[hash] = (U32)(ip - istart); + + if (matchIndex != BADIDX) { + const BYTE* const match = istart + matchIndex; + U32 const matchLen = (U32)ZSTD_count(ip, match, iend); + if (matchLen >= ZSTD_MINMATCH_MIN) { + U32 const litLen = (U32)(ip - anchor); + U32 const offset = (U32)(ip - match); + ZSTD_Sequence const seq = { + offset, litLen, matchLen, 0 + }; + + /* Note: it's crucial to stay within the window size! 
*/ + if (offset <= windowSize) { + outSeqs[seqCount++] = seq; + ip += matchLen; + anchor = ip; + continue; + } + } + } + + ip++; + } + + { ZSTD_Sequence const finalSeq = { + 0, (U32)(iend - anchor), 0, 0 + }; + outSeqs[seqCount++] = finalSeq; + } + + return seqCount; +} + +size_t zstreamExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +) { + EMF_testCase const testCase = *((EMF_testCase*)externalMatchState); + memset(outSeqs, 0, outSeqsCapacity); + + switch (testCase) { + case EMF_ZERO_SEQS: + return 0; + case EMF_ONE_BIG_SEQ: + outSeqs[0].offset = 0; + outSeqs[0].matchLength = 0; + outSeqs[0].litLength = (U32)(srcSize); + return 1; + case EMF_LOTS_OF_SEQS: + return simpleExternalMatchFinder( + externalMatchState, + outSeqs, outSeqsCapacity, + src, srcSize, + dict, dictSize, + compressionLevel, + windowSize + ); + case EMF_SMALL_ERROR: + return outSeqsCapacity + 1; + case EMF_BIG_ERROR: + default: + return ZSTD_EXTERNAL_MATCHFINDER_ERROR; + } +} diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h new file mode 100644 index 000000000..041f73e4d --- /dev/null +++ b/tests/external_matchfinder.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) Yann Collet, Meta Platforms, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef EXTERNAL_MATCHFINDER +#define EXTERNAL_MATCHFINDER + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + +/* See external_matchfinder.c for details on each test case */ +typedef enum { + EMF_ZERO_SEQS = 0, + EMF_ONE_BIG_SEQ = 1, + EMF_LOTS_OF_SEQS = 2, + EMF_BIG_ERROR = 3, + EMF_SMALL_ERROR = 4 +} EMF_testCase; + +size_t zstreamExternalMatchFinder( + void* externalMatchState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +); + +#endif // EXTERNAL_MATCHFINDER diff --git a/tests/fullbench.c b/tests/fullbench.c index b55ff767f..25c6d2b40 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 912348c38..35a19dd89 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c index bdbf76916..e4767b3f5 100644 --- a/tests/fuzz/block_decompress.c +++ b/tests/fuzz/block_decompress.c @@ -1,5 +1,5 @@ /** - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c index 46a84c7b9..54012a1da 100644 --- a/tests/fuzz/block_round_trip.c +++ b/tests/fuzz/block_round_trip.c @@ -1,5 +1,5 @@ /** - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/decompress_dstSize_tooSmall.c b/tests/fuzz/decompress_dstSize_tooSmall.c index 3f7607b29..8ad3ea1c9 100644 --- a/tests/fuzz/decompress_dstSize_tooSmall.c +++ b/tests/fuzz/decompress_dstSize_tooSmall.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c index 33c58c8bd..8dea3e560 100644 --- a/tests/fuzz/dictionary_decompress.c +++ b/tests/fuzz/dictionary_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_loader.c b/tests/fuzz/dictionary_loader.c index 5b60bc484..1ac22740f 100644 --- a/tests/fuzz/dictionary_loader.c +++ b/tests/fuzz/dictionary_loader.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c index 0b20e8d67..6dd78c3f3 100644 --- a/tests/fuzz/dictionary_round_trip.c +++ b/tests/fuzz/dictionary_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. 
+ * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/dictionary_stream_round_trip.c b/tests/fuzz/dictionary_stream_round_trip.c index 9af712f0e..3ebbd843b 100644 --- a/tests/fuzz/dictionary_stream_round_trip.c +++ b/tests/fuzz/dictionary_stream_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fse_read_ncount.c b/tests/fuzz/fse_read_ncount.c index c32386099..29e19442a 100644 --- a/tests/fuzz/fse_read_ncount.c +++ b/tests/fuzz/fse_read_ncount.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h index 6f3fb2994..d1e439fef 100644 --- a/tests/fuzz/fuzz.h +++ b/tests/fuzz/fuzz.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 17eac4f6f..03ffeeefd 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c index eae8ee4b6..a93e8ba95 100644 --- a/tests/fuzz/fuzz_data_producer.c +++ b/tests/fuzz/fuzz_data_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. 
+ * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h index 62771a9f8..8ca501f9b 100644 --- a/tests/fuzz/fuzz_data_producer.h +++ b/tests/fuzz/fuzz_data_producer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz_helpers.c b/tests/fuzz/fuzz_helpers.c index 61c0debe5..1b6ad9736 100644 --- a/tests/fuzz/fuzz_helpers.c +++ b/tests/fuzz/fuzz_helpers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h index c180478e9..aaf4c1df4 100644 --- a/tests/fuzz/fuzz_helpers.h +++ b/tests/fuzz/fuzz_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/huf_decompress.c b/tests/fuzz/huf_decompress.c index fea09fc93..13f040a17 100644 --- a/tests/fuzz/huf_decompress.c +++ b/tests/fuzz/huf_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/huf_round_trip.c b/tests/fuzz/huf_round_trip.c index 32b08d6d5..0993ecd14 100644 --- a/tests/fuzz/huf_round_trip.c +++ b/tests/fuzz/huf_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/raw_dictionary_round_trip.c b/tests/fuzz/raw_dictionary_round_trip.c index 0e6517654..7ceab2be6 100644 --- a/tests/fuzz/raw_dictionary_round_trip.c +++ b/tests/fuzz/raw_dictionary_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index e6d2dec64..550c65d86 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/seekable_roundtrip.c b/tests/fuzz/seekable_roundtrip.c index dcdcaae11..6f0aa288f 100644 --- a/tests/fuzz/seekable_roundtrip.c +++ b/tests/fuzz/seekable_roundtrip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index cc872a081..ec8fef488 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c index 8e6980b35..c9fea22d3 100644 --- a/tests/fuzz/simple_compress.c +++ b/tests/fuzz/simple_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. 
and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index dfff11cbc..ce5f9f098 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 9da986bc9..23a805af2 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index 86a39b8c9..0254d06ec 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index 719eac48d..8a28907b6 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c index 9ce645d15..95dbdd49a 100644 --- a/tests/fuzz/zstd_frame_info.c +++ b/tests/fuzz/zstd_frame_info.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. 
* All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 08ce70dd7..d5210627e 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h index 9fbefdc72..a4cfe321b 100644 --- a/tests/fuzz/zstd_helpers.h +++ b/tests/fuzz/zstd_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 879e537bc..3ad8ced5e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -82,8 +82,8 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; void FUZ_bug976(void); void FUZ_bug976(void) { /* these constants shall not depend on MIN() macro */ - assert(ZSTD_HASHLOG_MAX < 31); - assert(ZSTD_CHAINLOG_MAX < 31); + DEBUG_STATIC_ASSERT(ZSTD_HASHLOG_MAX < 31); + DEBUG_STATIC_ASSERT(ZSTD_CHAINLOG_MAX < 31); } @@ -118,23 +118,24 @@ static U32 FUZ_highbit32(U32 v32) /*============================================= * Test macros =============================================*/ -#define CHECK_Z(f) { \ - size_t const err = f; \ - if (ZSTD_isError(err)) { \ - DISPLAY("Error => %s : %s ", \ - #f, ZSTD_getErrorName(err)); \ - exit(1); \ +#define CHECK(fn) { if(!(fn)) { DISPLAYLEVEL(1, "Error : test (%s) failed \n", #fn); exit(1); } } + +#define CHECK_Z(f) { \ + size_t const err = f; \ + if (ZSTD_isError(err)) { \ + DISPLAY("Error => %s : %s ", \ + #f, ZSTD_getErrorName(err)); \ + exit(1); \ } } -#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); goto _output_error; } +#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); exit(1); } #define CHECK_NEWV(var, fn) size_t const CHECK_VAR(var, fn) -#define CHECK(fn) { CHECK_NEWV(__err, fn); } #define CHECKPLUS(var, fn, more) { CHECK_NEWV(var, fn); more; } #define CHECK_OP(op, lhs, rhs) { \ if (!((lhs) op (rhs))) { \ DISPLAY("Error L%u => FAILED %s %s %s ", __LINE__, #lhs, #op, #rhs); \ - goto _output_error; \ + exit(1); \ } \ } #define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs) @@ -338,6 +339,7 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, } #ifdef ZSTD_MULTITHREAD + typedef struct { ZSTD_CCtx* cctx; ZSTD_threadPool* pool; @@ -429,8 +431,8 @@ static int threadPoolTests(void) { ZSTD_pthread_create(&t1, NULL, 
threadPoolTests_compressionJob, &p1);
     ZSTD_pthread_create(&t2, NULL, threadPoolTests_compressionJob, &p2);
 
-    ZSTD_pthread_join(t1, NULL);
-    ZSTD_pthread_join(t2, NULL);
+    ZSTD_pthread_join(t1);
+    ZSTD_pthread_join(t2);
 
     assert(!memcmp(decodedBuffer, decodedBuffer2, CNBuffSize));
     free(decodedBuffer2);
@@ -461,6 +463,101 @@ _output_error:
 * Unit tests
 =============================================*/
 
+static void test_compressBound(unsigned tnb)
+{
+    DISPLAYLEVEL(3, "test%3u : compressBound : ", tnb);
+
+    /* check ZSTD_compressBound == ZSTD_COMPRESSBOUND
+     * for a large range of known valid values */
+    DEBUG_STATIC_ASSERT(sizeof(size_t) >= 4);
+    {   int s;
+        for (s=0; s<30; s++) {
+            size_t const w = (size_t)1 << s;
+            CHECK_EQ(ZSTD_compressBound(w), ZSTD_COMPRESSBOUND(w));
+    }   }
+
+    // Ensure error if srcSize too big
+    {   size_t const w = ZSTD_MAX_INPUT_SIZE + 1;
+        CHECK(ZSTD_isError(ZSTD_compressBound(w))); /* must fail */
+        CHECK_EQ(ZSTD_COMPRESSBOUND(w), 0);
+    }
+
+    DISPLAYLEVEL(3, "OK \n");
+}
+
+static void test_decompressBound(unsigned tnb)
+{
+    DISPLAYLEVEL(3, "test%3u : decompressBound : ", tnb);
+
+    // Simple compression, with size : should provide size;
+    {   const char example[] = "abcd";
+        char cBuffer[ZSTD_COMPRESSBOUND(sizeof(example))];
+        size_t const cSize = ZSTD_compress(cBuffer, sizeof(cBuffer), example, sizeof(example), 0);
+        CHECK_Z(cSize);
+        CHECK_EQ(ZSTD_decompressBound(cBuffer, cSize), (unsigned long long)sizeof(example));
+    }
+
+    // Simple small compression without size : should provide 1 block size
+    {   char cBuffer[ZSTD_COMPRESSBOUND(0)];
+        ZSTD_outBuffer out = { cBuffer, sizeof(cBuffer), 0 };
+        ZSTD_inBuffer in = { NULL, 0, 0 };
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        assert(cctx);
+        CHECK_Z( ZSTD_initCStream(cctx, 0) );
+        CHECK_Z( ZSTD_compressStream(cctx, &out, &in) );
+        CHECK_EQ( ZSTD_endStream(cctx, &out), 0 );
+        CHECK_EQ( ZSTD_decompressBound(cBuffer, out.pos), ZSTD_BLOCKSIZE_MAX );
+        ZSTD_freeCCtx(cctx);
+    }
+
+    // Attempt to overflow 32-bit intermediate multiplication result
+    // This requires dBound >= 4 GB, aka 2^32.
+    // This requires 2^32 / 2^17 = 2^15 blocks
+    // => create 2^15 blocks (can be empty, or just 1 byte).
+    {   const char input[] = "a";
+        size_t const nbBlocks = (1 << 15) + 1;
+        size_t blockNb;
+        size_t const outCapacity = 1 << 18; // large margin
+        char* const outBuffer = malloc (outCapacity);
+        ZSTD_outBuffer out = { outBuffer, outCapacity, 0 };
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        assert(cctx);
+        assert(outBuffer);
+        CHECK_Z( ZSTD_initCStream(cctx, 0) );
+        for (blockNb=0; blockNb<nbBlocks; blockNb++) {
+            ZSTD_inBuffer in = { input, sizeof(input), 0 };
+            CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush) );
+            CHECK_EQ( in.pos, in.size );
+        }
+        CHECK_EQ( ZSTD_endStream(cctx, &out), 0 );
+        CHECK( ZSTD_decompressBound(outBuffer, out.pos) > 0x100000000LLU /* 4 GB */ );
+        ZSTD_freeCCtx(cctx);
+        free(outBuffer);
+    }
+
+    DISPLAYLEVEL(3, "OK \n");
+}
+
+static void test_setCParams(unsigned tnb)
+{
+    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+    ZSTD_compressionParameters cparams;
+    assert(cctx);
+
+    DISPLAYLEVEL(3, "test%3u : ZSTD_CCtx_setCParams : ", tnb);
+
+    /* valid cparams */
+    cparams = ZSTD_getCParams(1, 0, 0);
+    CHECK_Z(ZSTD_CCtx_setCParams(cctx, cparams));
+
+    /* invalid cparams (must fail) */
+    cparams.windowLog = 99;
+    CHECK(ZSTD_isError(ZSTD_CCtx_setCParams(cctx, cparams)));
+
+    ZSTD_freeCCtx(cctx);   /* pair ZSTD_createCCtx() with ZSTD_freeCCtx(), not free() */
+    DISPLAYLEVEL(3, "OK \n");
+}
+
 static int basicUnitTests(U32 const seed, double compressibility)
 {
     size_t const CNBuffSize = 5 MB;
@@ -507,6 +604,12 @@ static int basicUnitTests(U32 const seed, double compressibility)
         DISPLAYLEVEL(3, "%u (OK) \n", vn);
     }
 
+    test_compressBound(testNb++);
+
+    test_decompressBound(testNb++);
+
+    test_setCParams(testNb++);
+
     DISPLAYLEVEL(3, "test%3u : ZSTD_adjustCParams : ", testNb++);
     {
         ZSTD_compressionParameters params;
@@ -1579,7 +1682,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         DISPLAYLEVEL(3, "OK \n");
 
         DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
-        CHECK( ZSTD_compressBegin(staticCCtx, 1) );
+        CHECK_Z( ZSTD_compressBegin(staticCCtx, 1) );
         DISPLAYLEVEL(3, "OK \n");
 
         DISPLAYLEVEL(3, "test%3i : use CStream on CCtx-sized static
context (should fail) : ", testNb++); @@ -1647,8 +1750,8 @@ static int basicUnitTests(U32 const seed, double compressibility) testResult = 1; goto _end; } - CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_nbWorkers, 2) ); - CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_nbWorkers, 2) ); + CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_compressionLevel, 1) ); DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3u : compress %u bytes with 2 threads : ", testNb++, (unsigned)CNBuffSize); @@ -1678,9 +1781,9 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : compress -T2 with checksum : ", testNb++); - CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_checksumFlag, 1) ); - CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_contentSizeFlag, 1) ); - CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_overlapLog, 3) ); + CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_checksumFlag, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_contentSizeFlag, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_overlapLog, 3) ); CHECK_VAR(cSize, ZSTD_compress2(mtctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize) ); @@ -1699,11 +1802,11 @@ static int basicUnitTests(U32 const seed, double compressibility) ZSTD_DCtx* const dctx = ZSTD_createDCtx(); char out[32]; if (cctx == NULL || dctx == NULL) goto _output_error; - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) ); CHECK_VAR(cSize, ZSTD_compress2(cctx, out, sizeof(out), NULL, 0) ); DISPLAYLEVEL(3, "OK (%u bytes)\n", (unsigned)cSize); - CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) ); + CHECK_Z( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) ); { char const* outPtr = out; ZSTD_inBuffer inBuffer = { outPtr, cSize, 0 }; ZSTD_outBuffer outBuffer = { NULL, 0, 0 }; @@ -1718,9 +1821,9 @@ static int 
basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++) { ZSTD_CCtx* cctx = ZSTD_createCCtx(); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable) ); cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); - CHECK(cSize); + CHECK_Z(cSize); ZSTD_freeCCtx(cctx); } DISPLAYLEVEL(3, "OK \n"); @@ -1728,13 +1831,13 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++) { ZSTD_CCtx* cctx = ZSTD_createCCtx(); size_t cSize1, cSize2; - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) ); cSize1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); - CHECK(cSize1); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_ps_disable) ); + CHECK_Z(cSize1); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_ps_disable) ); cSize2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); - CHECK(cSize2); + CHECK_Z(cSize2); CHECK_LT(cSize1, cSize2); ZSTD_freeCCtx(cctx); } @@ -1745,10 +1848,10 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so * ZSTDMT is forced to not take the shortcut. 
*/ - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) ); - CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) ); ZSTD_freeCCtx(cctx); } DISPLAYLEVEL(3, "OK \n"); @@ -1758,22 +1861,22 @@ static int basicUnitTests(U32 const seed, double compressibility) int const jobSize = 512 KB; int value; /* Check that the overlap log and job size are unset. */ - CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); CHECK_EQ(value, 0); - CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); CHECK_EQ(value, 0); /* Set and check the overlap log and job size. */ - CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) ); - CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, jobSize) ); - CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, jobSize) ); + CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); CHECK_EQ(value, 5); - CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); CHECK_EQ(value, jobSize); /* Set the number of workers and check the overlap log and job size. 
*/ - CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) ); - CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) ); + CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); CHECK_EQ(value, 5); - CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); CHECK_EQ(value, jobSize); ZSTD_freeCCtxParams(params); } @@ -1884,8 +1987,8 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : load dictionary into context : ", testNb++); - CHECK( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) ); - CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */ + CHECK_Z( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) ); + CHECK_Z( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */ DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : compress with flat dictionary : ", testNb++); @@ -1945,8 +2048,8 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : check content size on duplicated context : ", testNb++); { size_t const testSize = CNBuffSize / 3; - CHECK( ZSTD_compressBegin(ctxOrig, ZSTD_defaultCLevel()) ); - CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) ); + CHECK_Z( ZSTD_compressBegin(ctxOrig, ZSTD_defaultCLevel()) ); + CHECK_Z( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) ); CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize), (const char*)CNBuffer + dictSize, testSize) ); @@ -2780,7 +2883,7 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t const wrongSrcSize = (srcSize + 1000); ZSTD_parameters params = ZSTD_getParams(1, wrongSrcSize, 0); 
params.fParams.contentSizeFlag = 1; - CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) ); + CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) ); { size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize); if (!ZSTD_isError(result)) goto _output_error; if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error; @@ -2800,7 +2903,7 @@ static int basicUnitTests(U32 const seed, double compressibility) CNBuffer, srcSize, compressionLevel); if (ZSTD_isError(cSize_1pass)) goto _output_error; - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) ); { size_t const compressionResult = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize); @@ -2819,13 +2922,13 @@ static int basicUnitTests(U32 const seed, double compressibility) { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); DISPLAYLEVEL(3, "test%3i : parameters in order : ", testNb++); assert(cctx != NULL); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); { size_t const compressedSize = ZSTD_compress2(cctx, compressedBuffer, ZSTD_compressBound(inputSize), CNBuffer, inputSize); - CHECK(compressedSize); + CHECK_Z(compressedSize); cSize = compressedSize; xxh64 = XXH64(compressedBuffer, compressedSize, 0); } @@ -2835,13 +2938,13 @@ static int basicUnitTests(U32 const seed, double compressibility) { ZSTD_CCtx* cctx = ZSTD_createCCtx(); DISPLAYLEVEL(3, "test%3i : parameters disordered : ", testNb++); - 
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); { size_t const result = ZSTD_compress2(cctx, compressedBuffer, ZSTD_compressBound(inputSize), CNBuffer, inputSize); - CHECK(result); + CHECK_Z(result); if (result != cSize) goto _output_error; /* must result in same compressed result, hence same size */ if (XXH64(compressedBuffer, result, 0) != xxh64) goto _output_error; /* must result in exactly same content, hence same hash */ DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)result); @@ -2856,7 +2959,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : get dParameter bounds ", testNb++); { ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); - CHECK(bounds.error); + CHECK_Z(bounds.error); } DISPLAYLEVEL(3, "OK \n"); @@ -2890,7 +2993,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* basic block compression */ DISPLAYLEVEL(3, "test%3i : magic-less format test : ", testNb++); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); { ZSTD_inBuffer in = { CNBuffer, inputSize, 0 }; ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 }; size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); @@ -2908,7 +3011,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : decompress of magic-less frame : ", testNb++); ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); - CHECK( 
ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) ); + CHECK_Z( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) ); { ZSTD_frameHeader zfh; size_t const zfhrt = ZSTD_getFrameHeader_advanced(&zfh, compressedBuffer, cSize, ZSTD_f_zstd1_magicless); if (zfhrt != 0) goto _output_error; @@ -2930,7 +3033,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* basic block compression */ DISPLAYLEVEL(3, "test%3i : empty magic-less format test : ", testNb++); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) ); { ZSTD_inBuffer in = { CNBuffer, 0, 0 }; ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(0), 0 }; size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); @@ -2942,7 +3045,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : decompress of empty magic-less frame : ", testNb++); ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); - CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) ); + CHECK_Z( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) ); /* one shot */ { size_t const result = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); if (result != 0) goto _output_error; @@ -2977,13 +3080,13 @@ static int basicUnitTests(U32 const seed, double compressibility) int check; if (ZSTD_isError(bounds.error)) continue; - CHECK(ZSTD_DCtx_getParameter(dctx, dParam, &value1)); + CHECK_Z(ZSTD_DCtx_getParameter(dctx, dParam, &value1)); value2 = (value1 != bounds.lowerBound) ? 
bounds.lowerBound : bounds.upperBound; - CHECK(ZSTD_DCtx_setParameter(dctx, dParam, value2)); - CHECK(ZSTD_DCtx_getParameter(dctx, dParam, &check)); + CHECK_Z(ZSTD_DCtx_setParameter(dctx, dParam, value2)); + CHECK_Z(ZSTD_DCtx_getParameter(dctx, dParam, &check)); if (check != value2) goto _output_error; - CHECK(ZSTD_DCtx_reset(dctx, ZSTD_reset_parameters)); - CHECK(ZSTD_DCtx_getParameter(dctx, dParam, &check)); + CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_parameters)); + CHECK_Z(ZSTD_DCtx_getParameter(dctx, dParam, &check)); if (check != value1) goto _output_error; } ZSTD_freeDCtx(dctx); @@ -3000,21 +3103,21 @@ static int basicUnitTests(U32 const seed, double compressibility) /* basic block compression */ DISPLAYLEVEL(3, "test%3i : Block compression test : ", testNb++); - CHECK( ZSTD_compressBegin(cctx, 5) ); - CHECK( ZSTD_getBlockSize(cctx) >= blockSize); + CHECK_Z( ZSTD_compressBegin(cctx, 5) ); + CHECK_Z( ZSTD_getBlockSize(cctx) >= blockSize); CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize) ); DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : Block decompression test : ", testNb++); - CHECK( ZSTD_decompressBegin(dctx) ); + CHECK_Z( ZSTD_decompressBegin(dctx) ); { CHECK_NEWV(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); if (r != blockSize) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); /* very long stream of block compression */ DISPLAYLEVEL(3, "test%3i : Huge block streaming compression test : ", testNb++); - CHECK( ZSTD_compressBegin(cctx, -199) ); /* we just want to quickly overflow internal U32 index */ - CHECK( ZSTD_getBlockSize(cctx) >= blockSize); + CHECK_Z( ZSTD_compressBegin(cctx, -199) ); /* we just want to quickly overflow internal U32 index */ + CHECK_Z( ZSTD_getBlockSize(cctx) >= blockSize); { U64 const toCompress = 5000000000ULL; /* > 4 GB */ U64 compressed = 0; while (compressed < toCompress) { @@ -3027,7 +3130,7 @@ static int basicUnitTests(U32 
const seed, double compressibility) /* dictionary block compression */ DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++); - CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) ); + CHECK_Z( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) ); CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize)); RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed); /* create a non-compressible second block */ { CHECK_NEWV(r, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* for cctx history consistency */ @@ -3038,7 +3141,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : Dictionary Block decompression test : ", testNb++); - CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) ); + CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) ); { CHECK_NEWV( r, ZSTD_decompressBlock(dctx, decodedBuffer, blockSize, compressedBuffer, cSize) ); if (r != blockSize) { DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize); @@ -3057,8 +3160,8 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++); { ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 3); if (cdict==NULL) goto _output_error; - CHECK( ZSTD_compressBegin_usingCDict(cctx, cdict) ); - CHECK( ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) ); + CHECK_Z( ZSTD_compressBegin_usingCDict(cctx, cdict) ); + CHECK_Z( ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) ); ZSTD_freeCDict(cdict); } DISPLAYLEVEL(3, "OK \n"); @@ 
-3227,7 +3330,7 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t const bound = ZSTD_compressBound(_3BYTESTESTLENGTH); size_t nbSeq = 1; while (nbSeq <= maxNbSeq) { - CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19)); + CHECK_Z(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19)); /* Check every sequence for the first 100, then skip more rapidly. */ if (nbSeq < 100) { ++nbSeq; @@ -3256,7 +3359,7 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t const bound = ZSTD_compressBound(CNBuffSize); size_t size = 1; while (size <= CNBuffSize) { - CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3)); + CHECK_Z(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3)); /* Check every size for the first 100, then skip more rapidly. */ if (size < 100) { ++size; @@ -3291,7 +3394,7 @@ static int basicUnitTests(U32 const seed, double compressibility) void* const outputBuffer = malloc(outputSize); ZSTD_CCtx* const cctx = ZSTD_createCCtx(); if (!outputBuffer || !cctx) goto _output_error; - CHECK(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1)); + CHECK_Z(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1)); free(outputBuffer); ZSTD_freeCCtx(cctx); } @@ -3628,7 +3731,7 @@ static int longUnitTests(U32 const seed, double compressibility) while (approxIndex <= (maxIndex / 4) * 3) { CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); approxIndex += in.pos; - CHECK(in.pos == in.size); + CHECK_Z(in.pos == in.size); in.pos = 0; out.pos = 0; } @@ -3654,7 +3757,7 @@ static int longUnitTests(U32 const seed, double compressibility) while (approxIndex <= maxIndex) { CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); approxIndex += in.pos; - CHECK(in.pos == in.size); + CHECK_Z(in.pos == in.size); in.pos = 0; out.pos = 0; } @@ -3737,7 
+3840,7 @@ static int longUnitTests(U32 const seed, double compressibility) RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed); RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed); - CHECK(cctx_params != NULL); + CHECK_Z(cctx_params != NULL); for (dictSize = CNBuffSize; dictSize; dictSize = dictSize >> 3) { DISPLAYLEVEL(3, "\n Testing with dictSize %u ", (U32)dictSize); @@ -3780,11 +3883,6 @@ _end: free(compressedBuffer); free(decodedBuffer); return testResult; - -_output_error: - testResult = 1; - DISPLAY("Error detected in Unit tests ! \n"); - goto _end; } diff --git a/tests/gzip/Makefile b/tests/gzip/Makefile index a50350f8d..cca310967 100644 --- a/tests/gzip/Makefile +++ b/tests/gzip/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/invalidDictionaries.c b/tests/invalidDictionaries.c index b71f7419f..66caa9e99 100644 --- a/tests/invalidDictionaries.c +++ b/tests/invalidDictionaries.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/legacy.c b/tests/legacy.c index ac4938fe5..3be386495 100644 --- a/tests/legacy.c +++ b/tests/legacy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/longmatch.c b/tests/longmatch.c index a171c0e4a..547b26190 100644 --- a/tests/longmatch.c +++ b/tests/longmatch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -36,27 +36,27 @@ compress(ZSTD_CStream *ctx, ZSTD_outBuffer out, const void *data, size_t size) int main(int argc, const char** argv) { ZSTD_CStream* ctx; - ZSTD_parameters params; - size_t rc; - unsigned windowLog; + unsigned windowLog = 18; (void)argc; (void)argv; /* Create stream */ - ctx = ZSTD_createCStream(); + ctx = ZSTD_createCCtx(); if (!ctx) { return 1; } /* Set parameters */ - memset(&params, 0, sizeof(params)); - params.cParams.windowLog = 18; - params.cParams.chainLog = 13; - params.cParams.hashLog = 14; - params.cParams.searchLog = 1; - params.cParams.minMatch = 7; - params.cParams.targetLength = 16; - params.cParams.strategy = ZSTD_fast; - windowLog = params.cParams.windowLog; - /* Initialize stream */ - rc = ZSTD_initCStream_advanced(ctx, NULL, 0, params, 0); - if (ZSTD_isError(rc)) { return 2; } + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, windowLog))) + return 2; + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, 13))) + return 2; + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, 14))) + return 2; + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, 1))) + return 2; + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, 7))) + return 2; + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, 16))) + return 2; + if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, ZSTD_fast))) + return 2; { U64 compressed = 0; const U64 toCompress = ((U64)1) << 33; @@ -97,5 +97,6 @@ int main(int argc, const char** argv) free(srcBuffer); free(dstBuffer); } + ZSTD_freeCCtx(ctx); return 0; } diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 756a43340..8971c65d6 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved.
* * This source code is licensed under both the BSD-style license (found in the @@ -566,10 +566,10 @@ resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_ double cs = 0., ds = 0., rt, cm = 0.; const double r1 = 1, r2 = 0.1, rtr = 0.5; double ret; - if(target.cSpeed) { cs = res.cSpeed / (double)target.cSpeed; } - if(target.dSpeed) { ds = res.dSpeed / (double)target.dSpeed; } - if(target.cMem != (U32)-1) { cm = (double)target.cMem / res.cMem; } - rt = ((double)srcSize / res.cSize); + if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; } + if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; } + if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; } + rt = ((double)srcSize / (double)res.cSize); ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr + (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2; @@ -581,8 +581,8 @@ resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_ static double resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes) { - double normalizedCSpeedGain1 = ((double)result1.cSpeed / lvlRes.cSpeed) - 1; - double normalizedRatioGain1 = ((double)lvlRes.cSize / result1.cSize) - 1; + double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1; + double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1; if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) { return 0.0; } @@ -854,7 +854,7 @@ BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize) } { double const ratio = res.result.cSize ? 
- (double)srcSize / res.result.cSize : 0; + (double)srcSize / (double)res.result.cSize : 0; double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT; double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT; @@ -937,7 +937,7 @@ BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, co } fprintf(f, "================================\n"); fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n", - (double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT); + (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT); fprintf(f, "Overall Winner: \n"); @@ -977,7 +977,7 @@ BMK_print_cLevelEntry(FILE* f, const int cLevel, } /* print comment */ { double const ratio = result.cSize ? - (double)srcSize / result.cSize : 0; + (double)srcSize / (double)result.cSize : 0; double const cSpeedMBps = (double)result.cSpeed / MB_UNIT; double const dSpeedMBps = (double)result.dSpeed / MB_UNIT; @@ -1726,19 +1726,19 @@ static int allBench(BMK_benchResult_t* resultPtr, /* calculate uncertainty in compression / decompression runs */ if (benchres.cSpeed) { - U64 const loopDurationC = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed); + double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed); uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC); } if (benchres.dSpeed) { - U64 const loopDurationD = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed); + double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed); uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD); } /* optimistic assumption of benchres */ { BMK_benchResult_t resultMax = benchres; - resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE); - resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE); + resultMax.cSpeed = 
(unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE); + resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE); /* disregard infeasible results in feas mode */ /* disregard if resultMax < winner in infeas mode */ @@ -1850,8 +1850,8 @@ static int BMK_seed(winnerInfo_t* winners, if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) { /* Validate solution is "good enough" */ - double W_ratio = (double)buf.srcSize / testResult.cSize; - double O_ratio = (double)buf.srcSize / winners[cLevel].result.cSize; + double W_ratio = (double)buf.srcSize / (double)testResult.cSize; + double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize; double W_ratioNote = log (W_ratio); double O_ratioNote = log (O_ratio); size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB); @@ -1864,11 +1864,11 @@ static int BMK_seed(winnerInfo_t* winners, double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); - double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log(testResult.cSpeed); - double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed); + double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed); + double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed); - double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log(testResult.dSpeed); - double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed); + double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed); + double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed); if (W_DMemUsed_note < O_DMemUsed_note) { /* uses too much 
Decompression memory for too little benefit */ diff --git a/tests/poolTests.c b/tests/poolTests.c index 08f31c069..9e62722bf 100644 --- a/tests/poolTests.c +++ b/tests/poolTests.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/rateLimiter.py b/tests/rateLimiter.py index 26293728e..3e65081e0 100755 --- a/tests/rateLimiter.py +++ b/tests/rateLimiter.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/Makefile b/tests/regression/Makefile index a440c6c94..ba8b43024 100644 --- a/tests/regression/Makefile +++ b/tests/regression/Makefile @@ -1,5 +1,5 @@ # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/config.c b/tests/regression/config.c index 30d0ca5e2..1d86fff23 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/config.h b/tests/regression/config.h index dd8893743..a4b542a90 100644 --- a/tests/regression/config.h +++ b/tests/regression/config.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/data.c b/tests/regression/data.c index 341b02d3b..43f085f2c 100644 --- a/tests/regression/data.c +++ b/tests/regression/data.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/data.h b/tests/regression/data.h index e54e6a1dc..a4ee92072 100644 --- a/tests/regression/data.h +++ b/tests/regression/data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/levels.h b/tests/regression/levels.h index e98209d80..d15b12046 100644 --- a/tests/regression/levels.h +++ b/tests/regression/levels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/method.c b/tests/regression/method.c index 55b1154eb..f84a15ef3 100644 --- a/tests/regression/method.c +++ b/tests/regression/method.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/method.h b/tests/regression/method.h index 1a36a9399..8efdd33a0 100644 --- a/tests/regression/method.h +++ b/tests/regression/method.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/result.c b/tests/regression/result.c index 1f879c140..8ccb8751e 100644 --- a/tests/regression/result.c +++ b/tests/regression/result.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/result.h b/tests/regression/result.h index 197fa905d..8a761ea4d 100644 --- a/tests/regression/result.h +++ b/tests/regression/result.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/regression/results.csv b/tests/regression/results.csv index dee7d5721..188eea900 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -11,10 +11,10 @@ silesia.tar, level 6, compress silesia.tar, level 7, compress simple, 4576661 silesia.tar, level 9, compress simple, 4552899 silesia.tar, level 13, compress simple, 4502956 -silesia.tar, level 16, compress simple, 4360527 -silesia.tar, level 19, compress simple, 4266970 +silesia.tar, level 16, compress simple, 4360546 +silesia.tar, level 19, compress simple, 4265911 silesia.tar, uncompressed literals, compress simple, 4854086 -silesia.tar, uncompressed literals optimal, compress simple, 4266970 +silesia.tar, uncompressed literals optimal, compress simple, 4265911 silesia.tar, huffman literals, compress simple, 6179047 github.tar, level -5, compress simple, 52115 github.tar, level -3, compress simple, 45678 @@ -28,10 +28,10 @@ github.tar, level 6, compress github.tar, level 7, compress simple, 38110 github.tar, level 9, compress simple, 36760 github.tar, level 13, compress simple, 35501 -github.tar, level 16, compress simple, 40471 -github.tar, level 19, 
compress simple, 32149 +github.tar, level 16, compress simple, 40466 +github.tar, level 19, compress simple, 32276 github.tar, uncompressed literals, compress simple, 38831 -github.tar, uncompressed literals optimal, compress simple, 32149 +github.tar, uncompressed literals optimal, compress simple, 32276 github.tar, huffman literals, compress simple, 42560 silesia, level -5, compress cctx, 6857372 silesia, level -3, compress cctx, 6503412 @@ -45,8 +45,8 @@ silesia, level 6, compress silesia, level 7, compress cctx, 4566984 silesia, level 9, compress cctx, 4543018 silesia, level 13, compress cctx, 4493990 -silesia, level 16, compress cctx, 4359864 -silesia, level 19, compress cctx, 4296438 +silesia, level 16, compress cctx, 4360041 +silesia, level 19, compress cctx, 4296055 silesia, long distance mode, compress cctx, 4842075 silesia, multithreaded, compress cctx, 4842075 silesia, multithreaded long distance mode, compress cctx, 4842075 @@ -55,7 +55,7 @@ silesia, small hash log, compress silesia, small chain log, compress cctx, 4912197 silesia, explicit params, compress cctx, 4794052 silesia, uncompressed literals, compress cctx, 4842075 -silesia, uncompressed literals optimal, compress cctx, 4296438 +silesia, uncompressed literals optimal, compress cctx, 4296055 silesia, huffman literals, compress cctx, 6172202 silesia, multithreaded with advanced params, compress cctx, 4842075 github, level -5, compress cctx, 204407 @@ -80,11 +80,11 @@ github, level 7, compress github, level 7 with dict, compress cctx, 38755 github, level 9, compress cctx, 135122 github, level 9 with dict, compress cctx, 39398 -github, level 13, compress cctx, 132729 +github, level 13, compress cctx, 132878 github, level 13 with dict, compress cctx, 39948 -github, level 16, compress cctx, 132729 +github, level 16, compress cctx, 133209 github, level 16 with dict, compress cctx, 37568 -github, level 19, compress cctx, 132729 +github, level 19, compress cctx, 132879 github, level 19 with dict, 
compress cctx, 37567 github, long distance mode, compress cctx, 141069 github, multithreaded, compress cctx, 141069 @@ -94,7 +94,7 @@ github, small hash log, compress github, small chain log, compress cctx, 139242 github, explicit params, compress cctx, 140932 github, uncompressed literals, compress cctx, 136332 -github, uncompressed literals optimal, compress cctx, 132729 +github, uncompressed literals optimal, compress cctx, 132879 github, huffman literals, compress cctx, 175468 github, multithreaded with advanced params, compress cctx, 141069 silesia, level -5, zstdcli, 6857420 @@ -109,8 +109,8 @@ silesia, level 6, zstdcli, silesia, level 7, zstdcli, 4567032 silesia, level 9, zstdcli, 4543066 silesia, level 13, zstdcli, 4494038 -silesia, level 16, zstdcli, 4359912 -silesia, level 19, zstdcli, 4296486 +silesia, level 16, zstdcli, 4360089 +silesia, level 19, zstdcli, 4296103 silesia, long distance mode, zstdcli, 4833785 silesia, multithreaded, zstdcli, 4842123 silesia, multithreaded long distance mode, zstdcli, 4833785 @@ -134,14 +134,14 @@ silesia.tar, level 6, zstdcli, silesia.tar, level 7, zstdcli, 4578719 silesia.tar, level 9, zstdcli, 4552903 silesia.tar, level 13, zstdcli, 4502960 -silesia.tar, level 16, zstdcli, 4360531 -silesia.tar, level 19, zstdcli, 4266974 +silesia.tar, level 16, zstdcli, 4360550 +silesia.tar, level 19, zstdcli, 4265915 silesia.tar, no source size, zstdcli, 4854160 silesia.tar, long distance mode, zstdcli, 4845745 silesia.tar, multithreaded, zstdcli, 4854164 silesia.tar, multithreaded long distance mode, zstdcli, 4845745 silesia.tar, small window log, zstdcli, 7100701 -silesia.tar, small hash log, zstdcli, 6529289 +silesia.tar, small hash log, zstdcli, 6529264 silesia.tar, small chain log, zstdcli, 4917022 silesia.tar, explicit params, zstdcli, 4820713 silesia.tar, uncompressed literals, zstdcli, 5122571 @@ -170,11 +170,11 @@ github, level 7, zstdcli, github, level 7 with dict, zstdcli, 40745 github, level 9, zstdcli, 137122 github, 
level 9 with dict, zstdcli, 41393 -github, level 13, zstdcli, 134729 +github, level 13, zstdcli, 134878 github, level 13 with dict, zstdcli, 41900 -github, level 16, zstdcli, 134729 +github, level 16, zstdcli, 135209 github, level 16 with dict, zstdcli, 39577 -github, level 19, zstdcli, 134729 +github, level 19, zstdcli, 134879 github, level 19 with dict, zstdcli, 39576 github, long distance mode, zstdcli, 138332 github, multithreaded, zstdcli, 138332 @@ -184,7 +184,7 @@ github, small hash log, zstdcli, github, small chain log, zstdcli, 138341 github, explicit params, zstdcli, 136197 github, uncompressed literals, zstdcli, 167911 -github, uncompressed literals optimal, zstdcli, 159227 +github, uncompressed literals optimal, zstdcli, 154667 github, huffman literals, zstdcli, 144365 github, multithreaded with advanced params, zstdcli, 167911 github.tar, level -5, zstdcli, 52119 @@ -211,9 +211,9 @@ github.tar, level 9, zstdcli, github.tar, level 9 with dict, zstdcli, 36632 github.tar, level 13, zstdcli, 35505 github.tar, level 13 with dict, zstdcli, 37134 -github.tar, level 16, zstdcli, 40475 +github.tar, level 16, zstdcli, 40470 github.tar, level 16 with dict, zstdcli, 33378 -github.tar, level 19, zstdcli, 32153 +github.tar, level 19, zstdcli, 32280 github.tar, level 19 with dict, zstdcli, 32716 github.tar, no source size, zstdcli, 38832 github.tar, no source size with dict, zstdcli, 38004 @@ -248,8 +248,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced one pass, 4505046 silesia, level 12 row 2, advanced one pass, 4503116 silesia, level 13, advanced one pass, 4493990 -silesia, level 16, advanced one pass, 4359864 -silesia, level 19, advanced one pass, 4296438 +silesia, level 16, advanced one pass, 4360041 +silesia, level 19, advanced one pass, 4296055 silesia, no source size, advanced one pass, 4842075 silesia, long distance mode, advanced one pass, 4833710 silesia, multithreaded, advanced one pass, 4842075 @@ -282,14 +282,14 @@ silesia.tar, 
level 11 row 2, advanced silesia.tar, level 12 row 1, advanced one pass, 4514049 silesia.tar, level 12 row 2, advanced one pass, 4513797 silesia.tar, level 13, advanced one pass, 4502956 -silesia.tar, level 16, advanced one pass, 4360527 -silesia.tar, level 19, advanced one pass, 4266970 +silesia.tar, level 16, advanced one pass, 4360546 +silesia.tar, level 19, advanced one pass, 4265911 silesia.tar, no source size, advanced one pass, 4854086 silesia.tar, long distance mode, advanced one pass, 4840452 silesia.tar, multithreaded, advanced one pass, 4854160 silesia.tar, multithreaded long distance mode, advanced one pass, 4845741 silesia.tar, small window log, advanced one pass, 7100655 -silesia.tar, small hash log, advanced one pass, 6529231 +silesia.tar, small hash log, advanced one pass, 6529206 silesia.tar, small chain log, advanced one pass, 4917041 silesia.tar, explicit params, advanced one pass, 4806855 silesia.tar, uncompressed literals, advanced one pass, 5122473 @@ -370,39 +370,39 @@ github, level 9 with dict dms, advanced github, level 9 with dict dds, advanced one pass, 39393 github, level 9 with dict copy, advanced one pass, 39398 github, level 9 with dict load, advanced one pass, 41710 -github, level 11 row 1, advanced one pass, 135119 +github, level 11 row 1, advanced one pass, 135367 github, level 11 row 1 with dict dms, advanced one pass, 39671 github, level 11 row 1 with dict dds, advanced one pass, 39671 github, level 11 row 1 with dict copy, advanced one pass, 39651 github, level 11 row 1 with dict load, advanced one pass, 41360 -github, level 11 row 2, advanced one pass, 135119 +github, level 11 row 2, advanced one pass, 135367 github, level 11 row 2 with dict dms, advanced one pass, 39671 github, level 11 row 2 with dict dds, advanced one pass, 39671 github, level 11 row 2 with dict copy, advanced one pass, 39651 github, level 11 row 2 with dict load, advanced one pass, 41360 -github, level 12 row 1, advanced one pass, 134180 +github, level 12 
row 1, advanced one pass, 134402 github, level 12 row 1 with dict dms, advanced one pass, 39677 github, level 12 row 1 with dict dds, advanced one pass, 39677 github, level 12 row 1 with dict copy, advanced one pass, 39677 github, level 12 row 1 with dict load, advanced one pass, 41166 -github, level 12 row 2, advanced one pass, 134180 +github, level 12 row 2, advanced one pass, 134402 github, level 12 row 2 with dict dms, advanced one pass, 39677 github, level 12 row 2 with dict dds, advanced one pass, 39677 github, level 12 row 2 with dict copy, advanced one pass, 39677 github, level 12 row 2 with dict load, advanced one pass, 41166 -github, level 13, advanced one pass, 132729 +github, level 13, advanced one pass, 132878 github, level 13 with dict, advanced one pass, 39900 github, level 13 with dict dms, advanced one pass, 39900 github, level 13 with dict dds, advanced one pass, 39900 github, level 13 with dict copy, advanced one pass, 39948 github, level 13 with dict load, advanced one pass, 42624 -github, level 16, advanced one pass, 132729 +github, level 16, advanced one pass, 133209 github, level 16 with dict, advanced one pass, 37577 github, level 16 with dict dms, advanced one pass, 37577 github, level 16 with dict dds, advanced one pass, 37577 github, level 16 with dict copy, advanced one pass, 37568 github, level 16 with dict load, advanced one pass, 42338 -github, level 19, advanced one pass, 132729 +github, level 19, advanced one pass, 132879 github, level 19 with dict, advanced one pass, 37576 github, level 19 with dict dms, advanced one pass, 37576 github, level 19 with dict dds, advanced one pass, 37576 @@ -418,7 +418,7 @@ github, small hash log, advanced github, small chain log, advanced one pass, 136341 github, explicit params, advanced one pass, 137727 github, uncompressed literals, advanced one pass, 165911 -github, uncompressed literals optimal, advanced one pass, 157227 +github, uncompressed literals optimal, advanced one pass, 152667 github, 
huffman literals, advanced one pass, 142365 github, multithreaded with advanced params, advanced one pass, 165911 github.tar, level -5, advanced one pass, 52115 @@ -521,13 +521,13 @@ github.tar, level 13 with dict dms, advanced github.tar, level 13 with dict dds, advanced one pass, 37220 github.tar, level 13 with dict copy, advanced one pass, 37130 github.tar, level 13 with dict load, advanced one pass, 36010 -github.tar, level 16, advanced one pass, 40471 +github.tar, level 16, advanced one pass, 40466 github.tar, level 16 with dict, advanced one pass, 33374 github.tar, level 16 with dict dms, advanced one pass, 33206 github.tar, level 16 with dict dds, advanced one pass, 33206 github.tar, level 16 with dict copy, advanced one pass, 33374 github.tar, level 16 with dict load, advanced one pass, 39081 -github.tar, level 19, advanced one pass, 32149 +github.tar, level 19, advanced one pass, 32276 github.tar, level 19 with dict, advanced one pass, 32712 github.tar, level 19 with dict dms, advanced one pass, 32555 github.tar, level 19 with dict dds, advanced one pass, 32555 @@ -566,8 +566,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced one pass small out, 4505046 silesia, level 12 row 2, advanced one pass small out, 4503116 silesia, level 13, advanced one pass small out, 4493990 -silesia, level 16, advanced one pass small out, 4359864 -silesia, level 19, advanced one pass small out, 4296438 +silesia, level 16, advanced one pass small out, 4360041 +silesia, level 19, advanced one pass small out, 4296055 silesia, no source size, advanced one pass small out, 4842075 silesia, long distance mode, advanced one pass small out, 4833710 silesia, multithreaded, advanced one pass small out, 4842075 @@ -600,14 +600,14 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced one pass small out, 4514049 silesia.tar, level 12 row 2, advanced one pass small out, 4513797 silesia.tar, level 13, advanced one pass small out, 4502956 
-silesia.tar, level 16, advanced one pass small out, 4360527 -silesia.tar, level 19, advanced one pass small out, 4266970 +silesia.tar, level 16, advanced one pass small out, 4360546 +silesia.tar, level 19, advanced one pass small out, 4265911 silesia.tar, no source size, advanced one pass small out, 4854086 silesia.tar, long distance mode, advanced one pass small out, 4840452 silesia.tar, multithreaded, advanced one pass small out, 4854160 silesia.tar, multithreaded long distance mode, advanced one pass small out, 4845741 silesia.tar, small window log, advanced one pass small out, 7100655 -silesia.tar, small hash log, advanced one pass small out, 6529231 +silesia.tar, small hash log, advanced one pass small out, 6529206 silesia.tar, small chain log, advanced one pass small out, 4917041 silesia.tar, explicit params, advanced one pass small out, 4806855 silesia.tar, uncompressed literals, advanced one pass small out, 5122473 @@ -688,39 +688,39 @@ github, level 9 with dict dms, advanced github, level 9 with dict dds, advanced one pass small out, 39393 github, level 9 with dict copy, advanced one pass small out, 39398 github, level 9 with dict load, advanced one pass small out, 41710 -github, level 11 row 1, advanced one pass small out, 135119 +github, level 11 row 1, advanced one pass small out, 135367 github, level 11 row 1 with dict dms, advanced one pass small out, 39671 github, level 11 row 1 with dict dds, advanced one pass small out, 39671 github, level 11 row 1 with dict copy, advanced one pass small out, 39651 github, level 11 row 1 with dict load, advanced one pass small out, 41360 -github, level 11 row 2, advanced one pass small out, 135119 +github, level 11 row 2, advanced one pass small out, 135367 github, level 11 row 2 with dict dms, advanced one pass small out, 39671 github, level 11 row 2 with dict dds, advanced one pass small out, 39671 github, level 11 row 2 with dict copy, advanced one pass small out, 39651 github, level 11 row 2 with dict load, 
advanced one pass small out, 41360 -github, level 12 row 1, advanced one pass small out, 134180 +github, level 12 row 1, advanced one pass small out, 134402 github, level 12 row 1 with dict dms, advanced one pass small out, 39677 github, level 12 row 1 with dict dds, advanced one pass small out, 39677 github, level 12 row 1 with dict copy, advanced one pass small out, 39677 github, level 12 row 1 with dict load, advanced one pass small out, 41166 -github, level 12 row 2, advanced one pass small out, 134180 +github, level 12 row 2, advanced one pass small out, 134402 github, level 12 row 2 with dict dms, advanced one pass small out, 39677 github, level 12 row 2 with dict dds, advanced one pass small out, 39677 github, level 12 row 2 with dict copy, advanced one pass small out, 39677 github, level 12 row 2 with dict load, advanced one pass small out, 41166 -github, level 13, advanced one pass small out, 132729 +github, level 13, advanced one pass small out, 132878 github, level 13 with dict, advanced one pass small out, 39900 github, level 13 with dict dms, advanced one pass small out, 39900 github, level 13 with dict dds, advanced one pass small out, 39900 github, level 13 with dict copy, advanced one pass small out, 39948 github, level 13 with dict load, advanced one pass small out, 42624 -github, level 16, advanced one pass small out, 132729 +github, level 16, advanced one pass small out, 133209 github, level 16 with dict, advanced one pass small out, 37577 github, level 16 with dict dms, advanced one pass small out, 37577 github, level 16 with dict dds, advanced one pass small out, 37577 github, level 16 with dict copy, advanced one pass small out, 37568 github, level 16 with dict load, advanced one pass small out, 42338 -github, level 19, advanced one pass small out, 132729 +github, level 19, advanced one pass small out, 132879 github, level 19 with dict, advanced one pass small out, 37576 github, level 19 with dict dms, advanced one pass small out, 37576 
github, level 19 with dict dds, advanced one pass small out, 37576 @@ -736,7 +736,7 @@ github, small hash log, advanced github, small chain log, advanced one pass small out, 136341 github, explicit params, advanced one pass small out, 137727 github, uncompressed literals, advanced one pass small out, 165911 -github, uncompressed literals optimal, advanced one pass small out, 157227 +github, uncompressed literals optimal, advanced one pass small out, 152667 github, huffman literals, advanced one pass small out, 142365 github, multithreaded with advanced params, advanced one pass small out, 165911 github.tar, level -5, advanced one pass small out, 52115 @@ -839,13 +839,13 @@ github.tar, level 13 with dict dms, advanced github.tar, level 13 with dict dds, advanced one pass small out, 37220 github.tar, level 13 with dict copy, advanced one pass small out, 37130 github.tar, level 13 with dict load, advanced one pass small out, 36010 -github.tar, level 16, advanced one pass small out, 40471 +github.tar, level 16, advanced one pass small out, 40466 github.tar, level 16 with dict, advanced one pass small out, 33374 github.tar, level 16 with dict dms, advanced one pass small out, 33206 github.tar, level 16 with dict dds, advanced one pass small out, 33206 github.tar, level 16 with dict copy, advanced one pass small out, 33374 github.tar, level 16 with dict load, advanced one pass small out, 39081 -github.tar, level 19, advanced one pass small out, 32149 +github.tar, level 19, advanced one pass small out, 32276 github.tar, level 19 with dict, advanced one pass small out, 32712 github.tar, level 19 with dict dms, advanced one pass small out, 32555 github.tar, level 19 with dict dds, advanced one pass small out, 32555 @@ -884,8 +884,8 @@ silesia, level 11 row 2, advanced silesia, level 12 row 1, advanced streaming, 4505046 silesia, level 12 row 2, advanced streaming, 4503116 silesia, level 13, advanced streaming, 4493990 -silesia, level 16, advanced streaming, 4359864 
-silesia, level 19, advanced streaming, 4296438 +silesia, level 16, advanced streaming, 4360041 +silesia, level 19, advanced streaming, 4296055 silesia, no source size, advanced streaming, 4842039 silesia, long distance mode, advanced streaming, 4833710 silesia, multithreaded, advanced streaming, 4842075 @@ -918,14 +918,14 @@ silesia.tar, level 11 row 2, advanced silesia.tar, level 12 row 1, advanced streaming, 4514049 silesia.tar, level 12 row 2, advanced streaming, 4513797 silesia.tar, level 13, advanced streaming, 4502956 -silesia.tar, level 16, advanced streaming, 4360527 -silesia.tar, level 19, advanced streaming, 4266970 +silesia.tar, level 16, advanced streaming, 4360546 +silesia.tar, level 19, advanced streaming, 4265911 silesia.tar, no source size, advanced streaming, 4859267 silesia.tar, long distance mode, advanced streaming, 4840452 silesia.tar, multithreaded, advanced streaming, 4854160 silesia.tar, multithreaded long distance mode, advanced streaming, 4845741 silesia.tar, small window log, advanced streaming, 7117559 -silesia.tar, small hash log, advanced streaming, 6529234 +silesia.tar, small hash log, advanced streaming, 6529209 silesia.tar, small chain log, advanced streaming, 4917021 silesia.tar, explicit params, advanced streaming, 4806873 silesia.tar, uncompressed literals, advanced streaming, 5127423 @@ -1006,39 +1006,39 @@ github, level 9 with dict dms, advanced github, level 9 with dict dds, advanced streaming, 39393 github, level 9 with dict copy, advanced streaming, 39398 github, level 9 with dict load, advanced streaming, 41710 -github, level 11 row 1, advanced streaming, 135119 +github, level 11 row 1, advanced streaming, 135367 github, level 11 row 1 with dict dms, advanced streaming, 39671 github, level 11 row 1 with dict dds, advanced streaming, 39671 github, level 11 row 1 with dict copy, advanced streaming, 39651 github, level 11 row 1 with dict load, advanced streaming, 41360 -github, level 11 row 2, advanced streaming, 135119 
+github, level 11 row 2, advanced streaming, 135367 github, level 11 row 2 with dict dms, advanced streaming, 39671 github, level 11 row 2 with dict dds, advanced streaming, 39671 github, level 11 row 2 with dict copy, advanced streaming, 39651 github, level 11 row 2 with dict load, advanced streaming, 41360 -github, level 12 row 1, advanced streaming, 134180 +github, level 12 row 1, advanced streaming, 134402 github, level 12 row 1 with dict dms, advanced streaming, 39677 github, level 12 row 1 with dict dds, advanced streaming, 39677 github, level 12 row 1 with dict copy, advanced streaming, 39677 github, level 12 row 1 with dict load, advanced streaming, 41166 -github, level 12 row 2, advanced streaming, 134180 +github, level 12 row 2, advanced streaming, 134402 github, level 12 row 2 with dict dms, advanced streaming, 39677 github, level 12 row 2 with dict dds, advanced streaming, 39677 github, level 12 row 2 with dict copy, advanced streaming, 39677 github, level 12 row 2 with dict load, advanced streaming, 41166 -github, level 13, advanced streaming, 132729 +github, level 13, advanced streaming, 132878 github, level 13 with dict, advanced streaming, 39900 github, level 13 with dict dms, advanced streaming, 39900 github, level 13 with dict dds, advanced streaming, 39900 github, level 13 with dict copy, advanced streaming, 39948 github, level 13 with dict load, advanced streaming, 42624 -github, level 16, advanced streaming, 132729 +github, level 16, advanced streaming, 133209 github, level 16 with dict, advanced streaming, 37577 github, level 16 with dict dms, advanced streaming, 37577 github, level 16 with dict dds, advanced streaming, 37577 github, level 16 with dict copy, advanced streaming, 37568 github, level 16 with dict load, advanced streaming, 42338 -github, level 19, advanced streaming, 132729 +github, level 19, advanced streaming, 132879 github, level 19 with dict, advanced streaming, 37576 github, level 19 with dict dms, advanced streaming, 37576 
github, level 19 with dict dds, advanced streaming, 37576 @@ -1054,7 +1054,7 @@ github, small hash log, advanced github, small chain log, advanced streaming, 136341 github, explicit params, advanced streaming, 137727 github, uncompressed literals, advanced streaming, 165911 -github, uncompressed literals optimal, advanced streaming, 157227 +github, uncompressed literals optimal, advanced streaming, 152667 github, huffman literals, advanced streaming, 142365 github, multithreaded with advanced params, advanced streaming, 165911 github.tar, level -5, advanced streaming, 52152 @@ -1157,13 +1157,13 @@ github.tar, level 13 with dict dms, advanced github.tar, level 13 with dict dds, advanced streaming, 37220 github.tar, level 13 with dict copy, advanced streaming, 37130 github.tar, level 13 with dict load, advanced streaming, 36010 -github.tar, level 16, advanced streaming, 40471 +github.tar, level 16, advanced streaming, 40466 github.tar, level 16 with dict, advanced streaming, 33374 github.tar, level 16 with dict dms, advanced streaming, 33206 github.tar, level 16 with dict dds, advanced streaming, 33206 github.tar, level 16 with dict copy, advanced streaming, 33374 github.tar, level 16 with dict load, advanced streaming, 39081 -github.tar, level 19, advanced streaming, 32149 +github.tar, level 19, advanced streaming, 32276 github.tar, level 19 with dict, advanced streaming, 32712 github.tar, level 19 with dict dms, advanced streaming, 32555 github.tar, level 19 with dict dds, advanced streaming, 32555 @@ -1194,11 +1194,11 @@ silesia, level 6, old stre silesia, level 7, old streaming, 4566984 silesia, level 9, old streaming, 4543018 silesia, level 13, old streaming, 4493990 -silesia, level 16, old streaming, 4359864 -silesia, level 19, old streaming, 4296438 +silesia, level 16, old streaming, 4360041 +silesia, level 19, old streaming, 4296055 silesia, no source size, old streaming, 4842039 silesia, uncompressed literals, old streaming, 4842075 -silesia, uncompressed 
literals optimal, old streaming, 4296438 +silesia, uncompressed literals optimal, old streaming, 4296055 silesia, huffman literals, old streaming, 6172207 silesia.tar, level -5, old streaming, 6856523 silesia.tar, level -3, old streaming, 6505954 @@ -1212,11 +1212,11 @@ silesia.tar, level 6, old stre silesia.tar, level 7, old streaming, 4576664 silesia.tar, level 9, old streaming, 4552900 silesia.tar, level 13, old streaming, 4502956 -silesia.tar, level 16, old streaming, 4360527 -silesia.tar, level 19, old streaming, 4266970 +silesia.tar, level 16, old streaming, 4360546 +silesia.tar, level 19, old streaming, 4265911 silesia.tar, no source size, old streaming, 4859267 silesia.tar, uncompressed literals, old streaming, 4859271 -silesia.tar, uncompressed literals optimal, old streaming, 4266970 +silesia.tar, uncompressed literals optimal, old streaming, 4265911 silesia.tar, huffman literals, old streaming, 6179056 github, level -5, old streaming, 204407 github, level -5 with dict, old streaming, 46718 @@ -1240,16 +1240,16 @@ github, level 7, old stre github, level 7 with dict, old streaming, 38758 github, level 9, old streaming, 135122 github, level 9 with dict, old streaming, 39437 -github, level 13, old streaming, 132729 +github, level 13, old streaming, 132878 github, level 13 with dict, old streaming, 39900 -github, level 16, old streaming, 132729 +github, level 16, old streaming, 133209 github, level 16 with dict, old streaming, 37577 -github, level 19, old streaming, 132729 +github, level 19, old streaming, 132879 github, level 19 with dict, old streaming, 37576 github, no source size, old streaming, 140599 github, no source size with dict, old streaming, 40654 github, uncompressed literals, old streaming, 136332 -github, uncompressed literals optimal, old streaming, 132729 +github, uncompressed literals optimal, old streaming, 132879 github, huffman literals, old streaming, 175468 github.tar, level -5, old streaming, 52152 github.tar, level -5 with dict, old 
streaming, 51045 @@ -1275,14 +1275,14 @@ github.tar, level 9, old stre github.tar, level 9 with dict, old streaming, 36484 github.tar, level 13, old streaming, 35501 github.tar, level 13 with dict, old streaming, 37130 -github.tar, level 16, old streaming, 40471 +github.tar, level 16, old streaming, 40466 github.tar, level 16 with dict, old streaming, 33374 -github.tar, level 19, old streaming, 32149 +github.tar, level 19, old streaming, 32276 github.tar, level 19 with dict, old streaming, 32712 github.tar, no source size, old streaming, 38828 github.tar, no source size with dict, old streaming, 38000 github.tar, uncompressed literals, old streaming, 38831 -github.tar, uncompressed literals optimal, old streaming, 32149 +github.tar, uncompressed literals optimal, old streaming, 32276 github.tar, huffman literals, old streaming, 42560 silesia, level -5, old streaming advanced, 6854744 silesia, level -3, old streaming advanced, 6503319 @@ -1296,8 +1296,8 @@ silesia, level 6, old stre silesia, level 7, old streaming advanced, 4566984 silesia, level 9, old streaming advanced, 4543018 silesia, level 13, old streaming advanced, 4493990 -silesia, level 16, old streaming advanced, 4359864 -silesia, level 19, old streaming advanced, 4296438 +silesia, level 16, old streaming advanced, 4360041 +silesia, level 19, old streaming advanced, 4296055 silesia, no source size, old streaming advanced, 4842039 silesia, long distance mode, old streaming advanced, 4842075 silesia, multithreaded, old streaming advanced, 4842075 @@ -1307,7 +1307,7 @@ silesia, small hash log, old stre silesia, small chain log, old streaming advanced, 4912197 silesia, explicit params, old streaming advanced, 4795452 silesia, uncompressed literals, old streaming advanced, 4842075 -silesia, uncompressed literals optimal, old streaming advanced, 4296438 +silesia, uncompressed literals optimal, old streaming advanced, 4296055 silesia, huffman literals, old streaming advanced, 6172207 silesia, multithreaded with 
advanced params, old streaming advanced, 4842075 silesia.tar, level -5, old streaming advanced, 6856523 @@ -1322,18 +1322,18 @@ silesia.tar, level 6, old stre silesia.tar, level 7, old streaming advanced, 4576664 silesia.tar, level 9, old streaming advanced, 4552900 silesia.tar, level 13, old streaming advanced, 4502956 -silesia.tar, level 16, old streaming advanced, 4360527 -silesia.tar, level 19, old streaming advanced, 4266970 +silesia.tar, level 16, old streaming advanced, 4360546 +silesia.tar, level 19, old streaming advanced, 4265911 silesia.tar, no source size, old streaming advanced, 4859267 silesia.tar, long distance mode, old streaming advanced, 4859271 silesia.tar, multithreaded, old streaming advanced, 4859271 silesia.tar, multithreaded long distance mode, old streaming advanced, 4859271 silesia.tar, small window log, old streaming advanced, 7117562 -silesia.tar, small hash log, old streaming advanced, 6529234 +silesia.tar, small hash log, old streaming advanced, 6529209 silesia.tar, small chain log, old streaming advanced, 4917021 silesia.tar, explicit params, old streaming advanced, 4806873 silesia.tar, uncompressed literals, old streaming advanced, 4859271 -silesia.tar, uncompressed literals optimal, old streaming advanced, 4266970 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4265911 silesia.tar, huffman literals, old streaming advanced, 6179056 silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 213265 @@ -1360,9 +1360,9 @@ github, level 9, old stre github, level 9 with dict, old streaming advanced, 38981 github, level 13, old streaming advanced, 138676 github, level 13 with dict, old streaming advanced, 39725 -github, level 16, old streaming advanced, 138676 +github, level 16, old streaming advanced, 138575 github, level 16 with dict, old streaming advanced, 40789 -github, level 19, old streaming advanced, 132729 +github, level 19, old streaming advanced, 
132879 github, level 19 with dict, old streaming advanced, 37576 github, no source size, old streaming advanced, 140599 github, no source size with dict, old streaming advanced, 40608 @@ -1374,7 +1374,7 @@ github, small hash log, old stre github, small chain log, old streaming advanced, 139275 github, explicit params, old streaming advanced, 140937 github, uncompressed literals, old streaming advanced, 141104 -github, uncompressed literals optimal, old streaming advanced, 132729 +github, uncompressed literals optimal, old streaming advanced, 132879 github, huffman literals, old streaming advanced, 181107 github, multithreaded with advanced params, old streaming advanced, 141104 github.tar, level -5, old streaming advanced, 52152 @@ -1401,9 +1401,9 @@ github.tar, level 9, old stre github.tar, level 9 with dict, old streaming advanced, 36312 github.tar, level 13, old streaming advanced, 35501 github.tar, level 13 with dict, old streaming advanced, 35807 -github.tar, level 16, old streaming advanced, 40471 +github.tar, level 16, old streaming advanced, 40466 github.tar, level 16 with dict, old streaming advanced, 38578 -github.tar, level 19, old streaming advanced, 32149 +github.tar, level 19, old streaming advanced, 32276 github.tar, level 19 with dict, old streaming advanced, 32704 github.tar, no source size, old streaming advanced, 38828 github.tar, no source size with dict, old streaming advanced, 38015 @@ -1415,7 +1415,7 @@ github.tar, small hash log, old stre github.tar, small chain log, old streaming advanced, 41669 github.tar, explicit params, old streaming advanced, 41385 github.tar, uncompressed literals, old streaming advanced, 38831 -github.tar, uncompressed literals optimal, old streaming advanced, 32149 +github.tar, uncompressed literals optimal, old streaming advanced, 32276 github.tar, huffman literals, old streaming advanced, 42560 github.tar, multithreaded with advanced params, old streaming advanced, 38831 github, level -5 with dict, old streaming 
cdict, 46718 diff --git a/tests/regression/test.c b/tests/regression/test.c index 1de6be86c..07600be57 100644 --- a/tests/regression/test.c +++ b/tests/regression/test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/roundTripCrash.c b/tests/roundTripCrash.c index 9aa208cff..77411cd49 100644 --- a/tests/roundTripCrash.c +++ b/tests/roundTripCrash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/seqgen.c b/tests/seqgen.c index 1e340c851..0d8a766c8 100644 --- a/tests/seqgen.c +++ b/tests/seqgen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/seqgen.h b/tests/seqgen.h index cea3f552b..df17398ef 100644 --- a/tests/seqgen.h +++ b/tests/seqgen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/tests/test-license.py b/tests/test-license.py index f27cb6774..d54c16419 100755 --- a/tests/test-license.py +++ b/tests/test-license.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # ################################################################ -# Copyright (c) Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
# # This source code is licensed under both the BSD-style license (found in the @@ -83,8 +83,8 @@ def valid_copyright(lines): continue if "present" in line: return (False, f"Copyright line '{line}' contains 'present'!") - if "Facebook, Inc" not in line: - return (False, f"Copyright line '{line}' does not contain 'Facebook, Inc'") + if "Meta Platforms, Inc" not in line: + return (False, f"Copyright line '{line}' does not contain 'Meta Platforms, Inc'") year = YEAR_REGEX.search(line) if year is not None: return (False, f"Copyright line '{line}' contains {year.group(0)}; it should be yearless") diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index 94e3b8c5b..7117c1952 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -2,7 +2,7 @@ """Test zstd interoperability between versions""" # ################################################################ -# Copyright (c) Yann Collet, Facebook, Inc. +# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under both the BSD-style license (found in the @@ -23,7 +23,7 @@ from subprocess import Popen, PIPE repo_url = 'https://github.com/facebook/zstd.git' tmp_dir_name = 'tests/versionsTest' make_cmd = 'make' -make_args = ['-j','CFLAGS=-O1'] +make_args = ['-j','CFLAGS=-O0'] git_cmd = 'git' test_dat_src = 'README.md' test_dat = 'test_dat' @@ -86,41 +86,46 @@ def create_dict(tag, dict_source_path): if result == 0: print(dict_name + ' created') else: - print('ERROR: creating of ' + dict_name + ' failed') + raise RuntimeError('ERROR: creating of ' + dict_name + ' failed') else: print(dict_name + ' already exists') +def zstd(tag, args, input_file, output_file): + """ + Zstd compress input_file to output_file. + Need this helper because 0.5.0 is broken when stdout is not a TTY. + Throws an exception if the command returns non-zero. + """ + with open(input_file, "rb") as i: + with open(output_file, "wb") as o: + cmd = ['./zstd.' 
+ tag] + args + print("Running: '{}', input={}, output={}" .format( + ' '.join(cmd), input_file, output_file + )) + subprocess.check_call(cmd, stdin=i, stdout=o) + + def dict_compress_sample(tag, sample): dict_name = 'dict.' + tag - DEVNULL = open(os.devnull, 'wb') - if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_01_64_' + tag + '_dictio.zst') - if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_05_64_' + tag + '_dictio.zst') - if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_09_64_' + tag + '_dictio.zst') - if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-15f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_15_64_' + tag + '_dictio.zst') - if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-18f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_18_64_' + tag + '_dictio.zst') + zstd(tag, ['-D', dict_name, '-1'], sample, sample + '_01_64_' + tag + '_dictio.zst') + zstd(tag, ['-D', dict_name, '-3'], sample, sample + '_03_64_' + tag + '_dictio.zst') + zstd(tag, ['-D', dict_name, '-5'], sample, sample + '_05_64_' + tag + '_dictio.zst') + zstd(tag, ['-D', dict_name, '-9'], sample, sample + '_09_64_' + tag + '_dictio.zst') + zstd(tag, ['-D', dict_name, '-15'], sample, sample + '_15_64_' + tag + '_dictio.zst') + zstd(tag, ['-D', dict_name, '-18'], sample, sample + '_18_64_' + tag + '_dictio.zst') # zstdFiles = glob.glob("*.zst*") # print(zstdFiles) print(tag + " : dict compression completed") def compress_sample(tag, sample): - DEVNULL = open(os.devnull, 'wb') - if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst') - if subprocess.call(['./zstd.' 
+ tag, '-5f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodict.zst') - if subprocess.call(['./zstd.' + tag, '-9f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodict.zst') - if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodict.zst') - if subprocess.call(['./zstd.' + tag, '-18f', sample], stderr=DEVNULL) == 0: - os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodict.zst') + zstd(tag, ['-1'], sample, sample + '_01_64_' + tag + '_nodict.zst') + zstd(tag, ['-3'], sample, sample + '_03_64_' + tag + '_nodict.zst') + zstd(tag, ['-5'], sample, sample + '_05_64_' + tag + '_nodict.zst') + zstd(tag, ['-9'], sample, sample + '_09_64_' + tag + '_nodict.zst') + zstd(tag, ['-15'], sample, sample + '_15_64_' + tag + '_nodict.zst') + zstd(tag, ['-18'], sample, sample + '_18_64_' + tag + '_nodict.zst') # zstdFiles = glob.glob("*.zst*") # print(zstdFiles) print(tag + " : compression completed") @@ -150,23 +155,13 @@ def decompress_zst(tag): dec_error = 0 list_zst = sorted(glob.glob('*_nodict.zst')) for file_zst in list_zst: - print(file_zst, end=' ') - print(tag, end=' ') + print(file_zst + ' ' + tag) file_dec = file_zst + '_d64_' + tag + '.dec' - if tag <= 'v0.5.0': - params = ['./zstd.' + tag, '-df', file_zst, file_dec] + zstd(tag, ['-d'], file_zst, file_dec) + if not filecmp.cmp(file_dec, test_dat): + raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst)) else: - params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec] - if execute(params) == 0: - if not filecmp.cmp(file_dec, test_dat): - print('ERR !! 
') - dec_error = 1 - else: - print('OK ') - else: - print('command does not work') - dec_error = 1 - return dec_error + print('OK ') def decompress_dict(tag): @@ -181,22 +176,13 @@ def decompress_dict(tag): if tag == 'v0.6.0' and dict_tag < 'v0.6.0': continue dict_name = 'dict.' + dict_tag - print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ') + print(file_zst + ' ' + tag + ' dict=' + dict_tag) file_dec = file_zst + '_d64_' + tag + '.dec' - if tag <= 'v0.5.0': - params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec] + zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec) + if not filecmp.cmp(file_dec, test_dat): + raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst)) else: - params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec] - if execute(params) == 0: - if not filecmp.cmp(file_dec, test_dat): - print('ERR !! ') - dec_error = 1 - else: - print('OK ') - else: - print('command does not work') - dec_error = 1 - return dec_error + print('OK ') if __name__ == '__main__': @@ -267,25 +253,19 @@ if __name__ == '__main__': print('Compress test.dat by all released zstd') print('-----------------------------------------------') - error_code = 0 for tag in tags: print(tag) if tag >= 'v0.5.0': create_dict(tag, dict_source_path) dict_compress_sample(tag, test_dat) remove_duplicates() - error_code += decompress_dict(tag) + decompress_dict(tag) compress_sample(tag, test_dat) remove_duplicates() - error_code += decompress_zst(tag) + decompress_zst(tag) print('') print('Enumerate different compressed files') zstds = sorted(glob.glob('*.zst')) for zstd in zstds: print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd)) - - if error_code != 0: - print('====== ERROR !!! 
=======') - - sys.exit(error_code) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 453c45409..664ff632a 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -39,7 +39,7 @@ #include "seqgen.h" #include "util.h" #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ - +#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */ /*-************************************ * Constants @@ -1834,6 +1834,97 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++); + { + size_t const dstBufSize = ZSTD_compressBound(CNBufferSize); + BYTE* const dstBuf = (BYTE*)malloc(dstBufSize); /* dstBufSize is already the worst-case compressed size */ + size_t const checkBufSize = CNBufferSize; + BYTE* const checkBuf = (BYTE*)malloc(checkBufSize); + int enableFallback; + EMF_testCase externalMatchState; + + CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed"); + + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + + /* Reference external matchfinder outside the test loop to + * check that the reference is preserved across compressions */ + ZSTD_registerExternalMatchFinder( + zc, + &externalMatchState, + zstreamExternalMatchFinder + ); + + for (enableFallback = 0; enableFallback <= 1; enableFallback++) { /* run once with fallback disabled (0), once enabled (1) */ + size_t testCaseId; + + EMF_testCase const EMF_successCases[] = { + EMF_ONE_BIG_SEQ, + EMF_LOTS_OF_SEQS, + }; + size_t const EMF_numSuccessCases = 2; + + EMF_testCase const EMF_failureCases[] = { + EMF_ZERO_SEQS, + EMF_BIG_ERROR, + EMF_SMALL_ERROR, + }; + size_t const EMF_numFailureCases = 3; + + /* Test external matchfinder success scenarios */ + for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) { +
size_t res; + externalMatchState = EMF_successCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); + CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); + } + + /* Test external matchfinder failure scenarios */ + for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) { + size_t res; + externalMatchState = EMF_failureCases[testCaseId]; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); + if (enableFallback) { + CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res)); + CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!"); + } else { + CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); + CHECK( + ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed, + "EMF: Wrong error code: %s", ZSTD_getErrorName(res) + ); + } + } + + /* Test compression with external matchfinder + empty src buffer */ + { + size_t res; + externalMatchState = EMF_ZERO_SEQS; + ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0); + CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); + CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!"); + } + } + + /* Test that reset clears the external matchfinder */ + ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); + externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the 
matchfinder wasn't cleared */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); + + free(dstBuf); + free(checkBuf); + } + DISPLAYLEVEL(3, "OK \n"); + _end: FUZ_freeDictionary(dictionary); ZSTD_freeCStream(zc); diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c index 8673ca348..aef29be0b 100644 --- a/zlibWrapper/examples/zwrapbench.c +++ b/zlibWrapper/examples/zwrapbench.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -388,7 +388,7 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize, markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r", marks[markNb], displayName, (unsigned)srcSize, (unsigned)cSize, ratio, - (double)srcSize / fastestC ); + (double)srcSize / (double)fastestC ); (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ #if 1 @@ -527,8 +527,8 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize, markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", marks[markNb], displayName, (unsigned)srcSize, (unsigned)cSize, ratio, - (double)srcSize / fastestC, - (double)srcSize / fastestD ); + (double)srcSize / (double)fastestC, + (double)srcSize / (double)fastestD ); /* CRC Checking */ { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); @@ -558,8 +558,8 @@ static int BMK_benchMem(z_const void* srcBuffer, size_t srcSize, } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ if (g_displayLevel == 1) { - double cSpeed = (double)srcSize / fastestC; - double dSpeed = (double)srcSize / fastestD; + double cSpeed = (double)srcSize / (double)fastestC; + double 
dSpeed = (double)srcSize / (double)fastestD; if (g_additionalParam) DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); else diff --git a/zlibWrapper/gzcompatibility.h b/zlibWrapper/gzcompatibility.h index 3a46127b8..9d11b984f 100644 --- a/zlibWrapper/gzcompatibility.h +++ b/zlibWrapper/gzcompatibility.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zlibWrapper/zstd_zlibwrapper.c b/zlibWrapper/zstd_zlibwrapper.c index 386c0f0ac..f31cb59db 100644 --- a/zlibWrapper/zstd_zlibwrapper.c +++ b/zlibWrapper/zstd_zlibwrapper.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/zlibWrapper/zstd_zlibwrapper.h b/zlibWrapper/zstd_zlibwrapper.h index c39cf6379..230bf8411 100644 --- a/zlibWrapper/zstd_zlibwrapper.h +++ b/zlibWrapper/zstd_zlibwrapper.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the