diff --git a/.circleci/config.yml b/.circleci/config.yml index b08634408..42e4042db 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,7 +15,8 @@ references: sudo apt-get -y install \ gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \ libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ - libc6-dev-ppc64-powerpc-cross zstd gzip coreutils + libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \ + libcurl4-openssl-dev jobs: # the first half of the jobs are in this test @@ -82,6 +83,49 @@ jobs: cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS - store_artifacts: path: /tmp/circleci-artifacts + # This step should only be run in a cron job + regression-test: + docker: + - image: circleci/buildpack-deps:bionic + environment: + CIRCLE_ARTIFACTS: /tmp/circleci-artifacts + steps: + - checkout + - *install-dependencies + # Restore the cached resources. + - restore_cache: + # We try our best to bust the cache when the data changes by hashing + # data.c. If that doesn't work, simply update the version number here + # and below. If we fail to bust the cache, the regression testing will + # still work, since it has its own stamp, but will need to redownload + # everything. + keys: + - regression-cache-{{ checksum "tests/regression/data.c" }}-v0 + - run: + name: Regression Test + command: | + make -C programs zstd + make -C tests/regression test + mkdir -p $CIRCLE_ARTIFACTS + ./tests/regression/test \ + --cache tests/regression/cache \ + --output $CIRCLE_ARTIFACTS/results.csv \ + --zstd programs/zstd + echo "NOTE: The new results.csv is uploaded as an artifact to this job" + echo " If this fails, go to the Artifacts pane in CircleCI, " + echo " download /tmp/circleci-artifacts/results.csv, and if they " + echo " are still good, copy it into the repo and commit it." + echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv" + diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv + # Only save the cache on success (default), since if the failure happened + # before we stamp the data cache, we will have a bad cache for this key. + - save_cache: + key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0 + paths: + - tests/regression/cache + - store_artifacts: + path: /tmp/circleci-artifacts + workflows: version: 2 @@ -96,6 +140,13 @@ workflows: filters: tags: only: /.*/ + # Create a branch called regression and set it to dev to force a + # regression test run + - regression-test: + filters: + branches: + only: + - regression # Only run on release tags. 
- publish-github-release: requires: @@ -106,6 +157,20 @@ workflows: ignore: /.*/ tags: only: /^v\d+\.\d+\.\d+$/ + nightly: + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - master + - dev + jobs: + # Run daily long regression tests + - regression-test + + # Longer tests #- make -C tests test-zstd-nolegacy && make clean diff --git a/.travis.yml b/.travis.yml index 226acaa37..8cb40b1e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,11 +3,13 @@ language: c dist: trusty sudo: required + addons: apt: update: true matrix: + fast_finish: true include: # Ubuntu 14.04 - env: Cmd='make test' @@ -49,6 +51,11 @@ matrix: - if: tag =~ ^v[0-9]\.[0-9] env: Cmd='make -C tests checkTag && tests/checkTag $TRAVIS_BRANCH' + - dist: xenial + env: BUILD_SYSTEM='meson' + allow_failures: + - env: BUILD_SYSTEM='meson' + git: depth: 1 @@ -59,9 +66,28 @@ branches: - travisTest script: - - JOB_NUMBER=$(echo $TRAVIS_JOB_NUMBER | sed -e 's:[0-9][0-9]*\.\(.*\):\1:') - - echo JOB_NUMBER=$JOB_NUMBER TRAVIS_BRANCH=$TRAVIS_BRANCH TRAVIS_EVENT_TYPE=$TRAVIS_EVENT_TYPE TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST + - JOB_NUMBER=$(printf '%s' "${TRAVIS_JOB_NUMBER}" | sed -E 's@[0-9]+\.([0-9]+)@\1@') + - printf 'JOB_NUMBER=%s TRAVIS_BRANCH=%s TRAVIS_EVENT_TYPE=%s TRAVIS_PULL_REQUEST=%s\n' + "${JOB_NUMBER}" "${TRAVIS_BRANCH}" "${TRAVIS_EVENT_TYPE}" "${TRAVIS_PULL_REQUEST}" + - if [ "${BUILD_SYSTEM}" = meson ]; then + set -x; + sudo apt-get install -qq liblz4-dev valgrind tree + && curl -o ~/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' + && python3 ~/get-pip.py --user + && pip3 install --user meson ninja + && export CC=clang CXX=clang++ + && meson --buildtype=debug + -Db_lundef=false + -Dauto_features=enabled + -Dbuild_{programs,tests,contrib}=true + -Ddefault_library=both + build/meson builddir + && cd "$_" + && DESTDIR=./staging ninja install + && tree ./staging; + travis_terminate "$?"; + fi - export FUZZERTEST=-T2mn; export ZSTREAM_TESTTIME=-T2mn; export DECODECORPUS_TESTTIME=-T1mn; - sh -c "$Cmd" || travis_terminate 1; + sh -c "${Cmd}" || travis_terminate 1; diff --git a/Makefile b/Makefile index 1b1d27c15..ed78d1956 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,12 @@ ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD Dr HOST_OS = POSIX CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release -EGREP = egrep --color=never +HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) +EGREP_OPTIONS ?= +ifeq ($HAVE_COLORNEVER, 1) +EGREP_OPTIONS += --color=never +endif +EGREP = egrep $(EGREP_OPTIONS) # Print a two column output of targets and their description. To add a target description, put a # comment in the Makefile with the format "## : ". For example: diff --git a/README.md b/README.md index dc99dc0fd..146264cf5 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ By default, `CMAKE_BUILD_TYPE` is set to `Release`. #### Meson -A Meson project is provided within `contrib/meson`. +A Meson project is provided within `build/meson`. #### Visual Studio (Windows) @@ -135,6 +135,10 @@ Going into `build` directory, you will find additional possibilities: - Automated build scripts for Visual compiler by [@KrzysFR](https://github.com/KrzysFR), in `build/VS_scripts`, which will build `zstd` cli and `libzstd` library without any need to open Visual Studio solution. 
+#### Buck + +You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo. +The output binary will be in `buck-out/gen/programs/`. ### Status diff --git a/build/meson/GetZstdLibraryVersion.py b/build/meson/GetZstdLibraryVersion.py new file mode 100644 index 000000000..53c61008a --- /dev/null +++ b/build/meson/GetZstdLibraryVersion.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +# ############################################################################# +# Copyright (c) 2018-present lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# +import re +import sys + + +def find_version(filepath): + version_file_data = None + with open(filepath) as fd: + version_file_data = fd.read() + + patterns = r"""#\s*define\s+ZSTD_VERSION_MAJOR\s+([0-9]+) +#\s*define\s+ZSTD_VERSION_MINOR\s+([0-9]+) +#\s*define\s+ZSTD_VERSION_RELEASE\s+([0-9]+) +""" + regex = re.compile(patterns, re.MULTILINE) + version_match = regex.search(version_file_data) + if version_match: + return version_match.groups() + raise Exception("Unable to find version string.") + + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Print zstd version from lib/zstd.h') + parser.add_argument('file', help='path to lib/zstd.h') + args = parser.parse_args() + filepath = args.file + version_tup = find_version(filepath) + print('.'.join(version_tup)) + + +if __name__ == '__main__': + main() diff --git a/build/meson/InstallSymlink.py b/build/meson/InstallSymlink.py new file mode 100644 index 000000000..9c68cb33e --- /dev/null +++ b/build/meson/InstallSymlink.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# ############################################################################# +# Copyright (c) 2018-present lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). 
+# ############################################################################# +import errno +import os + + +def mkdir_p(path, dir_mode=0o777): + try: + os.makedirs(path, mode=dir_mode) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def install_symlink(src, dst, install_dir, dst_is_dir=False, dir_mode=0o777): + if not os.path.exists(install_dir): + mkdir_p(install_dir, dir_mode) + if not os.path.isdir(install_dir): + raise NotADirectoryError(install_dir) + + new_dst = os.path.join(install_dir, dst) + if os.path.islink(new_dst) and os.readlink(new_dst) == src: + print('File exists: {!r} -> {!r}'.format(new_dst, src)) + return + print('Installing symlink {!r} -> {!r}'.format(new_dst, src)) + os.symlink(src, new_dst, dst_is_dir) + + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Install a symlink', + usage='InstallSymlink.py [-h] [-d] [-m MODE] src dst install_dir\n\n' + 'example:\n' + '\tInstallSymlink.py dash sh /bin\n' + '\tDESTDIR=./staging InstallSymlink.py dash sh /bin') + parser.add_argument('src', help='target to link') + parser.add_argument('dst', help='link name') + parser.add_argument('install_dir', help='installation directory') + parser.add_argument('-d', '--isdir', + action='store_true', + help='dst is a directory') + parser.add_argument('-m', '--mode', + help='directory mode on creating if not exist', + default='0o777') + args = parser.parse_args() + + src = args.src + dst = args.dst + install_dir = args.install_dir + dst_is_dir = args.isdir + dir_mode = int(args.mode, 8) + + DESTDIR = os.environ.get('DESTDIR') + if DESTDIR: + install_dir = DESTDIR + install_dir if os.path.isabs(install_dir) \ + else os.path.join(DESTDIR, install_dir) + + install_symlink(src, dst, install_dir, dst_is_dir, dir_mode) + + +if __name__ == '__main__': + main() diff --git a/build/meson/README.md b/build/meson/README.md new file mode 100644 index 000000000..dae503fef --- /dev/null +++ b/build/meson/README.md @@ -0,0 +1,38 @@ +Meson build system for zstandard +================================ + +Meson is a build system designed to optimize programmer productivity. +It aims to do this by providing simple, out-of-the-box support for +modern software development tools and practices, such as unit tests, +coverage reports, Valgrind, CCache and the like. + +This Meson build system is provided with no guarantee and maintained +by Dima Krasner . + +It outputs one `libzstd`, either shared or static, depending on +`default_library` option. + +## How to build + +`cd` to this meson directory (`build/meson`) + +```sh +meson --buildtype=release -D with-contrib=true -D with-tests=true -D with-contrib=true builddir +cd builddir +ninja # to build +ninja install # to install +``` + +You might want to install it in staging directory: + +```sh +DESTDIR=./staging ninja install +``` + +To configure build options, use: + +```sh +meson configure +``` + +See [man meson(1)](https://manpages.debian.org/testing/meson/meson.1.en.html). diff --git a/build/meson/contrib/gen_html/meson.build b/build/meson/contrib/gen_html/meson.build new file mode 100644 index 000000000..cabff209d --- /dev/null +++ b/build/meson/contrib/gen_html/meson.build @@ -0,0 +1,29 @@ +# ############################################################################# +# Copyright (c) 2018-present lzutao +# All rights reserved. 
+# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +zstd_rootdir = '../../../..' + +gen_html_includes = include_directories(join_paths(zstd_rootdir, 'programs'), + join_paths(zstd_rootdir, 'lib'), + join_paths(zstd_rootdir, 'lib/common'), + join_paths(zstd_rootdir, 'contrib/gen_html')) + +gen_html = executable('gen_html', + join_paths(zstd_rootdir, 'contrib/gen_html/gen_html.cpp'), + include_directories: gen_html_includes, + install: false) + +# Update zstd manual +zstd_manual_html = custom_target('zstd_manual.html', + output : 'zstd_manual.html', + command : [gen_html, + zstd_version, + join_paths(meson.current_source_dir(), zstd_rootdir, 'lib/zstd.h'), + '@OUTPUT@'], + install : false) diff --git a/build/meson/contrib/meson.build b/build/meson/contrib/meson.build new file mode 100644 index 000000000..7f6d03a4c --- /dev/null +++ b/build/meson/contrib/meson.build @@ -0,0 +1,12 @@ +# ############################################################################# +# Copyright (c) 2018-present Dima Krasner +# lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +subdir('pzstd') +subdir('gen_html') diff --git a/build/meson/contrib/pzstd/meson.build b/build/meson/contrib/pzstd/meson.build new file mode 100644 index 000000000..8f3822fd7 --- /dev/null +++ b/build/meson/contrib/pzstd/meson.build @@ -0,0 +1,24 @@ +# ############################################################################# +# Copyright (c) 2018-present lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +zstd_rootdir = '../../../..' + +pzstd_includes = include_directories(join_paths(zstd_rootdir, 'programs'), + join_paths(zstd_rootdir, 'contrib/pzstd')) +pzstd_sources = [join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'contrib/pzstd/main.cpp'), + join_paths(zstd_rootdir, 'contrib/pzstd/Options.cpp'), + join_paths(zstd_rootdir, 'contrib/pzstd/Pzstd.cpp'), + join_paths(zstd_rootdir, 'contrib/pzstd/SkippableFrame.cpp')] +pzstd = executable('pzstd', + pzstd_sources, + cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic' ], + include_directories: pzstd_includes, + dependencies: [ libzstd_dep, thread_dep ], + install: true) diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build new file mode 100644 index 000000000..bce6b7c56 --- /dev/null +++ b/build/meson/lib/meson.build @@ -0,0 +1,130 @@ +# ############################################################################# +# Copyright (c) 2018-present Dima Krasner +# lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). 
+# ############################################################################# + +zstd_rootdir = '../../..' + +libzstd_includes = [include_directories(join_paths(zstd_rootdir,'lib'), + join_paths(zstd_rootdir, 'lib/common'), + join_paths(zstd_rootdir, 'lib/compress'), + join_paths(zstd_rootdir, 'lib/decompress'), + join_paths(zstd_rootdir, 'lib/dictBuilder'), + join_paths(zstd_rootdir, 'lib/deprecated'))] + +libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'), + join_paths(zstd_rootdir, 'lib/common/fse_decompress.c'), + join_paths(zstd_rootdir, 'lib/common/threading.c'), + join_paths(zstd_rootdir, 'lib/common/pool.c'), + join_paths(zstd_rootdir, 'lib/common/zstd_common.c'), + join_paths(zstd_rootdir, 'lib/common/error_private.c'), + join_paths(zstd_rootdir, 'lib/common/xxhash.c'), + join_paths(zstd_rootdir, 'lib/compress/hist.c'), + join_paths(zstd_rootdir, 'lib/compress/fse_compress.c'), + join_paths(zstd_rootdir, 'lib/compress/huf_compress.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_compress.c'), + join_paths(zstd_rootdir, 'lib/compress/zstdmt_compress.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_fast.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_double_fast.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_lazy.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_opt.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_ldm.c'), + join_paths(zstd_rootdir, 'lib/decompress/huf_decompress.c'), + join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress.c'), + join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress_block.c'), + join_paths(zstd_rootdir, 'lib/decompress/zstd_ddict.c'), + join_paths(zstd_rootdir, 'lib/dictBuilder/cover.c'), + join_paths(zstd_rootdir, 'lib/dictBuilder/fastcover.c'), + join_paths(zstd_rootdir, 'lib/dictBuilder/divsufsort.c'), + join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.c'), + join_paths(zstd_rootdir, 'lib/deprecated/zbuff_common.c'), + join_paths(zstd_rootdir, 'lib/deprecated/zbuff_compress.c'), + join_paths(zstd_rootdir, 'lib/deprecated/zbuff_decompress.c')] + +# Explicit define legacy support +add_project_arguments('-DZSTD_LEGACY_SUPPORT=@0@'.format(legacy_level), + language: 'c') + +if legacy_level == 0 + message('Legacy support: DISABLED') +else + # See ZSTD_LEGACY_SUPPORT of lib/README.md + message('Enable legacy support back to version 0.@0@'.format(legacy_level)) + + libzstd_includes += [ include_directories(join_paths(zstd_rootdir, 'lib/legacy')) ] + foreach i : [1, 2, 3, 4, 5, 6, 7] + if legacy_level <= i + libzstd_sources += join_paths(zstd_rootdir, 'lib/legacy/zstd_v0@0@.c'.format(i)) + endif + endforeach +endif + +libzstd_deps = [] +if use_multi_thread + message('Enable multi-threading support') + add_project_arguments('-DZSTD_MULTITHREAD', language: 'c') + libzstd_deps = [ thread_dep ] +endif + +libzstd_c_args = [] +if cc_id == compiler_msvc + if default_library_type != 'static' + libzstd_sources += [windows_mod.compile_resources( + join_paths(zstd_rootdir, 'build/VS2010/libzstd-dll/libzstd-dll.rc'))] + libzstd_c_args += ['-DZSTD_DLL_EXPORT=1', + '-DZSTD_HEAPMODE=0', + '-D_CONSOLE', + '-D_CRT_SECURE_NO_WARNINGS'] + else + libzstd_c_args += ['-DZSTD_HEAPMODE=0', + '-D_CRT_SECURE_NO_WARNINGS'] + endif +endif + +mingw_ansi_stdio_flags = [] +if host_machine_os == os_windows and cc_id == compiler_gcc + mingw_ansi_stdio_flags = [ '-D__USE_MINGW_ANSI_STDIO' ] +endif +libzstd_c_args += mingw_ansi_stdio_flags + +libzstd_debug_cflags = [] +if use_debug + libzstd_c_args += '-DDEBUGLEVEL=@0@'.format(debug_level) + if 
cc_id == compiler_gcc or cc_id == compiler_clang + libzstd_debug_cflags = ['-Wstrict-aliasing=1', '-Wswitch-enum', + '-Wdeclaration-after-statement', '-Wstrict-prototypes', + '-Wundef', '-Wpointer-arith', '-Wformat-security', '-Wvla', + '-Wformat=2', '-Winit-self', '-Wfloat-equal', '-Wwrite-strings', + '-Wredundant-decls', '-Wmissing-prototypes', '-Wc++-compat'] + endif +endif +libzstd_c_args += cc.get_supported_arguments(libzstd_debug_cflags) + +libzstd = library('zstd', + libzstd_sources, + include_directories: libzstd_includes, + c_args: libzstd_c_args, + dependencies: libzstd_deps, + install: true, + version: zstd_libversion, + soversion: '1') + +libzstd_dep = declare_dependency(link_with: libzstd, + include_directories: libzstd_includes) + +pkgconfig.generate(name: 'libzstd', + filebase: 'libzstd', + libraries: [libzstd], + description: 'fast lossless compression algorithm library', + version: zstd_libversion, + url: 'http://www.zstd.net/') + +install_headers(join_paths(zstd_rootdir, 'lib/zstd.h'), + join_paths(zstd_rootdir, 'lib/deprecated/zbuff.h'), + join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.h'), + join_paths(zstd_rootdir, 'lib/common/zstd_errors.h')) diff --git a/build/meson/meson.build b/build/meson/meson.build new file mode 100644 index 000000000..2e1f9d06b --- /dev/null +++ b/build/meson/meson.build @@ -0,0 +1,171 @@ +# ############################################################################# +# Copyright (c) 2018-present Dima Krasner +# lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +project('zstd', + ['c', 'cpp'], + license: ['BSD', 'GPLv2'], + default_options : ['c_std=c99', + 'cpp_std=c++11', + 'buildtype=release'], + version: '1.3.8', + meson_version: '>=0.47.0') + +cc = meson.get_compiler('c') +cxx = meson.get_compiler('cpp') +pkgconfig = import('pkgconfig') +python3 = import('python').find_installation() +windows_mod = import('windows') + +host_machine_os = host_machine.system() +os_windows = 'windows' +os_linux = 'linux' +os_darwin = 'darwin' +os_freebsd = 'freebsd' +os_sun = 'sunos' + +cc_id = cc.get_id() +compiler_gcc = 'gcc' +compiler_clang = 'clang' +compiler_msvc = 'msvc' + +zstd_version = meson.project_version() +zstd_libversion = '' + +# ============================================================================= +# Project directories +# ============================================================================= + +zstd_rootdir = '../..' + +# ============================================================================= +# Installation directories +# ============================================================================= + +if host_machine_os == os_windows + zstd_prefix = '.' 
+ zstd_bindir = 'bin' + zstd_datadir = 'share' + zstd_mandir = join_paths(zstd_datadir, 'man') +else + zstd_prefix = get_option('prefix') + zstd_bindir = join_paths(zstd_prefix, get_option('bindir')) + zstd_datadir = join_paths(zstd_prefix, get_option('datadir')) + zstd_mandir = join_paths(zstd_prefix, get_option('mandir')) +endif + +zstd_docdir = join_paths(zstd_datadir, 'doc', meson.project_name()) + +# ============================================================================= +# Project options +# ============================================================================= + +# Built-in options +use_debug = get_option('debug') +buildtype = get_option('buildtype') + +# Custom options +debug_level = get_option('debug_level') +legacy_level = get_option('legacy_level') +use_backtrace = get_option('backtrace') +use_static_runtime = get_option('static_runtime') + +build_programs = get_option('build_programs') +build_contrib = get_option('build_contrib') +build_tests = get_option('build_tests') + +feature_multi_thread = get_option('multi_thread') +feature_zlib = get_option('zlib') +feature_lzma = get_option('lzma') +feature_lz4 = get_option('lz4') + +# ============================================================================= +# Helper scripts for Meson +# ============================================================================= + +GetZstdLibraryVersion_py = files('GetZstdLibraryVersion.py') + +# ============================================================================= +# Getting project version from zstd.h +# ============================================================================= + +zstd_h_file = join_paths(meson.current_source_dir(), zstd_rootdir, 'lib/zstd.h') +r = run_command(python3, GetZstdLibraryVersion_py, zstd_h_file) +if r.returncode() == 0 + output = r.stdout().strip() + if output.version_compare('>@0@'.format(zstd_version)) + zstd_version = output + message('Project version is now: @0@'.format(zstd_version)) + endif +endif + +if host_machine_os != os_windows + zstd_libversion = zstd_version +endif + +# ============================================================================= +# Dependencies +# ============================================================================= + +libm_dep = cc.find_library('m', required: build_tests) +thread_dep = dependency('threads', required: feature_multi_thread) +use_multi_thread = thread_dep.found() +# Arguments in dependency should be equivalent to those passed to pkg-config +zlib_dep = dependency('zlib', required: feature_zlib) +use_zlib = zlib_dep.found() +lzma_dep = dependency('liblzma', required: feature_lzma) +use_lzma = lzma_dep.found() +lz4_dep = dependency('liblz4', required: feature_lz4) +use_lz4 = lz4_dep.found() + +# ============================================================================= +# Compiler flags +# ============================================================================= + +add_project_arguments('-DXXH_NAMESPACE=ZSTD_', language: ['c']) + +if [compiler_gcc, compiler_clang].contains(cc_id) + common_warning_flags = [ '-Wextra', '-Wundef', '-Wshadow', '-Wcast-align', '-Wcast-qual' ] + if cc_id == compiler_clang + # Should use Meson's own --werror build option + #common_warning_flags += '-Werror' + common_warning_flags += ['-Wconversion', '-Wno-sign-conversion', '-Wdocumentation'] + endif + cc_compile_flags = cc.get_supported_arguments(common_warning_flags + ['-Wstrict-prototypes']) + cxx_compile_flags = cxx.get_supported_arguments(common_warning_flags) + add_project_arguments(cc_compile_flags, 
language : 'c') + add_project_arguments(cxx_compile_flags, language : 'cpp') +elif cc_id == compiler_msvc + msvc_compile_flags = [ '/D_UNICODE', '/DUNICODE' ] + if use_multi_thread + msvc_compile_flags += '/MP' + endif + if enable_static_runtime + msvc_compile_flags += '/MT' + endif + add_project_arguments(msvc_compile_flags, language: ['c', 'cpp']) +endif + +# ============================================================================= +# Subdirs +# ============================================================================= + +subdir('lib') + +if build_programs + subdir('programs') +endif + +if build_tests + subdir('tests') +endif + +if build_contrib + subdir('contrib') +endif diff --git a/build/meson/meson_options.txt b/build/meson/meson_options.txt new file mode 100644 index 000000000..349d915c7 --- /dev/null +++ b/build/meson/meson_options.txt @@ -0,0 +1,36 @@ +# ############################################################################# +# Copyright (c) 2018-present Dima Krasner +# lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +# Read guidelines from https://wiki.gnome.org/Initiatives/GnomeGoals/MesonPorting + +option('legacy_level', type: 'integer', min: 0, max: 7, value: '5', + description: 'Support any legacy format: 7 to 1 for v0.7+ to v0.1+') +option('debug_level', type: 'integer', min: 0, max: 9, value: 1, + description: 'Enable run-time debug. See lib/common/debug.h') +option('backtrace', type: 'boolean', value: false, + description: 'Display a stack backtrace when execution generates a runtime exception') +option('static_runtime', type: 'boolean', value: false, + description: 'Link to static run-time libraries on MSVC') + +option('build_programs', type: 'boolean', value: true, + description: 'Enable programs build') +option('build_tests', type: 'boolean', value: false, + description: 'Enable tests build') +option('build_contrib', type: 'boolean', value: false, + description: 'Enable contrib build') + +option('multi_thread', type: 'feature', value: 'enabled', + description: 'Enable multi-threading when pthread is detected') +option('zlib', type: 'feature', value: 'auto', + description: 'Enable zlib support') +option('lzma', type: 'feature', value: 'auto', + description: 'Enable lzma support') +option('lz4', type: 'feature', value: 'auto', + description: 'Enable lz4 support') diff --git a/build/meson/programs/meson.build b/build/meson/programs/meson.build new file mode 100644 index 000000000..333ad8e8b --- /dev/null +++ b/build/meson/programs/meson.build @@ -0,0 +1,101 @@ +# ############################################################################# +# Copyright (c) 2018-present Dima Krasner +# lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +zstd_rootdir = '../../..' 
+ +zstd_programs_sources = [join_paths(zstd_rootdir, 'programs/zstdcli.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'programs/fileio.c'), + join_paths(zstd_rootdir, 'programs/benchfn.c'), + join_paths(zstd_rootdir, 'programs/benchzstd.c'), + join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'programs/dibio.c')] + +zstd_c_args = libzstd_debug_cflags +if use_multi_thread + zstd_c_args += [ '-DZSTD_MULTITHREAD' ] +endif + +zstd_deps = [ libzstd_dep ] +if use_zlib + zstd_deps += [ zlib_dep ] + zstd_c_args += [ '-DZSTD_GZCOMPRESS', '-DZSTD_GZDECOMPRESS' ] +endif + +if use_lzma + zstd_deps += [ lzma_dep ] + zstd_c_args += [ '-DZSTD_LZMACOMPRESS', '-DZSTD_LZMADECOMPRESS' ] +endif + +if use_lz4 + zstd_deps += [ lz4_dep ] + zstd_c_args += [ '-DZSTD_LZ4COMPRESS', '-DZSTD_LZ4DECOMPRESS' ] +endif + +export_dynamic_on_windows = false +# explicit backtrace enable/disable for Linux & Darwin +if not use_backtrace + zstd_c_args += '-DBACKTRACE_ENABLE=0' +elif use_debug and host_machine_os == os_windows # MinGW target + zstd_c_args += '-DBACKTRACE_ENABLE=1' + export_dynamic_on_windows = true +endif + +if cc_id == compiler_msvc + if default_library_type != 'static' + zstd_programs_sources += [windows_mod.compile_resources( + join_paths(zstd_rootdir, 'build/VS2010/zstd/zstd.rc'))] + endif +endif + +zstd = executable('zstd', + zstd_programs_sources, + c_args: zstd_c_args, + dependencies: zstd_deps, + export_dynamic: export_dynamic_on_windows, # Since Meson 0.45.0 + install: true) + +zstd_frugal_sources = [join_paths(zstd_rootdir, 'programs/zstdcli.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'programs/fileio.c')] + +# Minimal target, with only zstd compression and decompression. +# No bench. No legacy. +executable('zstd-frugal', + zstd_frugal_sources, + dependencies: libzstd_dep, + c_args: [ '-DZSTD_NOBENCH', '-DZSTD_NODICT' ], + install: true) + +install_data(join_paths(zstd_rootdir, 'programs/zstdgrep'), + join_paths(zstd_rootdir, 'programs/zstdless'), + install_dir: zstd_bindir) + +# ============================================================================= +# Programs and manpages installing +# ============================================================================= + +install_man(join_paths(zstd_rootdir, 'programs/zstd.1'), + join_paths(zstd_rootdir, 'programs/zstdgrep.1'), + join_paths(zstd_rootdir, 'programs/zstdless.1')) + +InstallSymlink_py = '../InstallSymlink.py' +zstd_man1_dir = join_paths(zstd_mandir, 'man1') +man1_EXT = host_machine_os != os_windows ? '.1.gz' : '.1' + +foreach f : ['zstdcat', 'unzstd'] + meson.add_install_script(InstallSymlink_py, 'zstd', f, zstd_bindir) + meson.add_install_script(InstallSymlink_py, 'zstd' + man1_EXT, f + man1_EXT, zstd_man1_dir) +endforeach + +if use_multi_thread + meson.add_install_script(InstallSymlink_py, 'zstd', 'zstdmt', zstd_bindir) + meson.add_install_script(InstallSymlink_py, 'zstd' + man1_EXT, 'zstdmt' + man1_EXT, zstd_man1_dir) +endif diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build new file mode 100644 index 000000000..aa9cd86f4 --- /dev/null +++ b/build/meson/tests/meson.build @@ -0,0 +1,218 @@ +# ############################################################################# +# Copyright (c) 2018-present Dima Krasner +# lzutao +# All rights reserved. 
+# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# + +zstd_rootdir = '../../..' + +tests_supported_oses = [os_linux, 'gnu/kfreebsd', os_darwin, 'gnu', 'openbsd', + os_freebsd, 'netbsd', 'dragonfly', os_sun] + +# ============================================================================= +# Test flags +# ============================================================================= + +FUZZER_FLAGS = ['--no-big-tests'] +FUZZERTEST = '-T200s' +ZSTREAM_TESTTIME = '-T90s' +DECODECORPUS_TESTTIME = '-T30' +ZSTDRTTEST = ['--test-large-data'] + +# ============================================================================= +# Executables +# ============================================================================= + +test_includes = [ include_directories(join_paths(zstd_rootdir, 'programs')) ] + +datagen_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'tests/datagencli.c')] +datagen = executable('datagen', + datagen_sources, + c_args: [ '-DNDEBUG' ], + include_directories: test_includes, + dependencies: libzstd_dep, + install: false) + +fullbench_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'programs/benchfn.c'), + join_paths(zstd_rootdir, 'programs/benchzstd.c'), + join_paths(zstd_rootdir, 'tests/fullbench.c')] +fullbench = executable('fullbench', + fullbench_sources, + include_directories: test_includes, + dependencies: libzstd_dep, + install: false) + +fuzzer_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'tests/fuzzer.c')] +fuzzer = executable('fuzzer', + fuzzer_sources, + include_directories: test_includes, + dependencies: libzstd_dep, + install: false) + +zbufftest_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'tests/zbufftest.c')] +zbufftest = executable('zbufftest', + zbufftest_sources, + c_args: ['-Wno-deprecated-declarations'], + include_directories: test_includes, + dependencies: libzstd_dep, + install: false) + +zstreamtest_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'tests/seqgen.c'), + join_paths(zstd_rootdir, 'tests/zstreamtest.c')] +zstreamtest = executable('zstreamtest', + zstreamtest_sources, + include_directories: test_includes, + dependencies: libzstd_dep, + install: false) + +paramgrill_sources = [join_paths(zstd_rootdir, 'programs/benchfn.c'), + join_paths(zstd_rootdir, 'programs/benchzstd.c'), + join_paths(zstd_rootdir, 'programs/datagen.c'), + join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'tests/paramgrill.c')] +paramgrill = executable('paramgrill', + paramgrill_sources, + include_directories: test_includes, + dependencies: [ libzstd_dep, libm_dep ], + install: false) + +roundTripCrash_sources = [join_paths(zstd_rootdir, 'tests/roundTripCrash.c')] +roundTripCrash = executable('roundTripCrash', + roundTripCrash_sources, + dependencies: [ libzstd_dep ], + install: false) + +longmatch_sources = [join_paths(zstd_rootdir, 'tests/longmatch.c')] +longmatch = executable('longmatch', + longmatch_sources, + 
dependencies: [ libzstd_dep ], + install: false) + +invalidDictionaries_sources = [join_paths(zstd_rootdir, 'tests/invalidDictionaries.c')] +invalidDictionaries = executable('invalidDictionaries', + invalidDictionaries_sources, + dependencies: [ libzstd_dep ], + install: false) + +legacy_sources = [join_paths(zstd_rootdir, 'tests/legacy.c')] +legacy = executable('legacy', + legacy_sources, + # Use -Dlegacy_level build option to control it + #c_args: '-DZSTD_LEGACY_SUPPORT=4', + dependencies: [ libzstd_dep ], + install: false) + +decodecorpus_sources = [join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'tests/decodecorpus.c')] +decodecorpus = executable('decodecorpus', + decodecorpus_sources, + include_directories: test_includes, + dependencies: [ libzstd_dep, libm_dep ], + install: false) + +symbols_sources = [join_paths(zstd_rootdir, 'tests/symbols.c')] +symbols = executable('symbols', + symbols_sources, + include_directories: test_includes, + c_args: host_machine_os == os_windows ? '-DZSTD_DLL_IMPORT=1' : [], + dependencies: [ libzstd_dep ], + install: false) + +poolTests_sources = [join_paths(zstd_rootdir, 'programs/util.c'), + join_paths(zstd_rootdir, 'tests/poolTests.c'), + join_paths(zstd_rootdir, 'lib/common/pool.c'), + join_paths(zstd_rootdir, 'lib/common/threading.c'), + join_paths(zstd_rootdir, 'lib/common/zstd_common.c'), + join_paths(zstd_rootdir, 'lib/common/error_private.c')] +poolTests = executable('poolTests', + poolTests_sources, + include_directories: test_includes, + dependencies: [ libzstd_dep, thread_dep ], + install: false) + +checkTag_sources = [join_paths(zstd_rootdir, 'tests/checkTag.c')] +checkTag = executable('checkTag', + checkTag_sources, + dependencies: [ libzstd_dep ], + install: false) + +# ============================================================================= +# Tests (Use "meson test --list" to list all tests) +# ============================================================================= + +if tests_supported_oses.contains(host_machine_os) + valgrind_prog = find_program('valgrind', ['/usr/bin/valgrind'], required: true) + valgrindTest_py = files('valgrindTest.py') + test('valgrindTest', + valgrindTest_py, + args: [valgrind_prog.path(), zstd, datagen, fuzzer, fullbench], + depends: [zstd, datagen, fuzzer, fullbench], + timeout: 600) # Timeout should work on HDD drive +endif + +if host_machine_os != os_windows + playTests_sh = find_program(join_paths(zstd_rootdir, 'tests/playTests.sh'), required: true) + test('test-zstd', + playTests_sh, + args: ZSTDRTTEST, + env: ['ZSTD=' + zstd.full_path()], + depends: [datagen], + timeout: 600) # Timeout should work on HDD drive +endif + +test('test-fullbench-1', + fullbench, + args: ['-i1'], + depends: [datagen], + timeout: 60) +test('test-fullbench-2', + fullbench, + args: ['-i1', '-P0'], + depends: [datagen], + timeout: 60) + +if use_zlib + test('test-fuzzer', + fuzzer, + args: ['-v', FUZZERTEST] + FUZZER_FLAGS, + timeout: 240) +endif + +test('test-zbuff', + zbufftest, + args: [ZSTREAM_TESTTIME], + timeout: 120) +test('test-zstream-1', + zstreamtest, + args: ['-v', ZSTREAM_TESTTIME] + FUZZER_FLAGS, + timeout: 120) +test('test-zstream-2', + zstreamtest, + args: ['-mt', '-t1', ZSTREAM_TESTTIME] + FUZZER_FLAGS, + timeout: 120) +test('test-zstream-3', + zstreamtest, + args: ['--newapi', '-t1', ZSTREAM_TESTTIME] + FUZZER_FLAGS, + timeout: 120) +test('test-longmatch', longmatch, timeout: 36) +test('test-invalidDictionaries', invalidDictionaries) # should be fast +test('test-symbols', symbols) # 
should be fast +test('test-legacy', legacy) # should be fast +test('test-decodecorpus', + decodecorpus, + args: ['-t', DECODECORPUS_TESTTIME], + timeout: 60) +test('test-poolTests', poolTests) # should be fast diff --git a/build/meson/tests/valgrindTest.py b/build/meson/tests/valgrindTest.py new file mode 100644 index 000000000..218f7458b --- /dev/null +++ b/build/meson/tests/valgrindTest.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# ############################################################################# +# Copyright (c) 2018-present lzutao +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ############################################################################# +import os +import subprocess +import tempfile + + +def valgrindTest(valgrind, datagen, fuzzer, zstd, fullbench): + VALGRIND_ARGS = [valgrind, '--leak-check=full', '--show-leak-kinds=all', '--error-exitcode=1'] + + print('\n ---- valgrind tests : memory analyzer ----') + + subprocess.check_call([*VALGRIND_ARGS, datagen, '-g50M'], stdout=subprocess.DEVNULL) + + if subprocess.call([*VALGRIND_ARGS, zstd], + stdout=subprocess.DEVNULL) == 0: + raise subprocess.CalledProcessError('zstd without argument should have failed') + + with subprocess.Popen([datagen, '-g80'], stdout=subprocess.PIPE) as p1, \ + subprocess.Popen([*VALGRIND_ARGS, zstd, '-', '-c'], + stdin=p1.stdout, + stdout=subprocess.DEVNULL) as p2: + p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. + p2.communicate() + if p2.returncode != 0: + raise subprocess.CalledProcessError() + + with subprocess.Popen([datagen, '-g16KB'], stdout=subprocess.PIPE) as p1, \ + subprocess.Popen([*VALGRIND_ARGS, zstd, '-vf', '-', '-c'], + stdin=p1.stdout, + stdout=subprocess.DEVNULL) as p2: + p1.stdout.close() + p2.communicate() + if p2.returncode != 0: + raise subprocess.CalledProcessError() + + with tempfile.NamedTemporaryFile() as tmp_fd: + with subprocess.Popen([datagen, '-g2930KB'], stdout=subprocess.PIPE) as p1, \ + subprocess.Popen([*VALGRIND_ARGS, zstd, '-5', '-vf', '-', '-o', tmp_fd.name], + stdin=p1.stdout) as p2: + p1.stdout.close() + p2.communicate() + if p2.returncode != 0: + raise subprocess.CalledProcessError() + + subprocess.check_call([*VALGRIND_ARGS, zstd, '-vdf', tmp_fd.name, '-c'], + stdout=subprocess.DEVNULL) + + with subprocess.Popen([datagen, '-g64MB'], stdout=subprocess.PIPE) as p1, \ + subprocess.Popen([*VALGRIND_ARGS, zstd, '-vf', '-', '-c'], + stdin=p1.stdout, + stdout=subprocess.DEVNULL) as p2: + p1.stdout.close() + p2.communicate() + if p2.returncode != 0: + raise subprocess.CalledProcessError() + + subprocess.check_call([*VALGRIND_ARGS, fuzzer, '-T1mn', '-t1']) + subprocess.check_call([*VALGRIND_ARGS, fullbench, '-i1']) + + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Valgrind tests : memory analyzer') + parser.add_argument('valgrind', help='valgrind path') + parser.add_argument('zstd', help='zstd path') + parser.add_argument('datagen', help='datagen path') + parser.add_argument('fuzzer', help='fuzzer path') + parser.add_argument('fullbench', help='fullbench path') + + args = parser.parse_args() + + valgrind = args.valgrind + zstd = args.zstd + datagen = args.datagen + fuzzer = args.fuzzer + fullbench = args.fullbench + + valgrindTest(valgrind, datagen, fuzzer, zstd, fullbench) + + +if __name__ == 
'__main__': + main() diff --git a/contrib/meson/README b/contrib/meson/README deleted file mode 100644 index 0b5331e6d..000000000 --- a/contrib/meson/README +++ /dev/null @@ -1,3 +0,0 @@ -This Meson project is provided with no guarantee and maintained by Dima Krasner . - -It outputs one libzstd, either shared or static, depending on default_library. diff --git a/contrib/meson/meson.build b/contrib/meson/meson.build deleted file mode 100644 index 98c9b0293..000000000 --- a/contrib/meson/meson.build +++ /dev/null @@ -1,144 +0,0 @@ -project('zstd', 'c', license: 'BSD') - -libm = meson.get_compiler('c').find_library('m', required: true) - -lib_dir = join_paths('..', '..', 'lib') -common_dir = join_paths(lib_dir, 'common') -compress_dir = join_paths(lib_dir, 'compress') -decompress_dir = join_paths(lib_dir, 'decompress') -dictbuilder_dir = join_paths(lib_dir, 'dictBuilder') -deprecated_dir = join_paths(lib_dir, 'deprecated') - -libzstd_srcs = [ - join_paths(common_dir, 'entropy_common.c'), - join_paths(common_dir, 'fse_decompress.c'), - join_paths(common_dir, 'threading.c'), - join_paths(common_dir, 'pool.c'), - join_paths(common_dir, 'zstd_common.c'), - join_paths(common_dir, 'error_private.c'), - join_paths(common_dir, 'xxhash.c'), - join_paths(compress_dir, 'fse_compress.c'), - join_paths(compress_dir, 'hist.c'), - join_paths(compress_dir, 'huf_compress.c'), - join_paths(compress_dir, 'zstd_compress.c'), - join_paths(compress_dir, 'zstd_fast.c'), - join_paths(compress_dir, 'zstd_double_fast.c'), - join_paths(compress_dir, 'zstd_lazy.c'), - join_paths(compress_dir, 'zstd_opt.c'), - join_paths(compress_dir, 'zstd_ldm.c'), - join_paths(compress_dir, 'zstdmt_compress.c'), - join_paths(decompress_dir, 'huf_decompress.c'), - join_paths(decompress_dir, 'zstd_decompress.c'), - join_paths(dictbuilder_dir, 'cover.c'), - join_paths(dictbuilder_dir, 'divsufsort.c'), - join_paths(dictbuilder_dir, 'zdict.c'), - join_paths(deprecated_dir, 'zbuff_common.c'), - join_paths(deprecated_dir, 'zbuff_compress.c'), - join_paths(deprecated_dir, 'zbuff_decompress.c') -] - -libzstd_includes = [include_directories(common_dir, dictbuilder_dir, compress_dir, lib_dir)] - -legacy = get_option('legacy_support') -if legacy == '0' - legacy = 'false' -endif -if legacy != 'false' - if legacy == 'true' - legacy = '1' - endif - #See ZSTD_LEGACY_SUPPORT of programs/README.md - message('Enabling legacy support back to version 0.' 
+ legacy) - legacy_int = legacy.to_int() - if legacy_int > 7 - legacy_int = 7 - endif - libzstd_cflags = ['-DZSTD_LEGACY_SUPPORT=' + legacy] - - legacy_dir = join_paths(lib_dir, 'legacy') - libzstd_includes += [include_directories(legacy_dir)] - if legacy_int <= 1 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v01.c') - endif - if legacy_int <= 2 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v02.c') - endif - if legacy_int <= 3 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v03.c') - endif - if legacy_int <= 4 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v04.c') - endif - if legacy_int <= 5 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v05.c') - endif - if legacy_int <= 6 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v06.c') - endif - if legacy_int <= 7 - libzstd_srcs += join_paths(legacy_dir, 'zstd_v07.c') - endif -else - libzstd_cflags = [] -endif - -if get_option('multithread') - message('Enabling multi-threading support') - add_global_arguments('-DZSTD_MULTITHREAD', language: 'c') - libzstd_deps = [dependency('threads')] -else - libzstd_deps = [] -endif - -libzstd = library('zstd', - libzstd_srcs, - include_directories: libzstd_includes, - c_args: libzstd_cflags, - dependencies: libzstd_deps, - install: true, - soversion: '1', - ) - -programs_dir = join_paths('..', '..', 'programs') - -zstd = executable('zstd', - join_paths(programs_dir, 'bench.c'), - join_paths(programs_dir, 'datagen.c'), - join_paths(programs_dir, 'dibio.c'), - join_paths(programs_dir, 'fileio.c'), - join_paths(programs_dir, 'zstdcli.c'), - include_directories: libzstd_includes, - c_args: ['-DZSTD_NODICT', '-DZSTD_NOBENCH'], - link_with: libzstd, - install: true) - -tests_dir = join_paths('..', '..', 'tests') -datagen_c = join_paths(programs_dir, 'datagen.c') -test_includes = libzstd_includes + [include_directories(programs_dir)] - -fullbench = executable('fullbench', - datagen_c, join_paths(tests_dir, 'fullbench.c'), - include_directories: test_includes, - link_with: libzstd) -test('fullbench', fullbench) - -fuzzer = executable('fuzzer', - datagen_c, join_paths(tests_dir, 'fuzzer.c'), - include_directories: test_includes, - link_with: libzstd) -test('fuzzer', fuzzer) - -if target_machine.system() != 'windows' - paramgrill = executable('paramgrill', - datagen_c, join_paths(tests_dir, 'paramgrill.c'), - join_paths(programs_dir, 'bench.c'), - include_directories: test_includes, - link_with: libzstd, - dependencies: libm) - test('paramgrill', paramgrill) - - datagen = executable('datagen', - datagen_c, join_paths(tests_dir, 'datagencli.c'), - include_directories: test_includes, - link_with: libzstd) -endif diff --git a/contrib/meson/meson_options.txt b/contrib/meson/meson_options.txt deleted file mode 100644 index 99845c8aa..000000000 --- a/contrib/meson/meson_options.txt +++ /dev/null @@ -1,3 +0,0 @@ -option('multithread', type: 'boolean', value: false) -option('legacy_support', type: 'string', value: '4', - description: 'True or false, or 7 to 1 for v0.7+ to v0.1+.') diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index d74b41802..d88a7cc8e 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -487,8 +487,9 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); /* frame parameters */ ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression, - * it is provided using ZSTD_CCtx_setPledgedSrcSize() */ + * Content size must be known at the beginning of compression. 
+ * This is automatically the case when using ZSTD_compress2(), + * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ @@ -524,12 +525,13 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. */ ZSTD_c_experimentalParam1=500, ZSTD_c_experimentalParam2=10, ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4 + ZSTD_c_experimentalParam4=1001 } ZSTD_cParameter;
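The hunk above documents the advanced compression parameters (`ZSTD_c_contentSizeFlag`, `ZSTD_c_checksumFlag`, the experimental placeholders). As a quick illustration of how they are meant to be driven — this sketch is not part of the patch, and depending on the library version these prototypes may still sit behind `ZSTD_STATIC_LINKING_ONLY` — a one-shot call through `ZSTD_compress2()` looks roughly like this:

```c
/* Minimal sketch (not part of this patch) of the advanced one-shot API
 * described by the manual excerpt above. Return codes of the setParameter
 * calls are ignored for brevity. */
#include <stdio.h>
#include <zstd.h>

int main(void)
{
    const char src[] = "advanced-API one-shot compression example";
    char dst[256];   /* comfortably >= ZSTD_compressBound(sizeof(src)) */

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (cctx == NULL) return 1;

    /* Parameters are pushed into the CCtx before compression starts. */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);  /* checksum at end of frame */

    /* ZSTD_compress2() starts a new frame; since the whole input is passed in
     * a single call, the content size is known and written into the frame
     * header (ZSTD_c_contentSizeFlag defaults to 1). */
    size_t const cSize = ZSTD_compress2(cctx, dst, sizeof(dst), src, sizeof(src));
    ZSTD_freeCCtx(cctx);

    if (ZSTD_isError(cSize)) {
        fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(cSize));
        return 1;
    }
    printf("compressed %zu -> %zu bytes\n", sizeof(src), cSize);
    return 0;
}
```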
typedef struct {
@@ -562,14 +564,17 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 
 
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
 

  Total input data size to be compressed as a single frame.
- This value will be controlled at end of frame, and trigger an error if not respected.
+ Value will be written in frame header, unless explicitly forbidden using ZSTD_c_contentSizeFlag.
+ This value will also be controlled at end of frame, and trigger an error if not respected.
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
  Note 2 : pledgedSrcSize is only valid once, for the next frame.
-          It's discarded at the end of the frame.
-          Note 3 : If all data is provided and consumed in a single round,
+          It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ Note 3 : Whenever all input data is provided and consumed in a single round,
+          for example with ZSTD_compress2(),
+          or by immediately invoking ZSTD_compressStream2(,,,ZSTD_e_end),
           this value is automatically overriden by srcSize instead.
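A hedged sketch of the streaming case described above — again not part of this patch, and the `ZSTD_compressStream2()` prototype may require `ZSTD_STATIC_LINKING_ONLY` depending on the library version — where the caller pledges the source size before feeding data:

```c
/* Minimal sketch (not part of this patch): declaring the total input size for
 * a streaming compression, so it lands in the frame header and is checked when
 * the frame is closed. Assumes dstCapacity >= ZSTD_compressBound(srcSize). */
#include <zstd.h>

size_t stream_one_frame(ZSTD_CCtx* cctx,
                        void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    /* Only valid for the next frame; checked at end of frame. */
    ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);

    ZSTD_inBuffer  input  = { src, srcSize, 0 };
    ZSTD_outBuffer output = { dst, dstCapacity, 0 };

    /* Feeding everything in a single ZSTD_e_end call would also let the
     * library fill in the size by itself, as Note 3 above explains. */
    size_t remaining;
    do {
        remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
        if (ZSTD_isError(remaining)) return remaining;  /* error code */
    } while (remaining != 0);  /* 0 means the frame is fully flushed */

    return output.pos;  /* number of compressed bytes written */
}
```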


@@ -652,6 +657,8 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); void* dst, size_t dstCapacity, const void* src, size_t srcSize);

Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + ZSTD_compress2() always starts a new frame. + Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - The function is always blocking, returns when compression is completed. Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c index 97bf8cb5e..511b35676 100644 --- a/examples/dictionary_compression.c +++ b/examples/dictionary_compression.c @@ -7,71 +7,13 @@ * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ - - #include // malloc, exit #include // printf #include // strerror #include // errno #include // stat #include // presumes zstd library is installed - - -static off_t fsize_orDie(const char *filename) -{ - struct stat st; - if (stat(filename, &st) == 0) return st.st_size; - /* error */ - perror(filename); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(2); -} - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc"); - exit(3); -} - -static void* loadFile_orDie(const char* fileName, size_t* size) -{ - off_t const buffSize = fsize_orDie(fileName); - FILE* const inFile = fopen_orDie(fileName, "rb"); - void* const buffer = malloc_orDie(buffSize); - size_t const readSize = fread(buffer, 1, buffSize, inFile); - if (readSize != (size_t)buffSize) { - fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno)); - exit(4); - } - fclose(inFile); - *size = buffSize; - return buffer; -} - -static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize) -{ - FILE* const oFile = fopen_orDie(fileName, "wb"); - size_t const wSize = fwrite(buff, 1, buffSize, oFile); - if (wSize != (size_t)buffSize) { - fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno)); - exit(5); - } - if (fclose(oFile)) { - perror(fileName); - exit(6); - } -} +#include "utils.h" /* createDict() : `dictFileName` is supposed to have been created using `zstd --train` */ diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c index 07e6e24c6..69f56d56b 100644 --- a/examples/dictionary_decompression.c +++ b/examples/dictionary_decompression.c @@ -17,49 +17,7 @@ #include // stat #define ZSTD_STATIC_LINKING_ONLY // ZSTD_findDecompressedSize #include // presumes zstd library is installed - - -static off_t fsize_orDie(const char *filename) -{ - struct stat st; - if (stat(filename, &st) == 0) return st.st_size; - /* error */ - perror(filename); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(2); -} - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc"); - exit(3); -} - -static void* loadFile_orDie(const char* fileName, size_t* size) -{ - off_t const buffSize = fsize_orDie(fileName); - FILE* const inFile = fopen_orDie(fileName, "rb"); - void* const buffer = 
malloc_orDie(buffSize); - size_t const readSize = fread(buffer, 1, buffSize, inFile); - if (readSize != (size_t)buffSize) { - fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno)); - exit(4); - } - fclose(inFile); - *size = buffSize; - return buffer; -} +#include "utils.h" /* createDict() : `dictFileName` is supposed to have been created using `zstd --train` */ @@ -74,7 +32,6 @@ static ZSTD_DDict* createDict_orDie(const char* dictFileName) return ddict; } - static void decompress(const char* fname, const ZSTD_DDict* ddict) { size_t cSize; diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c index 4308a2e4d..442ff40ae 100644 --- a/examples/multiple_streaming_compression.c +++ b/examples/multiple_streaming_compression.c @@ -20,53 +20,7 @@ #include // errno #define ZSTD_STATIC_LINKING_ONLY // streaming API defined as "experimental" for the time being #include // presumes zstd library is installed - - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc:"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - +#include "utils.h" typedef struct { void* buffIn; @@ -95,7 +49,6 @@ static void freeResources(resources ress) free(ress.buffOut); } - static void compressFile_orDie(resources ress, const char* fname, const char* outName, int cLevel) { FILE* const fin = fopen_orDie(fname, "rb"); @@ -125,7 +78,6 @@ static void compressFile_orDie(resources ress, const char* fname, const char* ou fclose_orDie(fin); } - int main(int argc, const char** argv) { const char* const exeName = argv[0]; diff --git a/examples/simple_compression.c b/examples/simple_compression.c index 9ade424a2..0193dd40f 100644 --- a/examples/simple_compression.c +++ b/examples/simple_compression.c @@ -8,78 +8,13 @@ * You may select, at your option, one of the above-listed licenses. */ - - #include // malloc, free, exit #include // fprintf, perror, fopen, etc. 
#include // strlen, strcat, memset, strerror #include // errno #include // stat #include // presumes zstd library is installed - - -static off_t fsize_orDie(const char *filename) -{ - struct stat st; - if (stat(filename, &st) == 0) return st.st_size; - /* error */ - perror(filename); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(2); -} - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror(NULL); - exit(3); -} - -static void* loadFile_orDie(const char* fileName, size_t* size) -{ - off_t const fileSize = fsize_orDie(fileName); - size_t const buffSize = (size_t)fileSize; - if ((off_t)buffSize < fileSize) { /* narrowcast overflow */ - fprintf(stderr, "%s : filesize too large \n", fileName); - exit(4); - } - FILE* const inFile = fopen_orDie(fileName, "rb"); - void* const buffer = malloc_orDie(buffSize); - size_t const readSize = fread(buffer, 1, buffSize, inFile); - if (readSize != (size_t)buffSize) { - fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno)); - exit(5); - } - fclose(inFile); /* can't fail, read only */ - *size = buffSize; - return buffer; -} - - -static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize) -{ - FILE* const oFile = fopen_orDie(fileName, "wb"); - size_t const wSize = fwrite(buff, 1, buffSize, oFile); - if (wSize != (size_t)buffSize) { - fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno)); - exit(6); - } - if (fclose(oFile)) { - perror(fileName); - exit(7); - } -} - +#include "utils.h" static void compress_orDie(const char* fname, const char* oname) { @@ -103,7 +38,6 @@ static void compress_orDie(const char* fname, const char* oname) free(cBuff); } - static char* createOutFilename_orDie(const char* filename) { size_t const inL = strlen(filename); diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c index c1818a95c..ee055dd67 100644 --- a/examples/simple_decompression.c +++ b/examples/simple_decompression.c @@ -15,50 +15,7 @@ #include // stat #define ZSTD_STATIC_LINKING_ONLY // ZSTD_findDecompressedSize #include // presumes zstd library is installed - - -static off_t fsize_orDie(const char *filename) -{ - struct stat st; - if (stat(filename, &st) == 0) return st.st_size; - /* error */ - fprintf(stderr, "stat: %s : %s \n", filename, strerror(errno)); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - fprintf(stderr, "fopen: %s : %s \n", filename, strerror(errno)); - exit(2); -} - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size + !size); /* avoid allocating size of 0 : may return NULL (implementation dependent) */ - if (buff) return buff; - /* error */ - fprintf(stderr, "malloc: %s \n", strerror(errno)); - exit(3); -} - -static void* loadFile_orDie(const char* fileName, size_t* size) -{ - off_t const buffSize = fsize_orDie(fileName); - FILE* const inFile = fopen_orDie(fileName, "rb"); - void* const buffer = malloc_orDie(buffSize); - size_t const readSize = fread(buffer, 1, buffSize, inFile); - if (readSize != (size_t)buffSize) { - fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno)); - exit(4); - } - fclose(inFile); /* can't fail (read only) */ - *size = buffSize; - return buffer; -} - 
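/* Editor's note (illustration only, not part of the original patch): a
 * decompressor must size its output buffer before calling ZSTD_decompress().
 * This example uses ZSTD_findDecompressedSize(), hence ZSTD_STATIC_LINKING_ONLY
 * above; the stable-API alternative reads the frame header instead. Sketch,
 * with placeholder names:
 *
 *     unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize);
 *     if (rSize == ZSTD_CONTENTSIZE_ERROR || rSize == ZSTD_CONTENTSIZE_UNKNOWN)
 *         exit(9);
 *     void*  const rBuff = malloc_orDie((size_t)rSize);
 *     size_t const dSize = ZSTD_decompress(rBuff, (size_t)rSize, cBuff, cSize);
 *     if (ZSTD_isError(dSize) || dSize != (size_t)rSize) exit(10);
 */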
+#include "utils.h" static void decompress(const char* fname) { @@ -90,7 +47,6 @@ static void decompress(const char* fname) free(cBuff); } - int main(int argc, const char** argv) { const char* const exeName = argv[0]; diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index 9287ff398..e056f86f0 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -13,53 +13,7 @@ #include // fprintf, perror, feof, fopen, etc. #include // strlen, memset, strcat #include // presumes zstd library is installed - - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc:"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - +#include "utils.h" static void compressFile_orDie(const char* fname, const char* outName, int cLevel) { @@ -102,8 +56,7 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve ZSTD_freeCStream(cstream); fclose_orDie(fout); - fclose_orDie(fin); - free(buffIn); + fclose_orDie(fin); free(buffIn); free(buffOut); } diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index 504a5e316..13c3c509e 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -14,53 +14,7 @@ #include // strerror #include // errno #include // presumes zstd library is installed - - -static void* malloc_orDie(size_t size) -{ - void* const buff = malloc(size); - if (buff) return buff; - /* error */ - perror("malloc:"); - exit(1); -} - -static FILE* fopen_orDie(const char *filename, const char *instruction) -{ - FILE* const inFile = fopen(filename, instruction); - if (inFile) return inFile; - /* error */ - perror(filename); - exit(3); -} - -static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) -{ - size_t const readSize = fread(buffer, 1, sizeToRead, file); - if (readSize == sizeToRead) return readSize; /* good */ - if (feof(file)) return readSize; /* good, reached end of file */ - /* error */ - perror("fread"); - exit(4); -} - -static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) -{ - size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); - if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ - /* error */ - perror("fwrite"); - exit(5); -} - -static size_t fclose_orDie(FILE* file) -{ - if (!fclose(file)) return 0; - /* error */ - perror("fclose"); - exit(6); -} - +#include "utils.h" static void decompressFile_orDie(const char* fname) { diff --git a/examples/utils.h b/examples/utils.h new file mode 100644 index 000000000..55a329e23 --- /dev/null +++ b/examples/utils.h @@ -0,0 
+1,181 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This header file has common utility functions used in examples. + */ +#ifndef UTILS_H +#define UTILS_H + +#include // malloc, free, exit +#include // fprintf, perror, fopen, etc. +#include // strlen, strcat, memset, strerror +#include // errno +#include // stat + +/* + * Define the returned error code from utility functions. + */ +typedef enum { + ERROR_fsize = 1, + ERROR_fopen = 2, + ERROR_fclose = 3, + ERROR_fread = 4, + ERROR_fwrite = 5, + ERROR_loadFile = 6, + ERROR_saveFile = 7, + ERROR_malloc = 8, + ERROR_largeFile = 9, +} UTILS_ErrorCode; + +/*! fsize_orDie() : + * Get the size of a given file path. + * + * @return The size of a given file path. + */ +static off_t fsize_orDie(const char *filename) +{ + struct stat st; + if (stat(filename, &st) == 0) return st.st_size; + /* error */ + perror(filename); + exit(ERROR_fsize); +} + +/*! fopen_orDie() : + * Open a file using given file path and open option. + * + * @return If successful this function will return a FILE pointer to an + * opened file otherwise it sends an error to stderr and exits. + */ +static FILE* fopen_orDie(const char *filename, const char *instruction) +{ + FILE* const inFile = fopen(filename, instruction); + if (inFile) return inFile; + /* error */ + perror(filename); + exit(ERROR_fopen); +} + +/*! fclose_orDie() : + * Close an opened file using given FILE pointer. + */ +static void fclose_orDie(FILE* file) +{ + if (!fclose(file)) { return; }; + /* error */ + perror("fclose"); + exit(ERROR_fclose); +} + +/*! fread_orDie() : + * + * Read sizeToRead bytes from a given file, storing them at the + * location given by buffer. + * + * @return The number of bytes read. + */ +static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file) +{ + size_t const readSize = fread(buffer, 1, sizeToRead, file); + if (readSize == sizeToRead) return readSize; /* good */ + if (feof(file)) return readSize; /* good, reached end of file */ + /* error */ + perror("fread"); + exit(ERROR_fread); +} + +/*! fwrite_orDie() : + * + * Write sizeToWrite bytes to a file pointed to by file, obtaining + * them from a location given by buffer. + * + * Note: This function will send an error to stderr and exit if it + * cannot write data to the given file pointer. + * + * @return The number of bytes written. + */ +static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file) +{ + size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file); + if (writtenSize == sizeToWrite) return sizeToWrite; /* good */ + /* error */ + perror("fwrite"); + exit(ERROR_fwrite); +} + +/*! malloc_orDie() : + * Allocate memory. + * + * @return If successful this function returns a pointer to allo- + * cated memory. If there is an error, this function will send that + * error to stderr and exit. + */ +static void* malloc_orDie(size_t size) +{ + void* const buff = malloc(size); + if (buff) return buff; + /* error */ + perror("malloc"); + exit(ERROR_malloc); +} + +/*! loadFile_orDie() : + * Read size bytes from a file. + * + * Note: This function will send an error to stderr and exit if it + * cannot read data from the given file path. 
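 *
 * Example usage (editor's sketch, not part of the original patch; the file
 * name is a placeholder):
 *
 *     size_t size;
 *     void* const buffer = loadFile_orDie("input.bin", &size);
 *     ... use the first `size` bytes of buffer ...
 *     free(buffer);
 *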
+ * + * @return If successful this function will return a pointer to read + * data otherwise it will printout an error to stderr and exit. + */ +static void* loadFile_orDie(const char* fileName, size_t* size) +{ + off_t const fileSize = fsize_orDie(fileName); + size_t const buffSize = (size_t)fileSize; + if ((off_t)buffSize < fileSize) { /* narrowcast overflow */ + fprintf(stderr, "%s : filesize too large \n", fileName); + exit(ERROR_largeFile); + } + FILE* const inFile = fopen_orDie(fileName, "rb"); + void* const buffer = malloc_orDie(buffSize); + size_t const readSize = fread(buffer, 1, buffSize, inFile); + if (readSize != (size_t)buffSize) { + fprintf(stderr, "fread: %s : %s \n", fileName, strerror(errno)); + exit(ERROR_fread); + } + fclose(inFile); /* can't fail, read only */ + *size = buffSize; + return buffer; +} + +/*! saveFile_orDie() : + * + * Save buffSize bytes to a given file path, obtaining them from a location pointed + * to by buff. + * + * Note: This function will send an error to stderr and exit if it + * cannot write to a given file. + */ +static void saveFile_orDie(const char* fileName, const void* buff, size_t buffSize) +{ + FILE* const oFile = fopen_orDie(fileName, "wb"); + size_t const wSize = fwrite(buff, 1, buffSize, oFile); + if (wSize != (size_t)buffSize) { + fprintf(stderr, "fwrite: %s : %s \n", fileName, strerror(errno)); + exit(ERROR_fwrite); + } + if (fclose(oFile)) { + perror(fileName); + exit(ERROR_fclose); + } +} + +#endif diff --git a/lib/BUCK b/lib/BUCK index bd93b082a..637c20d66 100644 --- a/lib/BUCK +++ b/lib/BUCK @@ -1,6 +1,7 @@ cxx_library( name='zstd', header_namespace='', + exported_headers=['zstd.h'], visibility=['PUBLIC'], deps=[ ':common', @@ -17,7 +18,7 @@ cxx_library( exported_headers=subdir_glob([ ('compress', 'zstd*.h'), ]), - srcs=glob(['compress/zstd*.c']), + srcs=glob(['compress/zstd*.c', 'compress/hist.c']), deps=[':common'], ) @@ -40,7 +41,7 @@ cxx_library( header_namespace='', visibility=['PUBLIC'], exported_headers=subdir_glob([ - ('decprecated', '*.h'), + ('deprecated', '*.h'), ]), srcs=glob(['deprecated/*.c']), deps=[':common'], @@ -118,6 +119,7 @@ cxx_library( 'decompress/huf_decompress.c', ], deps=[ + ':debug', ':bitstream', ':compiler', ':errors', @@ -204,9 +206,20 @@ cxx_library( ], ) +cxx_library( + name='debug', + header_namespace='', + visibility=['PUBLIC'], + exported_headers=subdir_glob([ + ('common', 'debug.h'), + ]), + srcs=['common/debug.c'], +) + cxx_library( name='common', deps=[ + ':debug', ':bitstream', ':compiler', ':cpu', diff --git a/lib/Makefile b/lib/Makefile index b39786cf2..3fddf4fcd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,7 +31,12 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) FLAGS = $(CPPFLAGS) $(CFLAGS) -GREP = grep --color=never +HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) +GREP_OPTIONS ?= +ifeq ($HAVE_COLORNEVER, 1) +GREP_OPTIONS += --color=never +endif +GREP = grep $(GREP_OPTIONS) ZSTDCOMMON_FILES := $(sort $(wildcard common/*.c)) ZSTDCOMP_FILES := $(sort $(wildcard compress/*.c)) diff --git a/lib/common/cpu.h b/lib/common/cpu.h index eeb428ad5..5f0923fc9 100644 --- a/lib/common/cpu.h +++ b/lib/common/cpu.h @@ -78,7 +78,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { __asm__( "pushl %%ebx\n\t" "cpuid\n\t" - "movl %%ebx, %%eax\n\r" + "movl %%ebx, %%eax\n\t" "popl %%ebx" : "=a"(f7b), "=c"(f7c) : "a"(7), "c"(0) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c 
index fb1a38a23..fde44bd5c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4091,19 +4091,21 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - size_t oPos = 0; - size_t iPos = 0; - size_t const result = ZSTD_compressStream2_simpleArgs(cctx, - dst, dstCapacity, &oPos, - src, srcSize, &iPos, - ZSTD_e_end); - assert(iPos == srcSize); - if (ZSTD_isError(result)) return result; - if (result != 0) { /* compression not completed, due to lack of output space */ - assert(oPos == dstCapacity); - return ERROR(dstSize_tooSmall); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + if (ZSTD_isError(result)) return result; + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + return ERROR(dstSize_tooSmall); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; } - return oPos; } /*====== Finalize ======*/ diff --git a/lib/zstd.h b/lib/zstd.h index 17e109b79..6c873544b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -583,8 +583,9 @@ typedef enum { /* frame parameters */ ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression, - * it is provided using ZSTD_CCtx_setPledgedSrcSize() */ + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ @@ -620,12 +621,13 @@ typedef enum { * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. */ ZSTD_c_experimentalParam1=500, ZSTD_c_experimentalParam2=10, ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4 + ZSTD_c_experimentalParam4=1001 } ZSTD_cParameter; @@ -659,14 +661,17 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param /*! ZSTD_CCtx_setPledgedSrcSize() : * Total input data size to be compressed as a single frame. - * This value will be controlled at end of frame, and trigger an error if not respected. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. * Note 2 : pledgedSrcSize is only valid once, for the next frame. - * It's discarded at the end of the frame. 
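 *
 * Usage sketch (editor's illustration, not part of the original patch;
 * cctx, src, dst and their sizes are assumed to exist):
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *     ZSTD_CCtx_setPledgedSrcSize(cctx, srcSize);
 *     {   ZSTD_inBuffer  input  = { src, srcSize, 0 };
 *         ZSTD_outBuffer output = { dst, dstCapacity, 0 };
 *         size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
 *         assert(!ZSTD_isError(remaining));  // 0 when the frame is fully flushed
 *     }
 *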
- * Note 3 : If all data is provided and consumed in a single round, + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), * this value is automatically overriden by srcSize instead. */ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); @@ -750,6 +755,8 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); /*! ZSTD_compress2() : * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - The function is always blocking, returns when compression is completed. * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. diff --git a/programs/Makefile b/programs/Makefile index 77c1d6a2d..d1910fbb4 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -29,7 +29,12 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT)) ZSTD_VERSION = $(LIBVER) -GREP = grep --color=never +HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) +GREP_OPTIONS ?= +ifeq ($HAVE_COLORNEVER, 1) +GREP_OPTIONS += --color=never +endif +GREP = grep $(GREP_OPTIONS) ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1) ALIGN_LOOP = -falign-loops=32 @@ -91,7 +96,7 @@ VOID = /dev/null # thread detection NO_THREAD_MSG := ==> no threads, building without multithreading support -HAVE_PTHREAD := $(shell printf '\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_pthread$(EXT) -x c - -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0) +HAVE_PTHREAD := $(shell printf '\#include \nint main(void) { return 0; }' > have_pthread.c && $(CC) $(FLAGS) -o have_pthread$(EXT) have_pthread.c -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0; rm have_pthread.c) HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0) ifeq ($(HAVE_THREAD), 1) THREAD_MSG := ==> building with threading support @@ -103,7 +108,7 @@ endif # zlib detection NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support -HAVE_ZLIB := $(shell printf '\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_zlib$(EXT) -x c - -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0) +HAVE_ZLIB := $(shell printf '\#include \nint main(void) { return 0; }' > have_zlib.c && $(CC) $(FLAGS) -o have_zlib$(EXT) have_zlib.c -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0; rm have_zlib.c) ifeq ($(HAVE_ZLIB), 1) ZLIB_MSG := ==> building zstd with .gz compression support ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS @@ -114,7 +119,7 @@ endif # lzma detection NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support -HAVE_LZMA := $(shell printf '\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lzma$(EXT) -x c - -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0) +HAVE_LZMA := $(shell printf '\#include \nint main(void) { return 0; }' > have_lzma.c && $(CC) $(FLAGS) -o have_lzma$(EXT) have_lzma.c -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0; rm have_lzma.c) ifeq ($(HAVE_LZMA), 1) 
LZMA_MSG := ==> building zstd with .xz/.lzma compression support LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS @@ -125,7 +130,7 @@ endif # lz4 detection NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support -HAVE_LZ4 := $(shell printf '\#include \n\#include \nint main(void) { return 0; }' | $(CC) $(FLAGS) -o have_lz4$(EXT) -x c - -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0) +HAVE_LZ4 := $(shell printf '\#include \n\#include \nint main(void) { return 0; }' > have_lz4.c && $(CC) $(FLAGS) -o have_lz4$(EXT) have_lz4.c -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0; rm have_lz4.c) ifeq ($(HAVE_LZ4), 1) LZ4_MSG := ==> building zstd with .lz4 compression support LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS @@ -275,7 +280,12 @@ preview-man: clean-man man #----------------------------------------------------------------------------- ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku)) -EGREP = egrep --color=never +HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0) +EGREP_OPTIONS ?= +ifeq ($HAVE_COLORNEVER, 1) +EGREP_OPTIONS += --color=never +endif +EGREP = egrep $(EGREP_OPTIONS) # Print a two column output of targets and their description. To add a target description, put a # comment in the Makefile with the format "## : ". For example: diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 71f0f4d7a..57440e3c0 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -151,7 +151,7 @@ static int usage_advanced(const char* programName) #ifdef UTIL_HAS_CREATEFILELIST DISPLAY( " -r : operate recursively on directories \n"); #endif - DISPLAY( "--format=zstd : compress files to the .zstd format (default) \n"); + DISPLAY( "--format=zstd : compress files to the .zst format (default) \n"); #ifdef ZSTD_GZCOMPRESS DISPLAY( "--format=gzip : compress files to the .gz format \n"); #endif diff --git a/programs/zstdgrep b/programs/zstdgrep index 9f871c03f..a10e0710a 100755 --- a/programs/zstdgrep +++ b/programs/zstdgrep @@ -31,94 +31,101 @@ grep_args="" hyphen=0 silent=0 -prg=$(basename $0) +prg=$(basename "$0") # handle being called 'zegrep' or 'zfgrep' -case ${prg} in - *zegrep) - grep_args="-E";; - *zfgrep) - grep_args="-F";; +case "${prg}" in + *zegrep) grep_args="-E";; + *zfgrep) grep_args="-F";; esac # skip all options and pass them on to grep taking care of options # with arguments, and if -e was supplied -while [ $# -gt 0 -a ${endofopts} -eq 0 ] -do - case $1 in +while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do + case "$1" in # from GNU grep-2.5.1 -- keep in sync! 
- -[ABCDXdefm]) - if [ $# -lt 2 ] - then - echo "${prg}: missing argument for $1 flag" >&2 - exit 1 - fi - case $1 in - -e) - pattern="$2" - pattern_found=1 - shift 2 - break - ;; - *) - ;; - esac - grep_args="${grep_args} $1 $2" - shift 2 - ;; - --) - shift - endofopts=1 - ;; - -) - hyphen=1 - shift - ;; - -h) - silent=1 - shift - ;; - -*) - grep_args="${grep_args} $1" - shift - ;; - *) - # pattern to grep for - endofopts=1 - ;; + -[ABCDXdefm]) + if [ "$#" -lt 2 ]; then + printf '%s: missing argument for %s flag\n' "${prg}" "$1" >&2 + exit 1 + fi + case "$1" in + -e) + pattern="$2" + pattern_found=1 + shift 2 + break + ;; + *) + ;; + esac + grep_args="${grep_args} $1 $2" + shift 2 + ;; + --) + shift + endofopts=1 + ;; + -) + hyphen=1 + shift + ;; + -h) + silent=1 + shift + ;; + -*) + grep_args="${grep_args} $1" + shift + ;; + *) + # pattern to grep for + endofopts=1 + ;; esac done # if no -e option was found, take next argument as grep-pattern -if [ ${pattern_found} -lt 1 ] -then - if [ $# -ge 1 ]; then - pattern="$1" - shift - elif [ ${hyphen} -gt 0 ]; then - pattern="-" +if [ "${pattern_found}" -lt 1 ]; then + if [ "$#" -ge 1 ]; then + pattern="$1" + shift + elif [ "${hyphen}" -gt 0 ]; then + pattern="-" else - echo "${prg}: missing pattern" >&2 - exit 1 + printf '%s: missing pattern\n' "${prg}" >&2 + exit 1 fi fi +EXIT_CODE=0 # call grep ... -if [ $# -lt 1 ] -then +if [ "$#" -lt 1 ]; then # ... on stdin - ${zcat} -fq - | ${grep} ${grep_args} -- "${pattern}" - + set -f # Disable file name generation (globbing). + # shellcheck disable=SC2086 + "${zcat}" -fq - | "${grep}" ${grep_args} -- "${pattern}" - + EXIT_CODE=$? + set +f else # ... on all files given on the command line - if [ ${silent} -lt 1 -a $# -gt 1 ]; then - grep_args="-H ${grep_args}" + if [ "${silent}" -lt 1 ] && [ "$#" -gt 1 ]; then + grep_args="-H ${grep_args}" fi - while [ $# -gt 0 ] - do - ${zcat} -fq -- "$1" | ${grep} --label="${1}" ${grep_args} -- "${pattern}" - - shift + CUR_EXIT_CODE=0 + EXIT_CODE=1 + set -f + while [ "$#" -gt 0 ]; do + # shellcheck disable=SC2086 + "${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" - + CUR_EXIT_CODE=$? + if [ "${CUR_EXIT_CODE}" -eq 0 ] && [ "${EXIT_CODE}" -ne 1 ]; then + EXIT_CODE=0 + fi + shift done + set +f fi -exit 0 +exit "${EXIT_CODE}" diff --git a/tests/Makefile b/tests/Makefile index f363001b1..25bd5c84e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -320,7 +320,7 @@ test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zstream32 test-all: test test32 valgrindTest test-decodecorpus-cli -.PHONY: test-zstd test-zstd32 test-zstd-nolegacy +.PHONY: test-zstd test-zstd32 test-zstd-nolegacy test-zstdgrep test-zstd: ZSTD = $(PRGDIR)/zstd test-zstd: zstd @@ -352,6 +352,10 @@ test-gzstd: gzstd $(PRGDIR)/zstd -dcf - use_dictionary && !data_has_dict(data); +} + +int config_get_level(config_t const* config) { + param_values_t const params = config->param_values; + size_t i; + for (size_t i = 0; i < params.size; ++i) { + if (params.data[i].param == ZSTD_c_compressionLevel) + return (int)params.data[i].value; + } + return CONFIG_NO_LEVEL; +} + +ZSTD_parameters config_get_zstd_params( + config_t const* config, + uint64_t srcSize, + size_t dictSize) { + ZSTD_parameters zparams = {}; + param_values_t const params = config->param_values; + int level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + level = 3; + zparams = ZSTD_getParams( + level, + config->no_pledged_src_size ? 
ZSTD_CONTENTSIZE_UNKNOWN : srcSize, + dictSize); + for (size_t i = 0; i < params.size; ++i) { + unsigned const value = params.data[i].value; + switch (params.data[i].param) { + case ZSTD_c_contentSizeFlag: + zparams.fParams.contentSizeFlag = value; + break; + case ZSTD_c_checksumFlag: + zparams.fParams.checksumFlag = value; + break; + case ZSTD_c_dictIDFlag: + zparams.fParams.noDictIDFlag = !value; + break; + case ZSTD_c_windowLog: + zparams.cParams.windowLog = value; + break; + case ZSTD_c_chainLog: + zparams.cParams.chainLog = value; + break; + case ZSTD_c_hashLog: + zparams.cParams.hashLog = value; + break; + case ZSTD_c_searchLog: + zparams.cParams.searchLog = value; + break; + case ZSTD_c_minMatch: + zparams.cParams.minMatch = value; + break; + case ZSTD_c_targetLength: + zparams.cParams.targetLength = value; + break; + case ZSTD_c_compressionStrategy: + zparams.cParams.strategy = (ZSTD_strategy)value; + break; + default: + break; + } + } + return zparams; +} diff --git a/tests/regression/config.h b/tests/regression/config.h new file mode 100644 index 000000000..3cd0308a0 --- /dev/null +++ b/tests/regression/config.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include + +#define ZSTD_STATIC_LINKING_ONLY +#include + +#include "data.h" + +typedef struct { + ZSTD_cParameter param; + int value; +} param_value_t; + +typedef struct { + size_t size; + param_value_t const* data; +} param_values_t; + +/** + * The config tells the compression method what options to use. + */ +typedef struct { + const char* name; /**< Identifies the config in the results table */ + /** + * Optional arguments to pass to the CLI. If not set, CLI-based methods + * will skip this config. + */ + char const* cli_args; + /** + * Parameters to pass to the advanced API. If the advanced API isn't used, + * the parameters will be derived from these. + */ + param_values_t param_values; + /** + * Boolean parameter that says if we should use a dictionary. If the data + * doesn't have a dictionary, this config is skipped. Defaults to no. + */ + int use_dictionary; + /** + * Boolean parameter that says if we should pass the pledged source size + * when the method allows it. Defaults to yes. + */ + int no_pledged_src_size; +} config_t; + +/** + * Returns true if the config should skip this data. + * For instance, if the config requires a dictionary but the data doesn't have + * one. + */ +int config_skip_data(config_t const* config, data_t const* data); + +#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1) +/** + * Returns the compression level specified by the config, or CONFIG_NO_LEVEL if + * no level is specified. Note that 0 is a valid compression level, meaning + * default. + */ +int config_get_level(config_t const* config); + +/** + * Returns the compression parameters specified by the config. + */ +ZSTD_parameters config_get_zstd_params( + config_t const* config, + uint64_t srcSize, + size_t dictSize); + +/** + * The NULL-terminated list of configs. 
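 *
 * Example iteration (editor's sketch, not part of the original patch):
 *
 *     for (config_t const* const* c = configs; *c != NULL; ++c)
 *         printf("config: %s (level %d)\n", (*c)->name, config_get_level(*c));
 *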
+ */ +extern config_t const* const* configs; + +#endif diff --git a/tests/regression/data.c b/tests/regression/data.c new file mode 100644 index 000000000..86e7687de --- /dev/null +++ b/tests/regression/data.c @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "data.h" + +#include +#include +#include +#include + +#include + +#include + +#include "mem.h" +#include "util.h" +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + +/** + * Data objects + */ + +#define REGRESSION_RELEASE(x) \ + "https://github.com/facebook/zstd/releases/download/regression-data/" x + +data_t silesia = { + .name = "silesia", + .type = data_type_dir, + .data = + { + .url = REGRESSION_RELEASE("silesia.tar.zst"), + .xxhash64 = 0x48a199f92f93e977LL, + }, +}; + +data_t silesia_tar = { + .name = "silesia.tar", + .type = data_type_file, + .data = + { + .url = REGRESSION_RELEASE("silesia.tar.zst"), + .xxhash64 = 0x48a199f92f93e977LL, + }, +}; + +data_t github = { + .name = "github", + .type = data_type_dir, + .data = + { + .url = REGRESSION_RELEASE("github.tar.zst"), + .xxhash64 = 0xa9b1b44b020df292LL, + }, + .dict = + { + .url = REGRESSION_RELEASE("github.dict.zst"), + .xxhash64 = 0x1eddc6f737d3cb53LL, + + }, +}; + +static data_t* g_data[] = { + &silesia, + &silesia_tar, + &github, + NULL, +}; + +data_t const* const* data = (data_t const* const*)g_data; + +/** + * data helpers. + */ + +int data_has_dict(data_t const* data) { + return data->dict.url != NULL; +} + +/** + * data buffer helper functions (documented in header). + */ + +data_buffer_t data_buffer_create(size_t const capacity) { + data_buffer_t buffer = {}; + + buffer.data = (uint8_t*)malloc(capacity); + if (buffer.data == NULL) + return buffer; + buffer.capacity = capacity; + return buffer; +} + +data_buffer_t data_buffer_read(char const* filename) { + data_buffer_t buffer = {}; + + uint64_t const size = UTIL_getFileSize(filename); + if (size == UTIL_FILESIZE_UNKNOWN) { + fprintf(stderr, "unknown size for %s\n", filename); + return buffer; + } + + buffer.data = (uint8_t*)malloc(size); + if (buffer.data == NULL) { + fprintf(stderr, "malloc failed\n"); + return buffer; + } + buffer.capacity = size; + + FILE* file = fopen(filename, "rb"); + if (file == NULL) { + fprintf(stderr, "file null\n"); + goto err; + } + buffer.size = fread(buffer.data, 1, buffer.capacity, file); + fclose(file); + if (buffer.size != buffer.capacity) { + fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity); + goto err; + } + + return buffer; +err: + free(buffer.data); + memset(&buffer, 0, sizeof(buffer)); + return buffer; +} + +data_buffer_t data_buffer_get_data(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (data->type != data_type_file) + return kEmptyBuffer; + + return data_buffer_read(data->data.path); +} + +data_buffer_t data_buffer_get_dict(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (!data_has_dict(data)) + return kEmptyBuffer; + + return data_buffer_read(data->dict.path); +} + +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) { + size_t const size = + buffer1.size < buffer2.size ? 
buffer1.size : buffer2.size; + int const cmp = memcmp(buffer1.data, buffer2.data, size); + if (cmp != 0) + return cmp; + if (buffer1.size < buffer2.size) + return -1; + if (buffer1.size == buffer2.size) + return 0; + assert(buffer1.size > buffer2.size); + return 1; +} + +void data_buffer_free(data_buffer_t buffer) { + free(buffer.data); +} + +/** + * data filenames helpers. + */ + +data_filenames_t data_filenames_get(data_t const* data) { + data_filenames_t filenames = {.buffer = NULL, .size = 0}; + char const* path = data->data.path; + + filenames.filenames = UTIL_createFileList( + &path, + 1, + &filenames.buffer, + &filenames.size, + /* followLinks */ 0); + return filenames; +} + +void data_filenames_free(data_filenames_t filenames) { + UTIL_freeFileList(filenames.filenames, filenames.buffer); +} + +/** + * data buffers helpers. + */ + +data_buffers_t data_buffers_get(data_t const* data) { + data_buffers_t buffers = {.size = 0}; + data_filenames_t filenames = data_filenames_get(data); + if (filenames.size == 0) + return buffers; + + data_buffer_t* buffersPtr = + (data_buffer_t*)malloc(filenames.size * sizeof(data_buffer_t)); + if (buffersPtr == NULL) + return buffers; + buffers.buffers = (data_buffer_t const*)buffersPtr; + buffers.size = filenames.size; + + for (size_t i = 0; i < filenames.size; ++i) { + buffersPtr[i] = data_buffer_read(filenames.filenames[i]); + if (buffersPtr[i].data == NULL) { + data_buffers_t const kEmptyBuffer = {}; + data_buffers_free(buffers); + return kEmptyBuffer; + } + } + + return buffers; +} + +/** + * Frees the data buffers. + */ +void data_buffers_free(data_buffers_t buffers) { + free((data_buffer_t*)buffers.buffers); +} + +/** + * Initialization and download functions. + */ + +static char* g_data_dir = NULL; + +/* mkdir -p */ +static int ensure_directory_exists(char const* indir) { + char* const dir = strdup(indir); + char* end = dir; + int ret = 0; + if (dir == NULL) { + ret = EINVAL; + goto out; + } + do { + /* Find the next directory level. */ + for (++end; *end != '\0' && *end != '/'; ++end) + ; + /* End the string there, make the directory, and restore the string. */ + char const save = *end; + *end = '\0'; + int const isdir = UTIL_isDirectory(dir); + ret = mkdir(dir, S_IRWXU); + *end = save; + /* Its okay if the directory already exists. */ + if (ret == 0 || (errno == EEXIST && isdir)) + continue; + ret = errno; + fprintf(stderr, "mkdir() failed\n"); + goto out; + } while (*end != '\0'); + + ret = 0; +out: + free(dir); + return ret; +} + +/** Concatenate 3 strings into a new buffer. */ +static char* cat3(char const* str1, char const* str2, char const* str3) { + size_t const size1 = strlen(str1); + size_t const size2 = strlen(str2); + size_t const size3 = str3 == NULL ? 0 : strlen(str3); + size_t const size = size1 + size2 + size3 + 1; + char* const dst = (char*)malloc(size); + if (dst == NULL) + return NULL; + strcpy(dst, str1); + strcpy(dst + size1, str2); + if (str3 != NULL) + strcpy(dst + size1 + size2, str3); + assert(strlen(dst) == size1 + size2 + size3); + return dst; +} + +static char* cat2(char const* str1, char const* str2) { + return cat3(str1, str2, NULL); +} + +/** + * State needed by the curl callback. + * It takes data from curl, hashes it, and writes it to the file. + */ +typedef struct { + FILE* file; + XXH64_state_t xxhash64; + int error; +} curl_data_t; + +/** Create the curl state. 
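 *
 * Editor's note: the XXH64_state_t member exists so the download can be
 * hashed incrementally as it streams through the curl write callback. The
 * underlying streaming-hash pattern (sketch; chunk and chunkSize are
 * placeholders):
 *
 *     XXH64_state_t st;
 *     XXH64_reset(&st, 0);
 *     XXH64_update(&st, chunk, chunkSize);   // repeated once per chunk received
 *     uint64_t const h = XXH64_digest(&st);
 *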
*/ +static curl_data_t curl_data_create( + data_resource_t const* resource, + data_type_t type) { + curl_data_t cdata = {}; + + XXH64_reset(&cdata.xxhash64, 0); + + assert(UTIL_isDirectory(g_data_dir)); + + if (type == data_type_file) { + /* Decompress the resource and store to the path. */ + char* cmd = cat3("zstd -dqfo '", resource->path, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } else { + /* Decompress and extract the resource to the cache directory. */ + char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } + if (cdata.file == NULL) { + cdata.error = errno; + } + + return cdata; +} + +/** Free the curl state. */ +static int curl_data_free(curl_data_t cdata) { + return pclose(cdata.file); +} + +/** curl callback. Updates the hash, and writes to the file. */ +static size_t curl_write(void* data, size_t size, size_t count, void* ptr) { + curl_data_t* cdata = (curl_data_t*)ptr; + size_t const written = fwrite(data, size, count, cdata->file); + XXH64_update(&cdata->xxhash64, data, written * size); + return written; +} + +static int curl_download_resource( + CURL* curl, + data_resource_t const* resource, + data_type_t type) { + curl_data_t cdata; + /* Download the data. */ + if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0) + return EINVAL; + if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0) + return EINVAL; + cdata = curl_data_create(resource, type); + if (cdata.error != 0) + return cdata.error; + int const curl_err = curl_easy_perform(curl); + int const close_err = curl_data_free(cdata); + if (curl_err) { + fprintf( + stderr, + "downloading '%s' for '%s' failed\n", + resource->url, + resource->path); + return EIO; + } + if (close_err) { + fprintf(stderr, "writing data to '%s' failed\n", resource->path); + return EIO; + } + /* check that the file exists. */ + if (type == data_type_file && !UTIL_isRegularFile(resource->path)) { + fprintf(stderr, "output file '%s' does not exist\n", resource->path); + return EIO; + } + if (type == data_type_dir && !UTIL_isDirectory(resource->path)) { + fprintf( + stderr, "output directory '%s' does not exist\n", resource->path); + return EIO; + } + /* Check that the hash matches. */ + if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) { + fprintf( + stderr, + "checksum does not match: 0x%llxLL != 0x%llxLL\n", + (unsigned long long)XXH64_digest(&cdata.xxhash64), + (unsigned long long)resource->xxhash64); + return EINVAL; + } + + return 0; +} + +/** Download a single data object. */ +static int curl_download_datum(CURL* curl, data_t const* data) { + int ret; + ret = curl_download_resource(curl, &data->data, data->type); + if (ret != 0) + return ret; + if (data_has_dict(data)) { + ret = curl_download_resource(curl, &data->dict, data_type_file); + if (ret != 0) + return ret; + } + return ret; +} + +/** Download all the data. 
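 *
 * Editor's note: the per-resource work (URL, output pipe, checksum) happens in
 * curl_download_resource() above; this wrapper only prepares a shared easy
 * handle. Minimal libcurl skeleton of that setup (illustration only; the URL
 * is a placeholder):
 *
 *     curl_global_init(CURL_GLOBAL_ALL);
 *     CURL* const curl = curl_easy_init();
 *     curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write);
 *     curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata);
 *     curl_easy_setopt(curl, CURLOPT_URL, "https://example.invalid/data.zst");
 *     CURLcode const rc = curl_easy_perform(curl);
 *     if (rc != CURLE_OK) fprintf(stderr, "download failed\n");
 *     curl_easy_cleanup(curl);
 *     curl_global_cleanup();
 *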
*/ +static int curl_download_data(data_t const* const* data) { + if (curl_global_init(CURL_GLOBAL_ALL) != 0) + return EFAULT; + + curl_data_t cdata = {}; + CURL* curl = curl_easy_init(); + int err = EFAULT; + + if (curl == NULL) + return EFAULT; + + if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0) + goto out; + + assert(data != NULL); + for (; *data != NULL; ++data) { + if (curl_download_datum(curl, *data) != 0) + goto out; + } + + err = 0; +out: + curl_easy_cleanup(curl); + curl_global_cleanup(); + return err; +} + +/** Fill the path member variable of the data objects. */ +static int data_create_paths(data_t* const* data, char const* dir) { + size_t const dirlen = strlen(dir); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* const datum = *data; + datum->data.path = cat3(dir, "/", datum->name); + if (datum->data.path == NULL) + return ENOMEM; + if (data_has_dict(datum)) { + datum->dict.path = cat2(datum->data.path, ".dict"); + if (datum->dict.path == NULL) + return ENOMEM; + } + } + return 0; +} + +/** Free the path member variable of the data objects. */ +static void data_free_paths(data_t* const* data) { + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* datum = *data; + free((void*)datum->data.path); + free((void*)datum->dict.path); + datum->data.path = NULL; + datum->dict.path = NULL; + } +} + +static char const kStampName[] = "STAMP"; + +static void xxh_update_le(XXH64_state_t* state, uint64_t data) { + if (!MEM_isLittleEndian()) + data = MEM_swap64(data); + XXH64_update(state, &data, sizeof(data)); +} + +/** Hash the data to create the stamp. */ +static uint64_t stamp_hash(data_t const* const* data) { + XXH64_state_t state; + + XXH64_reset(&state, 0); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t const* datum = *data; + /* We don't care about the URL that we fetch from. */ + /* The path is derived from the name. */ + XXH64_update(&state, datum->name, strlen(datum->name)); + xxh_update_le(&state, datum->data.xxhash64); + xxh_update_le(&state, datum->dict.xxhash64); + xxh_update_le(&state, datum->type); + } + return XXH64_digest(&state); +} + +/** Check if the stamp matches the stamp in the cache directory. */ +static int stamp_check(char const* dir, data_t const* const* data) { + char* stamp = cat3(dir, "/", kStampName); + uint64_t const expected = stamp_hash(data); + XXH64_canonical_t actual; + FILE* stampfile = NULL; + int matches = 0; + + if (stamp == NULL) + goto out; + if (!UTIL_isRegularFile(stamp)) { + fprintf(stderr, "stamp does not exist: recreating the data cache\n"); + goto out; + } + + stampfile = fopen(stamp, "rb"); + if (stampfile == NULL) { + fprintf(stderr, "could not open stamp: recreating the data cache\n"); + goto out; + } + + size_t b; + if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) { + fprintf(stderr, "invalid stamp: recreating the data cache\n"); + goto out; + } + + matches = (expected == XXH64_hashFromCanonical(&actual)); + if (matches) + fprintf(stderr, "stamp matches: reusing the cached data\n"); + else + fprintf(stderr, "stamp does not match: recreating the data cache\n"); + +out: + free(stamp); + if (stampfile != NULL) + fclose(stampfile); + return matches; +} + +/** On success write a new stamp, on failure delete the old stamp. 
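 *
 * Editor's note: the stamp is stored in XXH64 canonical (big-endian) form so a
 * cache directory stays valid across hosts with different endianness. The
 * round-trip used below and in stamp_check() (sketch; hash64 is a placeholder):
 *
 *     XXH64_canonical_t canon;
 *     XXH64_canonicalFromHash(&canon, hash64);                    // before fwrite()
 *     uint64_t const restored = XXH64_hashFromCanonical(&canon);  // after fread()
 *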
*/ +static int +stamp_write(char const* dir, data_t const* const* data, int const data_err) { + char* stamp = cat3(dir, "/", kStampName); + FILE* stampfile = NULL; + int err = EIO; + + if (stamp == NULL) + return ENOMEM; + + if (data_err != 0) { + err = data_err; + goto out; + } + XXH64_canonical_t hash; + + XXH64_canonicalFromHash(&hash, stamp_hash(data)); + + stampfile = fopen(stamp, "wb"); + if (stampfile == NULL) + goto out; + if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1) + goto out; + err = 0; + fprintf(stderr, "stamped new data cache\n"); +out: + if (err != 0) + /* Ignore errors. */ + unlink(stamp); + free(stamp); + if (stampfile != NULL) + fclose(stampfile); + return err; +} + +int data_init(char const* dir) { + int err; + + if (dir == NULL) + return EINVAL; + + /* This must be first to simplify logic. */ + err = ensure_directory_exists(dir); + if (err != 0) + return err; + + /* Save the cache directory. */ + g_data_dir = strdup(dir); + if (g_data_dir == NULL) + return ENOMEM; + + err = data_create_paths(g_data, dir); + if (err != 0) + return err; + + /* If the stamp matches then we are good to go. + * This must be called before any modifications to the data cache. + * After this point, we MUST call stamp_write() to update the STAMP, + * since we've updated the data cache. + */ + if (stamp_check(dir, data)) + return 0; + + err = curl_download_data(data); + if (err != 0) + goto out; + +out: + /* This must be last, since it must know if data_init() succeeded. */ + stamp_write(dir, data, err); + return err; +} + +void data_finish(void) { + data_free_paths(g_data); + free(g_data_dir); + g_data_dir = NULL; +} diff --git a/tests/regression/data.h b/tests/regression/data.h new file mode 100644 index 000000000..717fe1294 --- /dev/null +++ b/tests/regression/data.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DATA_H +#define DATA_H + +#include +#include + +typedef enum { + data_type_file = 1, /**< This data is a file. *.zst */ + data_type_dir = 2, /**< This data is a directory. *.tar.zst */ +} data_type_t; + +typedef struct { + char const* url; /**< Where to get this resource. */ + uint64_t xxhash64; /**< Hash of the url contents. */ + char const* path; /**< The path of the unpacked resource (derived). */ +} data_resource_t; + +typedef struct { + data_resource_t data; + data_resource_t dict; + data_type_t type; /**< The type of the data. */ + char const* name; /**< The logical name of the data (no extension). */ +} data_t; + +/** + * The NULL-terminated list of data objects. + */ +extern data_t const* const* data; + + +int data_has_dict(data_t const* data); + +/** + * Initializes the data module and downloads the data necessary. + * Caches the downloads in dir. We add a stamp file in the directory after + * a successful download. If a stamp file already exists, and matches our + * current data stamp, we will use the cached data without downloading. + * + * @param dir The directory to cache the downloaded data into. + * + * @returns 0 on success. + */ +int data_init(char const* dir); + +/** + * Must be called at exit to free resources allocated by data_init(). 
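 *
 * Typical lifecycle (editor's sketch, not part of the original patch; the
 * cache path is a placeholder):
 *
 *     if (data_init("tests/regression/cache") != 0) return 1;
 *     for (data_t const* const* d = data; *d != NULL; ++d)
 *         printf("%s%s\n", (*d)->name, data_has_dict(*d) ? " (has dictionary)" : "");
 *     data_finish();
 *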
+ */ +void data_finish(void); + +typedef struct { + uint8_t* data; + size_t size; + size_t capacity; +} data_buffer_t; + +/** + * Read the file that data points to into a buffer. + * NOTE: data must be a file, not a directory. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_get_data(data_t const* data); + +/** + * Read the dictionary that the data points to into a buffer. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_get_dict(data_t const* data); + +/** + * Read the contents of filename into a buffer. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_read(char const* filename); + +/** + * Create a buffer with the specified capacity. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_create(size_t capacity); + +/** + * Calls memcmp() on the contents [0, size) of both buffers. + */ +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2); + +/** + * Frees an allocated buffer. + */ +void data_buffer_free(data_buffer_t buffer); + +typedef struct { + char* buffer; + char const** filenames; + unsigned size; +} data_filenames_t; + +/** + * Get a recursive list of filenames in the data object. If it is a file, it + * will only contain one entry. If it is a directory, it will recursively walk + * the directory. + * + * @returns The list of filenames, which has size 0 and NULL pointers on error. + */ +data_filenames_t data_filenames_get(data_t const* data); + +/** + * Frees the filenames table. + */ +void data_filenames_free(data_filenames_t filenames); + +typedef struct { + data_buffer_t const* buffers; + size_t size; +} data_buffers_t; + +/** + * @returns a list of buffers for every file in data. It is zero sized on error. + */ +data_buffers_t data_buffers_get(data_t const* data); + +/** + * Frees the data buffers. + */ +void data_buffers_free(data_buffers_t buffers); + +#endif diff --git a/tests/regression/levels.h b/tests/regression/levels.h new file mode 100644 index 000000000..f96689075 --- /dev/null +++ b/tests/regression/levels.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef LEVEL +# error LEVEL(x) must be defined +#endif +#ifndef FAST_LEVEL +# error FAST_LEVEL(x) must be defined +#endif + +/** + * The levels are chosen to trigger every strategy in every source size, + * as well as some fast levels and the default level. + * If you change the compression levels, you should probably update these. + */ + +FAST_LEVEL(5) + +FAST_LEVEL(3) + +FAST_LEVEL(1) +LEVEL(0) +LEVEL(1) + +LEVEL(3) +LEVEL(4) +LEVEL(5) +LEVEL(6) +LEVEL(7) + +LEVEL(9) + +LEVEL(13) + +LEVEL(16) + +LEVEL(19) diff --git a/tests/regression/method.c b/tests/regression/method.c new file mode 100644 index 000000000..a43e91489 --- /dev/null +++ b/tests/regression/method.c @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "method.h" + +#include +#include + +#include + +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +static char const* g_zstdcli = NULL; + +void method_set_zstdcli(char const* zstdcli) { + g_zstdcli = zstdcli; +} + +/** + * Macro to get a pointer of type, given ptr, which is a member variable with + * the given name, member. + * + * method_state_t* base = ...; + * buffer_state_t* state = container_of(base, buffer_state_t, base); + */ +#define container_of(ptr, type, member) \ + ((type*)(ptr == NULL ? NULL : (char*)(ptr)-offsetof(type, member))) + +/** State to reuse the same buffers between compression calls. */ +typedef struct { + method_state_t base; + data_buffers_t inputs; /**< The input buffer for each file. */ + data_buffer_t dictionary; /**< The dictionary. */ + data_buffer_t compressed; /**< The compressed data buffer. */ + data_buffer_t decompressed; /**< The decompressed data buffer. */ +} buffer_state_t; + +static size_t buffers_max_size(data_buffers_t buffers) { + size_t max = 0; + for (size_t i = 0; i < buffers.size; ++i) { + if (buffers.buffers[i].size > max) + max = buffers.buffers[i].size; + } + return max; +} + +static method_state_t* buffer_state_create(data_t const* data) { + buffer_state_t* state = (buffer_state_t*)calloc(1, sizeof(buffer_state_t)); + if (state == NULL) + return NULL; + state->base.data = data; + state->inputs = data_buffers_get(data); + state->dictionary = data_buffer_get_dict(data); + size_t const max_size = buffers_max_size(state->inputs); + state->compressed = data_buffer_create(ZSTD_compressBound(max_size)); + state->decompressed = data_buffer_create(max_size); + return &state->base; +} + +static void buffer_state_destroy(method_state_t* base) { + if (base == NULL) + return; + buffer_state_t* state = container_of(base, buffer_state_t, base); + free(state); +} + +static int buffer_state_bad( + buffer_state_t const* state, + config_t const* config) { + if (state == NULL) { + fprintf(stderr, "buffer_state_t is NULL\n"); + return 1; + } + if (state->inputs.size == 0 || state->compressed.data == NULL || + state->decompressed.data == NULL) { + fprintf(stderr, "buffer state allocation failure\n"); + return 1; + } + if (config->use_dictionary && state->dictionary.data == NULL) { + fprintf(stderr, "dictionary loading failed\n"); + return 1; + } + return 0; +} + +static result_t simple_compress(method_state_t* base, config_t const* config) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + /* Keep the tests short by skipping directories, since behavior shouldn't + * change. + */ + if (base->data->type != data_type_file) + return result_error(result_error_skip); + + if (config->use_dictionary || config->no_pledged_src_size) + return result_error(result_error_skip); + + /* If the config doesn't specify a level, skip. */ + int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return result_error(result_error_skip); + + data_buffer_t const input = state->inputs.buffers[0]; + + /* Compress, decompress, and check the result. 
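 *
 * Editor's note: buffer_state_create() above sized state->compressed with
 * ZSTD_compressBound() of the largest input, so the one-shot call below should
 * always have enough output room; an error here indicates a real compression
 * failure rather than an undersized destination buffer.
 *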
*/ + state->compressed.size = ZSTD_compress( + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + level); + if (ZSTD_isError(state->compressed.size)) + return result_error(result_error_compression_error); + + state->decompressed.size = ZSTD_decompress( + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size); + if (ZSTD_isError(state->decompressed.size)) + return result_error(result_error_decompression_error); + if (data_buffer_compare(input, state->decompressed)) + return result_error(result_error_round_trip_error); + + result_data_t data; + data.total_size = state->compressed.size; + return result_data(data); +} + +static result_t compress_cctx_compress( + method_state_t* base, + config_t const* config) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + if (config->no_pledged_src_size) + return result_error(result_error_skip); + + if (base->data->type != data_type_dir) + return result_error(result_error_skip); + + int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return result_error(result_error_skip); + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (cctx == NULL || dctx == NULL) { + fprintf(stderr, "context creation failed\n"); + return result_error(result_error_system_error); + } + + result_t result; + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t const input = state->inputs.buffers[i]; + + if (config->use_dictionary) + state->compressed.size = ZSTD_compress_usingDict( + cctx, + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + state->dictionary.data, + state->dictionary.size, + level); + else + state->compressed.size = ZSTD_compressCCtx( + cctx, + state->compressed.data, + state->compressed.capacity, + input.data, + input.size, + level); + if (ZSTD_isError(state->compressed.size)) { + result = result_error(result_error_compression_error); + goto out; + } + + if (config->use_dictionary) + state->decompressed.size = ZSTD_decompress_usingDict( + dctx, + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size, + state->dictionary.data, + state->dictionary.size); + else + state->decompressed.size = ZSTD_decompressDCtx( + dctx, + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size); + if (ZSTD_isError(state->decompressed.size)) { + result = result_error(result_error_decompression_error); + goto out; + } + if (data_buffer_compare(input, state->decompressed)) { + result = result_error(result_error_round_trip_error); + goto out; + } + + data.total_size += state->compressed.size; + } + + result = result_data(data); +out: + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + return result; +} + +/** Generic state creation function. 
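 *
 * Editor's note: this plain method_state_t (no buffers) appears to serve
 * methods that do not round-trip data in memory, such as cli_compress()
 * below, which pipes the input through the zstd command-line binary instead.
 *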
*/ +static method_state_t* method_state_create(data_t const* data) { + method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t)); + if (state == NULL) + return NULL; + state->data = data; + return state; +} + +static void method_state_destroy(method_state_t* state) { + free(state); +} + +static result_t cli_compress(method_state_t* state, config_t const* config) { + if (config->cli_args == NULL) + return result_error(result_error_skip); + + /* We don't support no pledged source size with directories. Too slow. */ + if (state->data->type == data_type_dir && config->no_pledged_src_size) + return result_error(result_error_skip); + + if (g_zstdcli == NULL) + return result_error(result_error_system_error); + + /* '' -cqr [-D ''] '' */ + char cmd[1024]; + size_t const cmd_size = snprintf( + cmd, + sizeof(cmd), + "'%s' -cqr %s %s%s%s %s '%s'", + g_zstdcli, + config->cli_args, + config->use_dictionary ? "-D '" : "", + config->use_dictionary ? state->data->dict.path : "", + config->use_dictionary ? "'" : "", + config->no_pledged_src_size ? "<" : "", + state->data->data.path); + if (cmd_size >= sizeof(cmd)) { + fprintf(stderr, "command too large: %s\n", cmd); + return result_error(result_error_system_error); + } + FILE* zstd = popen(cmd, "r"); + if (zstd == NULL) { + fprintf(stderr, "failed to popen command: %s\n", cmd); + return result_error(result_error_system_error); + } + + char out[4096]; + size_t total_size = 0; + while (1) { + size_t const size = fread(out, 1, sizeof(out), zstd); + total_size += size; + if (size != sizeof(out)) + break; + } + if (ferror(zstd) || pclose(zstd) != 0) { + fprintf(stderr, "zstd failed with command: %s\n", cmd); + return result_error(result_error_compression_error); + } + + result_data_t const data = {.total_size = total_size}; + return result_data(data); +} + +static int advanced_config( + ZSTD_CCtx* cctx, + buffer_state_t* state, + config_t const* config) { + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + for (size_t p = 0; p < config->param_values.size; ++p) { + param_value_t const pv = config->param_values.data[p]; + if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, pv.param, pv.value))) { + return 1; + } + } + if (config->use_dictionary) { + if (ZSTD_isError(ZSTD_CCtx_loadDictionary( + cctx, state->dictionary.data, state->dictionary.size))) { + return 1; + } + } + return 0; +} + +static result_t advanced_one_pass_compress_output_adjustment( + method_state_t* base, + config_t const* config, + size_t const subtract) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + result_t result; + + if (!cctx || advanced_config(cctx, state, config)) { + result = result_error(result_error_compression_error); + goto out; + } + + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t const input = state->inputs.buffers[i]; + + if (!config->no_pledged_src_size) { + if (ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(cctx, input.size))) { + result = result_error(result_error_compression_error); + goto out; + } + } + size_t const size = ZSTD_compress2( + cctx, + state->compressed.data, + ZSTD_compressBound(input.size) - subtract, + input.data, + input.size); + if (ZSTD_isError(size)) { + result = result_error(result_error_compression_error); + goto out; + } + data.total_size += size; + } + + result = result_data(data); +out: + ZSTD_freeCCtx(cctx); + return 
result; +} + +static result_t advanced_one_pass_compress( + method_state_t* base, + config_t const* config) { + return advanced_one_pass_compress_output_adjustment(base, config, 0); +} + +static result_t advanced_one_pass_compress_small_output( + method_state_t* base, + config_t const* config) { + return advanced_one_pass_compress_output_adjustment(base, config, 1); +} + +static result_t advanced_streaming_compress( + method_state_t* base, + config_t const* config) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + result_t result; + + if (!cctx || advanced_config(cctx, state, config)) { + result = result_error(result_error_compression_error); + goto out; + } + + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t input = state->inputs.buffers[i]; + + if (!config->no_pledged_src_size) { + if (ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(cctx, input.size))) { + result = result_error(result_error_compression_error); + goto out; + } + } + + while (input.size > 0) { + ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)}; + input.data += in.size; + input.size -= in.size; + ZSTD_EndDirective const op = + input.size > 0 ? ZSTD_e_continue : ZSTD_e_end; + size_t ret = 0; + while (in.pos < in.size || (op == ZSTD_e_end && ret != 0)) { + ZSTD_outBuffer out = {state->compressed.data, + MIN(state->compressed.capacity, 1024)}; + ret = ZSTD_compressStream2(cctx, &out, &in, op); + if (ZSTD_isError(ret)) { + result = result_error(result_error_compression_error); + goto out; + } + data.total_size += out.pos; + } + } + } + + result = result_data(data); +out: + ZSTD_freeCCtx(cctx); + return result; +} + +static result_t old_streaming_compress( + method_state_t* base, + config_t const* config) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return result_error(result_error_skip); + + ZSTD_CStream* zcs = ZSTD_createCStream(); + result_t result; + if (zcs == NULL) { + result = result_error(result_error_compression_error); + goto out; + } + size_t zret; + if (config->use_dictionary) { + zret = ZSTD_initCStream_usingDict( + zcs, state->dictionary.data, state->dictionary.size, level); + } else { + zret = ZSTD_initCStream(zcs, level); + } + if (ZSTD_isError(zret)) { + result = result_error(result_error_compression_error); + goto out; + } + + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t input = state->inputs.buffers[i]; + zret = ZSTD_resetCStream( + zcs, + config->no_pledged_src_size ? ZSTD_CONTENTSIZE_UNKNOWN + : input.size); + if (ZSTD_isError(zret)) { + result = result_error(result_error_compression_error); + goto out; + } + + while (input.size > 0) { + ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)}; + input.data += in.size; + input.size -= in.size; + ZSTD_EndDirective const op = + input.size > 0 ? 
ZSTD_e_continue : ZSTD_e_end; + zret = 0; + while (in.pos < in.size || (op == ZSTD_e_end && zret != 0)) { + ZSTD_outBuffer out = {state->compressed.data, + MIN(state->compressed.capacity, 1024)}; + if (op == ZSTD_e_continue || in.pos < in.size) + zret = ZSTD_compressStream(zcs, &out, &in); + else + zret = ZSTD_endStream(zcs, &out); + if (ZSTD_isError(zret)) { + result = result_error(result_error_compression_error); + goto out; + } + data.total_size += out.pos; + } + } + } + + result = result_data(data); +out: + ZSTD_freeCStream(zcs); + return result; +} + +method_t const simple = { + .name = "compress simple", + .create = buffer_state_create, + .compress = simple_compress, + .destroy = buffer_state_destroy, +}; + +method_t const compress_cctx = { + .name = "compress cctx", + .create = buffer_state_create, + .compress = compress_cctx_compress, + .destroy = buffer_state_destroy, +}; + +method_t const advanced_one_pass = { + .name = "advanced one pass", + .create = buffer_state_create, + .compress = advanced_one_pass_compress, + .destroy = buffer_state_destroy, +}; + +method_t const advanced_one_pass_small_out = { + .name = "advanced one pass small out", + .create = buffer_state_create, + .compress = advanced_one_pass_compress, + .destroy = buffer_state_destroy, +}; + +method_t const advanced_streaming = { + .name = "advanced streaming", + .create = buffer_state_create, + .compress = advanced_streaming_compress, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming = { + .name = "old streaming", + .create = buffer_state_create, + .compress = old_streaming_compress, + .destroy = buffer_state_destroy, +}; + +method_t const cli = { + .name = "zstdcli", + .create = method_state_create, + .compress = cli_compress, + .destroy = method_state_destroy, +}; + +static method_t const* g_methods[] = { + &simple, + &compress_cctx, + &cli, + &advanced_one_pass, + &advanced_one_pass_small_out, + &advanced_streaming, + &old_streaming, + NULL, +}; + +method_t const* const* methods = g_methods; diff --git a/tests/regression/method.h b/tests/regression/method.h new file mode 100644 index 000000000..d70b776b1 --- /dev/null +++ b/tests/regression/method.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef METHOD_H +#define METHOD_H + +#include + +#include "data.h" +#include "config.h" +#include "result.h" + +/** + * The base class for state that methods keep. + * All derived method state classes must have a member of this type. + */ +typedef struct { + data_t const* data; +} method_state_t; + +/** + * A method that compresses the data using config. + */ +typedef struct { + char const* name; /**< The identifier for this method in the results. */ + /** + * Creates a state that must contain a member variable of method_state_t, + * and returns a pointer to that member variable. + * + * This method can be used to do expensive work that only depends on the + * data, like loading the data file into a buffer. + */ + method_state_t* (*create)(data_t const* data); + /** + * Compresses the data in the state using the given config. + * + * @param state A pointer to the state returned by create(). 
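The streaming methods above drive the compressor with 4096-byte input slices and a deliberately small 1024-byte output buffer, looping until the input slice is consumed and, on the final ZSTD_e_end call, until the return value reaches zero. Here is a freestanding sketch of that loop against the ZSTD_compressStream2() API, using a synthetic in-memory input instead of the test data:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <zstd.h>

    int main(void) {
        char src[64 * 1024];
        memset(src, 'a', sizeof(src));
        size_t srcLeft = sizeof(src);
        char const* srcPtr = src;

        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL)
            return 1;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);

        char out[1024];                        /* intentionally small output buffer */
        size_t totalCompressed = 0;

        while (srcLeft > 0) {
            size_t const chunk = srcLeft < 4096 ? srcLeft : 4096;
            ZSTD_inBuffer in = {srcPtr, chunk, 0};
            srcPtr += chunk;
            srcLeft -= chunk;
            /* Ask for the end of the frame on the last chunk only. */
            ZSTD_EndDirective const op = srcLeft > 0 ? ZSTD_e_continue : ZSTD_e_end;
            size_t remaining = 0;
            do {
                ZSTD_outBuffer ob = {out, sizeof(out), 0};
                remaining = ZSTD_compressStream2(cctx, &ob, &in, op);
                if (ZSTD_isError(remaining)) {
                    ZSTD_freeCCtx(cctx);
                    return 1;
                }
                totalCompressed += ob.pos;     /* ob.pos bytes were produced */
            } while (in.pos < in.size || (op == ZSTD_e_end && remaining != 0));
        }

        printf("compressed %zu bytes into %zu bytes\n", sizeof(src), totalCompressed);
        ZSTD_freeCCtx(cctx);
        return 0;
    }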
+ * + * @returns The total compressed size on success, or an error code. + */ + result_t (*compress)(method_state_t* state, config_t const* config); + /** + * Frees the state. + */ + void (*destroy)(method_state_t* state); +} method_t; + +/** + * Set the zstd cli path. Must be called before any methods are used. + */ +void method_set_zstdcli(char const* zstdcli); + +/** + * A NULL-terminated list of methods. + */ +extern method_t const* const* methods; + +#endif diff --git a/tests/regression/result.c b/tests/regression/result.c new file mode 100644 index 000000000..31439b08c --- /dev/null +++ b/tests/regression/result.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "result.h" + +char const* result_get_error_string(result_t result) { + switch (result_get_error(result)) { + case result_error_ok: + return "okay"; + case result_error_skip: + return "skip"; + case result_error_system_error: + return "system error"; + case result_error_compression_error: + return "compression error"; + case result_error_decompression_error: + return "decompression error"; + case result_error_round_trip_error: + return "round trip error"; + } +} diff --git a/tests/regression/result.h b/tests/regression/result.h new file mode 100644 index 000000000..8c80cf85a --- /dev/null +++ b/tests/regression/result.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef RESULT_H +#define RESULT_H + +#include + +/** + * The error type enum. + */ +typedef enum { + result_error_ok, /**< No error. */ + result_error_skip, /**< This method was skipped. */ + result_error_system_error, /**< Some internal error happened. */ + result_error_compression_error, /**< Compression failed. */ + result_error_decompression_error, /**< Decompression failed. */ + result_error_round_trip_error, /**< Data failed to round trip. */ +} result_error_t; + +/** + * The success type. + */ +typedef struct { + size_t total_size; /**< The total compressed size. */ +} result_data_t; + +/** + * The result type. + * Do not access the member variables directory, use the helper functions. + */ +typedef struct { + result_error_t internal_error; + result_data_t internal_data; +} result_t; + +/** + * Create a result of the error type. + */ +static result_t result_error(result_error_t error); +/** + * Create a result of the success type. + */ +static result_t result_data(result_data_t data); + +/** + * Check if the result is an error or skip. + */ +static int result_is_error(result_t result); +/** + * Check if the result error is skip. + */ +static int result_is_skip(result_t result); +/** + * Get the result error or okay. + */ +static result_error_t result_get_error(result_t result); +/** + * Get the result data. The result MUST be checked with result_is_error() first. 
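The result type declared above is a small value type: constructors for the error and success cases plus accessors, all static so they are available in every translation unit that includes the header. A short usage sketch, assuming result.h and result.c from this patch are available and with compress_something() as a hypothetical stand-in for a method:

    #include <stdio.h>
    #include "result.h"

    static result_t compress_something(int fail) {
        if (fail)
            return result_error(result_error_compression_error);
        result_data_t data = {.total_size = 12345};
        return result_data(data);
    }

    int main(void) {
        result_t const r = compress_something(0);
        if (result_is_skip(r)) {                    /* skip is checked before error */
            puts("skipped");
        } else if (result_is_error(r)) {
            printf("failed: %s\n", result_get_error_string(r));
        } else {
            printf("compressed size: %zu\n", result_get_data(r).total_size);
        }
        return 0;
    }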
+ */ +static result_data_t result_get_data(result_t result); + +static result_t result_error(result_error_t error) { + result_t result = { + .internal_error = error, + }; + return result; +} + +static result_t result_data(result_data_t data) { + result_t result = { + .internal_error = result_error_ok, + .internal_data = data, + }; + return result; +} + +static int result_is_error(result_t result) { + return result_get_error(result) != result_error_ok; +} + +static int result_is_skip(result_t result) { + return result_get_error(result) == result_error_skip; +} + +static result_error_t result_get_error(result_t result) { + return result.internal_error; +} + +char const* result_get_error_string(result_t result); + +static result_data_t result_get_data(result_t result) { + return result.internal_data; +} + +#endif diff --git a/tests/regression/results.csv b/tests/regression/results.csv new file mode 100644 index 000000000..3e9d791a5 --- /dev/null +++ b/tests/regression/results.csv @@ -0,0 +1,350 @@ +Data, Config, Method, Total compressed size +silesia.tar, level -5, compress simple, 7160438 +silesia.tar, level -3, compress simple, 6789024 +silesia.tar, level -1, compress simple, 6195462 +silesia.tar, level 0, compress simple, 4875071 +silesia.tar, level 1, compress simple, 5339697 +silesia.tar, level 3, compress simple, 4875071 +silesia.tar, level 4, compress simple, 4813104 +silesia.tar, level 5, compress simple, 4726961 +silesia.tar, level 6, compress simple, 4654401 +silesia.tar, level 7, compress simple, 4591933 +silesia.tar, level 9, compress simple, 4554098 +silesia.tar, level 13, compress simple, 4503496 +silesia.tar, level 16, compress simple, 4387233 +silesia.tar, level 19, compress simple, 4283123 +silesia, level -5, compress cctx, 7152294 +silesia, level -3, compress cctx, 6789969 +silesia, level -1, compress cctx, 6191548 +silesia, level 0, compress cctx, 4862377 +silesia, level 1, compress cctx, 5318036 +silesia, level 3, compress cctx, 4862377 +silesia, level 4, compress cctx, 4800629 +silesia, level 5, compress cctx, 4715005 +silesia, level 6, compress cctx, 4644055 +silesia, level 7, compress cctx, 4581559 +silesia, level 9, compress cctx, 4543862 +silesia, level 13, compress cctx, 4493931 +silesia, level 16, compress cctx, 4381885 +silesia, level 19, compress cctx, 4296899 +github, level -5, compress cctx, 232744 +github, level -5 with dict, compress cctx, 45704 +github, level -3, compress cctx, 220611 +github, level -3 with dict, compress cctx, 44510 +github, level -1, compress cctx, 176575 +github, level -1 with dict, compress cctx, 41586 +github, level 0, compress cctx, 136397 +github, level 0 with dict, compress cctx, 38700 +github, level 1, compress cctx, 143457 +github, level 1 with dict, compress cctx, 41538 +github, level 3, compress cctx, 136397 +github, level 3 with dict, compress cctx, 38700 +github, level 4, compress cctx, 136144 +github, level 4 with dict, compress cctx, 38639 +github, level 5, compress cctx, 135106 +github, level 5 with dict, compress cctx, 38934 +github, level 6, compress cctx, 135108 +github, level 6 with dict, compress cctx, 38628 +github, level 7, compress cctx, 135108 +github, level 7 with dict, compress cctx, 38741 +github, level 9, compress cctx, 135108 +github, level 9 with dict, compress cctx, 39335 +github, level 13, compress cctx, 133741 +github, level 13 with dict, compress cctx, 39670 +github, level 16, compress cctx, 133741 +github, level 16 with dict, compress cctx, 37928 +github, level 19, compress cctx, 133717 +github, level 19 
with dict, compress cctx, 37567 +silesia, level -5, zstdcli, 7152342 +silesia, level -3, zstdcli, 6790021 +silesia, level -1, zstdcli, 6191597 +silesia, level 0, zstdcli, 4862425 +silesia, level 1, zstdcli, 5318084 +silesia, level 3, zstdcli, 4862425 +silesia, level 4, zstdcli, 4800677 +silesia, level 5, zstdcli, 4715053 +silesia, level 6, zstdcli, 4644103 +silesia, level 7, zstdcli, 4581607 +silesia, level 9, zstdcli, 4543910 +silesia, level 13, zstdcli, 4493979 +silesia, level 16, zstdcli, 4381933 +silesia, level 19, zstdcli, 4296947 +silesia.tar, level -5, zstdcli, 7159586 +silesia.tar, level -3, zstdcli, 6791018 +silesia.tar, level -1, zstdcli, 6196283 +silesia.tar, level 0, zstdcli, 4876730 +silesia.tar, level 1, zstdcli, 5340312 +silesia.tar, level 3, zstdcli, 4876730 +silesia.tar, level 4, zstdcli, 4817723 +silesia.tar, level 5, zstdcli, 4730389 +silesia.tar, level 6, zstdcli, 4655708 +silesia.tar, level 7, zstdcli, 4593407 +silesia.tar, level 9, zstdcli, 4556135 +silesia.tar, level 13, zstdcli, 4503500 +silesia.tar, level 16, zstdcli, 4387237 +silesia.tar, level 19, zstdcli, 4283127 +silesia.tar, no source size, zstdcli, 4876726 +github, level -5, zstdcli, 234744 +github, level -5 with dict, zstdcli, 47528 +github, level -3, zstdcli, 222611 +github, level -3 with dict, zstdcli, 46394 +github, level -1, zstdcli, 178575 +github, level -1 with dict, zstdcli, 43401 +github, level 0, zstdcli, 138397 +github, level 0 with dict, zstdcli, 40316 +github, level 1, zstdcli, 145457 +github, level 1 with dict, zstdcli, 43242 +github, level 3, zstdcli, 138397 +github, level 3 with dict, zstdcli, 40316 +github, level 4, zstdcli, 138144 +github, level 4 with dict, zstdcli, 40292 +github, level 5, zstdcli, 137106 +github, level 5 with dict, zstdcli, 40938 +github, level 6, zstdcli, 137108 +github, level 6 with dict, zstdcli, 40632 +github, level 7, zstdcli, 137108 +github, level 7 with dict, zstdcli, 40766 +github, level 9, zstdcli, 137108 +github, level 9 with dict, zstdcli, 41326 +github, level 13, zstdcli, 135741 +github, level 13 with dict, zstdcli, 41670 +github, level 16, zstdcli, 135741 +github, level 16 with dict, zstdcli, 39940 +github, level 19, zstdcli, 135717 +github, level 19 with dict, zstdcli, 39576 +silesia, level -5, advanced one pass, 7152294 +silesia, level -3, advanced one pass, 6789969 +silesia, level -1, advanced one pass, 6191548 +silesia, level 0, advanced one pass, 4862377 +silesia, level 1, advanced one pass, 5318036 +silesia, level 3, advanced one pass, 4862377 +silesia, level 4, advanced one pass, 4800629 +silesia, level 5, advanced one pass, 4715005 +silesia, level 6, advanced one pass, 4644055 +silesia, level 7, advanced one pass, 4581559 +silesia, level 9, advanced one pass, 4543862 +silesia, level 13, advanced one pass, 4493931 +silesia, level 16, advanced one pass, 4381885 +silesia, level 19, advanced one pass, 4296899 +silesia, no source size, advanced one pass, 4862377 +silesia.tar, level -5, advanced one pass, 7160438 +silesia.tar, level -3, advanced one pass, 6789024 +silesia.tar, level -1, advanced one pass, 6195462 +silesia.tar, level 0, advanced one pass, 4875071 +silesia.tar, level 1, advanced one pass, 5339697 +silesia.tar, level 3, advanced one pass, 4875071 +silesia.tar, level 4, advanced one pass, 4813104 +silesia.tar, level 5, advanced one pass, 4726961 +silesia.tar, level 6, advanced one pass, 4654401 +silesia.tar, level 7, advanced one pass, 4591933 +silesia.tar, level 9, advanced one pass, 4554098 +silesia.tar, level 13, advanced one pass, 4503496 
+silesia.tar, level 16, advanced one pass, 4387233 +silesia.tar, level 19, advanced one pass, 4283123 +silesia.tar, no source size, advanced one pass, 4875071 +github, level -5, advanced one pass, 232744 +github, level -5 with dict, advanced one pass, 45528 +github, level -3, advanced one pass, 220611 +github, level -3 with dict, advanced one pass, 44394 +github, level -1, advanced one pass, 176575 +github, level -1 with dict, advanced one pass, 41401 +github, level 0, advanced one pass, 136397 +github, level 0 with dict, advanced one pass, 38316 +github, level 1, advanced one pass, 143457 +github, level 1 with dict, advanced one pass, 41242 +github, level 3, advanced one pass, 136397 +github, level 3 with dict, advanced one pass, 38316 +github, level 4, advanced one pass, 136144 +github, level 4 with dict, advanced one pass, 38292 +github, level 5, advanced one pass, 135106 +github, level 5 with dict, advanced one pass, 38938 +github, level 6, advanced one pass, 135108 +github, level 6 with dict, advanced one pass, 38632 +github, level 7, advanced one pass, 135108 +github, level 7 with dict, advanced one pass, 38766 +github, level 9, advanced one pass, 135108 +github, level 9 with dict, advanced one pass, 39326 +github, level 13, advanced one pass, 133741 +github, level 13 with dict, advanced one pass, 39670 +github, level 16, advanced one pass, 133741 +github, level 16 with dict, advanced one pass, 37940 +github, level 19, advanced one pass, 133717 +github, level 19 with dict, advanced one pass, 37576 +github, no source size, advanced one pass, 136397 +silesia, level -5, advanced one pass small out, 7152294 +silesia, level -3, advanced one pass small out, 6789969 +silesia, level -1, advanced one pass small out, 6191548 +silesia, level 0, advanced one pass small out, 4862377 +silesia, level 1, advanced one pass small out, 5318036 +silesia, level 3, advanced one pass small out, 4862377 +silesia, level 4, advanced one pass small out, 4800629 +silesia, level 5, advanced one pass small out, 4715005 +silesia, level 6, advanced one pass small out, 4644055 +silesia, level 7, advanced one pass small out, 4581559 +silesia, level 9, advanced one pass small out, 4543862 +silesia, level 13, advanced one pass small out, 4493931 +silesia, level 16, advanced one pass small out, 4381885 +silesia, level 19, advanced one pass small out, 4296899 +silesia, no source size, advanced one pass small out, 4862377 +silesia.tar, level -5, advanced one pass small out, 7160438 +silesia.tar, level -3, advanced one pass small out, 6789024 +silesia.tar, level -1, advanced one pass small out, 6195462 +silesia.tar, level 0, advanced one pass small out, 4875071 +silesia.tar, level 1, advanced one pass small out, 5339697 +silesia.tar, level 3, advanced one pass small out, 4875071 +silesia.tar, level 4, advanced one pass small out, 4813104 +silesia.tar, level 5, advanced one pass small out, 4726961 +silesia.tar, level 6, advanced one pass small out, 4654401 +silesia.tar, level 7, advanced one pass small out, 4591933 +silesia.tar, level 9, advanced one pass small out, 4554098 +silesia.tar, level 13, advanced one pass small out, 4503496 +silesia.tar, level 16, advanced one pass small out, 4387233 +silesia.tar, level 19, advanced one pass small out, 4283123 +silesia.tar, no source size, advanced one pass small out, 4875071 +github, level -5, advanced one pass small out, 232744 +github, level -5 with dict, advanced one pass small out, 45528 +github, level -3, advanced one pass small out, 220611 +github, level -3 with dict, 
advanced one pass small out, 44394 +github, level -1, advanced one pass small out, 176575 +github, level -1 with dict, advanced one pass small out, 41401 +github, level 0, advanced one pass small out, 136397 +github, level 0 with dict, advanced one pass small out, 38316 +github, level 1, advanced one pass small out, 143457 +github, level 1 with dict, advanced one pass small out, 41242 +github, level 3, advanced one pass small out, 136397 +github, level 3 with dict, advanced one pass small out, 38316 +github, level 4, advanced one pass small out, 136144 +github, level 4 with dict, advanced one pass small out, 38292 +github, level 5, advanced one pass small out, 135106 +github, level 5 with dict, advanced one pass small out, 38938 +github, level 6, advanced one pass small out, 135108 +github, level 6 with dict, advanced one pass small out, 38632 +github, level 7, advanced one pass small out, 135108 +github, level 7 with dict, advanced one pass small out, 38766 +github, level 9, advanced one pass small out, 135108 +github, level 9 with dict, advanced one pass small out, 39326 +github, level 13, advanced one pass small out, 133741 +github, level 13 with dict, advanced one pass small out, 39670 +github, level 16, advanced one pass small out, 133741 +github, level 16 with dict, advanced one pass small out, 37940 +github, level 19, advanced one pass small out, 133717 +github, level 19 with dict, advanced one pass small out, 37576 +github, no source size, advanced one pass small out, 136397 +silesia, level -5, advanced streaming, 7152294 +silesia, level -3, advanced streaming, 6789973 +silesia, level -1, advanced streaming, 6191549 +silesia, level 0, advanced streaming, 4862377 +silesia, level 1, advanced streaming, 5318036 +silesia, level 3, advanced streaming, 4862377 +silesia, level 4, advanced streaming, 4800629 +silesia, level 5, advanced streaming, 4715005 +silesia, level 6, advanced streaming, 4644055 +silesia, level 7, advanced streaming, 4581559 +silesia, level 9, advanced streaming, 4543862 +silesia, level 13, advanced streaming, 4493931 +silesia, level 16, advanced streaming, 4381885 +silesia, level 19, advanced streaming, 4296899 +silesia, no source size, advanced streaming, 4862341 +silesia.tar, level -5, advanced streaming, 7160440 +silesia.tar, level -3, advanced streaming, 6789026 +silesia.tar, level -1, advanced streaming, 6195465 +silesia.tar, level 0, advanced streaming, 4875071 +silesia.tar, level 1, advanced streaming, 5339701 +silesia.tar, level 3, advanced streaming, 4875071 +silesia.tar, level 4, advanced streaming, 4813104 +silesia.tar, level 5, advanced streaming, 4726977 +silesia.tar, level 6, advanced streaming, 4654404 +silesia.tar, level 7, advanced streaming, 4591934 +silesia.tar, level 9, advanced streaming, 4554105 +silesia.tar, level 13, advanced streaming, 4503496 +silesia.tar, level 16, advanced streaming, 4387233 +silesia.tar, level 19, advanced streaming, 4283123 +silesia.tar, no source size, advanced streaming, 4875067 +github, level -5, advanced streaming, 232744 +github, level -5 with dict, advanced streaming, 45528 +github, level -3, advanced streaming, 220611 +github, level -3 with dict, advanced streaming, 44394 +github, level -1, advanced streaming, 176575 +github, level -1 with dict, advanced streaming, 41401 +github, level 0, advanced streaming, 136397 +github, level 0 with dict, advanced streaming, 38316 +github, level 1, advanced streaming, 143457 +github, level 1 with dict, advanced streaming, 41242 +github, level 3, advanced streaming, 136397 
+github, level 3 with dict, advanced streaming, 38316 +github, level 4, advanced streaming, 136144 +github, level 4 with dict, advanced streaming, 38292 +github, level 5, advanced streaming, 135106 +github, level 5 with dict, advanced streaming, 38938 +github, level 6, advanced streaming, 135108 +github, level 6 with dict, advanced streaming, 38632 +github, level 7, advanced streaming, 135108 +github, level 7 with dict, advanced streaming, 38766 +github, level 9, advanced streaming, 135108 +github, level 9 with dict, advanced streaming, 39326 +github, level 13, advanced streaming, 133741 +github, level 13 with dict, advanced streaming, 39670 +github, level 16, advanced streaming, 133741 +github, level 16 with dict, advanced streaming, 37940 +github, level 19, advanced streaming, 133717 +github, level 19 with dict, advanced streaming, 37576 +github, no source size, advanced streaming, 136397 +silesia, level -5, old streaming, 7152294 +silesia, level -3, old streaming, 6789973 +silesia, level -1, old streaming, 6191549 +silesia, level 0, old streaming, 4862377 +silesia, level 1, old streaming, 5318036 +silesia, level 3, old streaming, 4862377 +silesia, level 4, old streaming, 4800629 +silesia, level 5, old streaming, 4715005 +silesia, level 6, old streaming, 4644055 +silesia, level 7, old streaming, 4581559 +silesia, level 9, old streaming, 4543862 +silesia, level 13, old streaming, 4493931 +silesia, level 16, old streaming, 4381885 +silesia, level 19, old streaming, 4296899 +silesia, no source size, old streaming, 4862341 +silesia.tar, level -5, old streaming, 7160440 +silesia.tar, level -3, old streaming, 6789026 +silesia.tar, level -1, old streaming, 6195465 +silesia.tar, level 0, old streaming, 4875071 +silesia.tar, level 1, old streaming, 5339701 +silesia.tar, level 3, old streaming, 4875071 +silesia.tar, level 4, old streaming, 4813104 +silesia.tar, level 5, old streaming, 4726977 +silesia.tar, level 6, old streaming, 4654404 +silesia.tar, level 7, old streaming, 4591934 +silesia.tar, level 9, old streaming, 4554105 +silesia.tar, level 13, old streaming, 4503496 +silesia.tar, level 16, old streaming, 4387233 +silesia.tar, level 19, old streaming, 4283123 +silesia.tar, no source size, old streaming, 4875067 +github, level -5, old streaming, 232744 +github, level -5 with dict, old streaming, 45528 +github, level -3, old streaming, 220611 +github, level -3 with dict, old streaming, 44394 +github, level -1, old streaming, 176575 +github, level -1 with dict, old streaming, 41401 +github, level 0, old streaming, 136397 +github, level 0 with dict, old streaming, 38316 +github, level 1, old streaming, 143457 +github, level 1 with dict, old streaming, 41242 +github, level 3, old streaming, 136397 +github, level 3 with dict, old streaming, 38316 +github, level 4, old streaming, 136144 +github, level 4 with dict, old streaming, 38292 +github, level 5, old streaming, 135106 +github, level 5 with dict, old streaming, 38938 +github, level 6, old streaming, 135108 +github, level 6 with dict, old streaming, 38632 +github, level 7, old streaming, 135108 +github, level 7 with dict, old streaming, 38766 +github, level 9, old streaming, 135108 +github, level 9 with dict, old streaming, 39326 +github, level 13, old streaming, 133741 +github, level 13 with dict, old streaming, 39670 +github, level 16, old streaming, 133741 +github, level 16 with dict, old streaming, 37940 +github, level 19, old streaming, 133717 +github, level 19 with dict, old streaming, 37576 +github, no source size, old streaming, 141003 
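Each row of results.csv above is "Data, Config, Method, Total compressed size", and the runner guarantees that the three name fields never contain a comma (see is_name_bad() in test.c below). A small sketch of reading a row back, with parse_row() written only for this example:

    #include <stdio.h>

    static int parse_row(char const* line, char* data, char* config, char* method,
                         unsigned long long* total) {
        /* Fields are comma separated; the names are free-form but comma-free. */
        return sscanf(line, " %63[^,], %63[^,], %63[^,], %llu",
                      data, config, method, total) == 4;
    }

    int main(void) {
        char const row[] = "silesia.tar, level 3, compress simple, 4875071";
        char data[64], config[64], method[64];
        unsigned long long total;
        if (parse_row(row, data, config, method, &total))
            printf("%s / %s / %s -> %llu bytes\n", data, config, method, total);
        return 0;
    }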
diff --git a/tests/regression/test.c b/tests/regression/test.c new file mode 100644 index 000000000..9e7b83c19 --- /dev/null +++ b/tests/regression/test.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +#include "config.h" +#include "data.h" +#include "method.h" + +static int g_max_name_len = 0; + +/** Check if a name contains a comma or is too long. */ +static int is_name_bad(char const* name) { + if (name == NULL) + return 1; + int const len = strlen(name); + if (len > g_max_name_len) + g_max_name_len = len; + for (; *name != '\0'; ++name) + if (*name == ',') + return 1; + return 0; +} + +/** Check if any of the names contain a comma. */ +static int are_names_bad() { + for (size_t method = 0; methods[method] != NULL; ++method) + if (is_name_bad(methods[method]->name)) { + fprintf(stderr, "method name %s is bad\n", methods[method]->name); + return 1; + } + for (size_t datum = 0; data[datum] != NULL; ++datum) + if (is_name_bad(data[datum]->name)) { + fprintf(stderr, "data name %s is bad\n", data[datum]->name); + return 1; + } + for (size_t config = 0; configs[config] != NULL; ++config) + if (is_name_bad(configs[config]->name)) { + fprintf(stderr, "config name %s is bad\n", configs[config]->name); + return 1; + } + return 0; +} + +/** + * Option parsing using getopt. + * When you add a new option update: long_options, long_extras, and + * short_options. + */ + +/** Option variables filled by parse_args. */ +static char const* g_output = NULL; +static char const* g_diff = NULL; +static char const* g_cache = NULL; +static char const* g_zstdcli = NULL; +static char const* g_config = NULL; +static char const* g_data = NULL; +static char const* g_method = NULL; + +typedef enum { + required_option, + optional_option, + help_option, +} option_type; + +/** + * Extra state that we need to keep per-option that we can't store in getopt. + */ +struct option_extra { + int id; /**< The short option name, used as an id. */ + char const* help; /**< The help message. */ + option_type opt_type; /**< The option type: required, optional, or help. */ + char const** value; /**< The value to set or NULL if no_argument. */ +}; + +/** The options. */ +static struct option long_options[] = { + {"cache", required_argument, NULL, 'c'}, + {"output", required_argument, NULL, 'o'}, + {"zstd", required_argument, NULL, 'z'}, + {"config", required_argument, NULL, 128}, + {"data", required_argument, NULL, 129}, + {"method", required_argument, NULL, 130}, + {"diff", required_argument, NULL, 'd'}, + {"help", no_argument, NULL, 'h'}, +}; + +static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + +/** The extra info for the options. Must be in the same order as the options. 
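test.c above pairs each getopt_long() entry with a long_extras record, so parsing can store optarg through a pointer and the help text and required-argument checks can be generated from a single table. A reduced sketch of the same getopt_long() pattern follows (the option names and globals are invented for illustration); note that getopt_long() expects the option array to end with an all-zero terminator entry, as shown here:

    #include <getopt.h>
    #include <stdio.h>

    static char const* g_in = NULL;
    static char const* g_out = NULL;

    int main(int argc, char** argv) {
        static struct option const long_opts[] = {
            {"input", required_argument, NULL, 'i'},
            {"output", required_argument, NULL, 'o'},
            {"help", no_argument, NULL, 'h'},
            {NULL, 0, NULL, 0},                 /* required terminator */
        };
        int c;
        while ((c = getopt_long(argc, argv, "i:o:h", long_opts, NULL)) != -1) {
            switch (c) {
            case 'i': g_in = optarg; break;
            case 'o': g_out = optarg; break;
            case 'h':
            default:
                fprintf(stderr, "usage: %s --input FILE --output FILE\n", argv[0]);
                return c == 'h' ? 0 : 1;
            }
        }
        printf("input=%s output=%s\n",
               g_in ? g_in : "(none)", g_out ? g_out : "(none)");
        return 0;
    }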
*/ +static struct option_extra long_extras[] = { + {'c', "the cache directory", required_option, &g_cache}, + {'o', "write the results here", required_option, &g_output}, + {'z', "zstd cli tool", required_option, &g_zstdcli}, + {128, "use this config", optional_option, &g_config}, + {129, "use this data", optional_option, &g_data}, + {130, "use this method", optional_option, &g_method}, + {'d', "compare the results to this file", optional_option, &g_diff}, + {'h', "display this message", help_option, NULL}, +}; + +/** The short options. Must correspond to the options. */ +static char const short_options[] = "c:d:ho:z:"; + +/** Return the help string for the option type. */ +static char const* required_message(option_type opt_type) { + switch (opt_type) { + case required_option: + return "[required]"; + case optional_option: + return "[optional]"; + case help_option: + return ""; + default: + assert(0); + return NULL; + } +} + +/** Print the help for the program. */ +static void print_help(void) { + fprintf(stderr, "regression test runner\n"); + size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + for (size_t i = 0; i < nargs; ++i) { + if (long_options[i].val < 128) { + /* Long / short - help [option type] */ + fprintf( + stderr, + "--%s / -%c \t- %s %s\n", + long_options[i].name, + long_options[i].val, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } else { + /* Short / long - help [option type] */ + fprintf( + stderr, + "--%s \t- %s %s\n", + long_options[i].name, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } + } +} + +/** Parse the arguments. Teturn 0 on success. Print help on failure. */ +static int parse_args(int argc, char** argv) { + int option_index = 0; + int c; + + while (1) { + c = getopt_long(argc, argv, short_options, long_options, &option_index); + if (c == -1) + break; + + int found = 0; + for (size_t i = 0; i < nargs; ++i) { + if (c == long_extras[i].id && long_extras[i].value != NULL) { + *long_extras[i].value = optarg; + found = 1; + break; + } + } + if (found) + continue; + + switch (c) { + case 'h': + case '?': + default: + print_help(); + return 1; + } + } + + int bad = 0; + for (size_t i = 0; i < nargs; ++i) { + if (long_extras[i].opt_type != required_option) + continue; + if (long_extras[i].value == NULL) + continue; + if (*long_extras[i].value != NULL) + continue; + fprintf( + stderr, + "--%s is a required argument but is not set\n", + long_options[i].name); + bad = 1; + } + if (bad) { + fprintf(stderr, "\n"); + print_help(); + return 1; + } + + return 0; +} + +/** Helper macro to print to stderr and a file. */ +#define tprintf(file, ...) \ + do { \ + fprintf(file, __VA_ARGS__); \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +/** Helper macro to flush stderr and a file. */ +#define tflush(file) \ + do { \ + fflush(file); \ + fflush(stderr); \ + } while (0) + +void tprint_names( + FILE* results, + char const* data_name, + char const* config_name, + char const* method_name) { + int const data_padding = g_max_name_len - strlen(data_name); + int const config_padding = g_max_name_len - strlen(config_name); + int const method_padding = g_max_name_len - strlen(method_name); + + tprintf( + results, + "%s, %*s%s, %*s%s, %*s", + data_name, + data_padding, + "", + config_name, + config_padding, + "", + method_name, + method_padding, + ""); +} + +/** + * Run all the regression tests and record the results table to results and + * stderr progressively. 
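tprint_names() above relies on two small tricks: a variadic tprintf() macro that tees every row to both the results file and stderr, and printf's "%*s" with an empty string to pad each name out to the longest name seen. A compact, self-contained version (the output path is illustrative):

    #include <stdio.h>
    #include <string.h>

    #define tprintf(file, ...)            \
        do {                              \
            fprintf((file), __VA_ARGS__); \
            fprintf(stderr, __VA_ARGS__); \
        } while (0)

    int main(void) {
        FILE* out = fopen("results-example.csv", "w");   /* illustrative path */
        if (out == NULL)
            return 1;
        int const width = 12;                  /* pretend the longest name is 12 chars */
        char const* name = "silesia";
        /* "%s, %*s" prints the name, then (width - strlen(name)) spaces. */
        tprintf(out, "%s, %*s%s\n", name, (int)(width - strlen(name)), "", "level 3");
        fclose(out);
        return 0;
    }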
+ */ +static int run_all(FILE* results) { + tprint_names(results, "Data", "Config", "Method"); + tprintf(results, "Total compressed size\n"); + for (size_t method = 0; methods[method] != NULL; ++method) { + if (g_method != NULL && strcmp(methods[method]->name, g_method)) + continue; + for (size_t datum = 0; data[datum] != NULL; ++datum) { + if (g_data != NULL && strcmp(data[datum]->name, g_data)) + continue; + /* Create the state common to all configs */ + method_state_t* state = methods[method]->create(data[datum]); + for (size_t config = 0; configs[config] != NULL; ++config) { + if (g_config != NULL && strcmp(configs[config]->name, g_config)) + continue; + if (config_skip_data(configs[config], data[datum])) + continue; + /* Print the result for the (method, data, config) tuple. */ + result_t const result = + methods[method]->compress(state, configs[config]); + if (result_is_skip(result)) + continue; + tprint_names( + results, + data[datum]->name, + configs[config]->name, + methods[method]->name); + if (result_is_error(result)) { + tprintf(results, "%s\n", result_get_error_string(result)); + } else { + tprintf( + results, + "%llu\n", + (unsigned long long)result_get_data(result).total_size); + } + tflush(results); + } + methods[method]->destroy(state); + } + } + return 0; +} + +/** memcmp() the old results file and the new results file. */ +static int diff_results(char const* actual_file, char const* expected_file) { + data_buffer_t const actual = data_buffer_read(actual_file); + data_buffer_t const expected = data_buffer_read(expected_file); + int ret = 1; + + if (actual.data == NULL) { + fprintf(stderr, "failed to open results '%s' for diff\n", actual_file); + goto out; + } + if (expected.data == NULL) { + fprintf( + stderr, + "failed to open previous results '%s' for diff\n", + expected_file); + goto out; + } + + ret = data_buffer_compare(actual, expected); + if (ret != 0) { + fprintf( + stderr, + "actual results '%s' does not match expected results '%s'\n", + actual_file, + expected_file); + } else { + fprintf(stderr, "actual results match expected results\n"); + } +out: + data_buffer_free(actual); + data_buffer_free(expected); + return ret; +} + +int main(int argc, char** argv) { + /* Parse args and validate modules. */ + int ret = parse_args(argc, argv); + if (ret != 0) + return ret; + + if (are_names_bad()) + return 1; + + /* Initialize modules. */ + method_set_zstdcli(g_zstdcli); + ret = data_init(g_cache); + if (ret != 0) { + fprintf(stderr, "data_init() failed with error=%s\n", strerror(ret)); + return 1; + } + + /* Run the regression tests. */ + ret = 1; + FILE* results = fopen(g_output, "w"); + if (results == NULL) { + fprintf(stderr, "Failed to open the output file\n"); + goto out; + } + ret = run_all(results); + fclose(results); + + if (ret != 0) + goto out; + + if (g_diff) + /* Diff the new results with the previous results. */ + ret = diff_results(g_output, g_diff); + +out: + data_finish(); + return ret; +}
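diff_results() above leans on the data.h buffer helpers; the same byte-for-byte comparison can be written without them, which may make the intent clearer. read_file() below is a local helper written only for this sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char* read_file(char const* path, size_t* size) {
        FILE* f = fopen(path, "rb");
        if (f == NULL)
            return NULL;
        fseek(f, 0, SEEK_END);
        long const len = ftell(f);
        fseek(f, 0, SEEK_SET);
        char* buf = (len < 0) ? NULL : malloc((size_t)len + 1);
        if (buf != NULL && fread(buf, 1, (size_t)len, f) != (size_t)len) {
            free(buf);
            buf = NULL;
        }
        fclose(f);
        if (buf != NULL)
            *size = (size_t)len;
        return buf;
    }

    int main(int argc, char** argv) {
        if (argc != 3) {
            fprintf(stderr, "usage: %s actual.csv expected.csv\n", argv[0]);
            return 1;
        }
        size_t asize = 0, esize = 0;
        char* const actual = read_file(argv[1], &asize);
        char* const expected = read_file(argv[2], &esize);
        int ret = 1;
        if (actual == NULL || expected == NULL)
            fprintf(stderr, "failed to read one of the results files\n");
        else if ((ret = (asize != esize || memcmp(actual, expected, asize) != 0)))
            fprintf(stderr, "actual results do not match expected results\n");
        else
            fprintf(stderr, "actual results match expected results\n");
        free(actual);
        free(expected);
        return ret;
    }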