diff --git a/.circleci/config.yml b/.circleci/config.yml index b08634408..42e4042db 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,7 +15,8 @@ references: sudo apt-get -y install \ gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \ libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \ - libc6-dev-ppc64-powerpc-cross zstd gzip coreutils + libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \ + libcurl4-openssl-dev jobs: # the first half of the jobs are in this test @@ -82,6 +83,49 @@ jobs: cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS - store_artifacts: path: /tmp/circleci-artifacts + # This step should only be run in a cron job + regression-test: + docker: + - image: circleci/buildpack-deps:bionic + environment: + CIRCLE_ARTIFACTS: /tmp/circleci-artifacts + steps: + - checkout + - *install-dependencies + # Restore the cached resources. + - restore_cache: + # We try our best to bust the cache when the data changes by hashing + # data.c. If that doesn't work, simply update the version number here + # and below. If we fail to bust the cache, the regression testing will + # still work, since it has its own stamp, but will need to redownload + # everything. + keys: + - regression-cache-{{ checksum "tests/regression/data.c" }}-v0 + - run: + name: Regression Test + command: | + make -C programs zstd + make -C tests/regression test + mkdir -p $CIRCLE_ARTIFACTS + ./tests/regression/test \ + --cache tests/regression/cache \ + --output $CIRCLE_ARTIFACTS/results.csv \ + --zstd programs/zstd + echo "NOTE: The new results.csv is uploaded as an artifact to this job" + echo " If this fails, go to the Artifacts pane in CircleCI, " + echo " download /tmp/circleci-artifacts/results.csv, and if they " + echo " are still good, copy it into the repo and commit it." + echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv" + diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv + # Only save the cache on success (default), since if the failure happened + # before we stamp the data cache, we will have a bad cache for this key. + - save_cache: + key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0 + paths: + - tests/regression/cache + - store_artifacts: + path: /tmp/circleci-artifacts + workflows: version: 2 @@ -96,6 +140,13 @@ workflows: filters: tags: only: /.*/ + # Create a branch called regression and set it to dev to force a + # regression test run + - regression-test: + filters: + branches: + only: + - regression # Only run on release tags. - publish-github-release: requires: @@ -106,6 +157,20 @@ workflows: ignore: /.*/ tags: only: /^v\d+\.\d+\.\d+$/ + nightly: + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - master + - dev + jobs: + # Run daily long regression tests + - regression-test + + # Longer tests #- make -C tests test-zstd-nolegacy && make clean diff --git a/tests/regression/Makefile b/tests/regression/Makefile new file mode 100644 index 000000000..2008b2f7e --- /dev/null +++ b/tests/regression/Makefile @@ -0,0 +1,58 @@ +# ################################################################ +# Copyright (c) 2015-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +CFLAGS ?= -O3 + +CURL_CFLAGS := $(shell curl-config --cflags) +CURL_LDFLAGS := $(shell curl-config --libs) + +PROGDIR := ../../programs +LIBDIR := ../../lib +ZSTD_CPPFLAGS := -I$(PROGDIR) -I$(LIBDIR) -I$(LIBDIR)/common + +REGRESSION_CFLAGS = $(CFLAGS) $(CURL_CFLAGS) +REGRESSION_CPPFLAGS = $(CPPFLAGS) $(ZSTD_CPPFLAGS) +REGRESSION_LDFLAGS = $(LDFLAGS) $(CURL_LDFLAGS) + +all: test + +xxhash.o: $(LIBDIR)/common/xxhash.c $(LIBDIR)/common/xxhash.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +util.o: $(PROGDIR)/util.c $(PROGDIR)/util.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +data.o: data.c data.h $(PROGDIR)/util.h $(LIBDIR)/common/xxhash.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +config.o: config.c config.h levels.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +method.h: data.h config.h result.h + +method.o: method.c method.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +result.o: result.c result.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +test.o: test.c data.h config.h method.h + $(CC) $(REGRESSION_CFLAGS) $(REGRESSION_CPPFLAGS) $< -c -o $@ + +libzstd.a: + $(MAKE) -C $(LIBDIR) libzstd.a + cp $(LIBDIR)/libzstd.a . + +test: test.o data.o config.o util.o method.o result.o xxhash.o libzstd.a + $(CC) $^ $(REGRESSION_LDFLAGS) -o $@ + +.PHONY: clean +clean: + $(MAKE) -C $(LIBDIR) clean + $(RM) *.o *.a test diff --git a/tests/regression/config.c b/tests/regression/config.c new file mode 100644 index 000000000..38ac0091b --- /dev/null +++ b/tests/regression/config.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "config.h" + +/* Define a config for each fast level we want to test with. */ +#define FAST_LEVEL(x) \ + param_value_t const level_fast##x##_param_values[] = { \ + {.param = ZSTD_p_compressionLevel, .value = (unsigned)-x}, \ + }; \ + config_t const level_fast##x = { \ + .name = "level -" #x, \ + .cli_args = "--fast=" #x, \ + .param_values = PARAM_VALUES(level_fast##x##_param_values), \ + }; + +/* Define a config for each level we want to test with. */ +#define LEVEL(x) \ + param_value_t const level_##x##_param_values[] = { \ + {.param = ZSTD_p_compressionLevel, .value = (unsigned)x}, \ + }; \ + config_t const level_##x = { \ + .name = "level " #x, \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(level_##x##_param_values), \ + }; + + +#define PARAM_VALUES(pv) \ + { .data = pv, .size = sizeof(pv) / sizeof((pv)[0]) } + +#include "levels.h" + +#undef LEVEL +#undef FAST_LEVEL + +static config_t const* g_configs[] = { +#define FAST_LEVEL(x) &level_fast##x, +#define LEVEL(x) &level_##x, +#include "levels.h" +#undef LEVEL +#undef FAST_LEVEL + NULL, +}; + +config_t const* const* configs = g_configs; + +int config_get_level(config_t const* config) { + param_values_t const params = config->param_values; + size_t i; + for (size_t i = 0; i < params.size; ++i) { + if (params.data[i].param == ZSTD_p_compressionLevel) + return params.data[i].value; + } + return CONFIG_NO_LEVEL; +} diff --git a/tests/regression/config.h b/tests/regression/config.h new file mode 100644 index 000000000..73f1e9f8a --- /dev/null +++ b/tests/regression/config.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include + +#define ZSTD_STATIC_LINKING_ONLY +#include + +typedef struct { + ZSTD_cParameter param; + unsigned value; +} param_value_t; + +typedef struct { + size_t size; + param_value_t const* data; +} param_values_t; + +/** + * The config tells the compression method what options to use. + */ +typedef struct { + const char* name; /**< Identifies the config in the results table */ + /** + * Optional arguments to pass to the CLI. If not set, CLI-based methods + * will skip this config. + */ + char const* cli_args; + /** + * Parameters to pass to the advanced API. If the advanced API isn't used, + * the parameters will be derived from these. + */ + param_values_t param_values; +} config_t; + +#define CONFIG_NO_LEVEL (-ZSTD_TARGETLENGTH_MAX - 1) +/** + * Returns the compression level specified by the config, or CONFIG_NO_LEVEL if + * no level is specified. Note that 0 is a valid compression level, meaning + * default. + */ +int config_get_level(config_t const* config); + +/** + * The NULL-terminated list of configs. + */ +extern config_t const* const* configs; + +#endif diff --git a/tests/regression/data.c b/tests/regression/data.c new file mode 100644 index 000000000..68269e042 --- /dev/null +++ b/tests/regression/data.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "data.h" + +#include +#include +#include +#include + +#include + +#include + +#include "mem.h" +#include "util.h" +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + +/** + * Data objects + */ + +#define REGRESSION_RELEASE(x) \ + "https://github.com/facebook/zstd/releases/download/regression-data/" x + +data_t silesia = { + .url = REGRESSION_RELEASE("silesia.tar.zst"), + .name = "silesia", + .type = data_type_dir, + .xxhash64 = 0x67558ee5506918b4LL, +}; + +data_t silesia_tar = { + .url = REGRESSION_RELEASE("silesia.tar.zst"), + .name = "silesia.tar", + .type = data_type_file, + .xxhash64 = 0x67558ee5506918b4LL, +}; + +static data_t* g_data[] = { + &silesia, + &silesia_tar, + NULL, +}; + +data_t const* const* data = (data_t const* const*)g_data; + +/** + * data buffer helper functions (documented in header). + */ + +data_buffer_t data_buffer_create(size_t const capacity) { + data_buffer_t buffer = {}; + + buffer.data = (uint8_t*)malloc(capacity); + if (buffer.data == NULL) + return buffer; + buffer.capacity = capacity; + return buffer; +} + +data_buffer_t data_buffer_read(char const* filename) { + data_buffer_t buffer = {}; + + uint64_t const size = UTIL_getFileSize(filename); + if (size == UTIL_FILESIZE_UNKNOWN) { + fprintf(stderr, "unknown size for %s\n", filename); + return buffer; + } + + buffer.data = (uint8_t*)malloc(size); + if (buffer.data == NULL) { + fprintf(stderr, "malloc failed\n"); + return buffer; + } + buffer.capacity = size; + + FILE* file = fopen(filename, "rb"); + if (file == NULL) { + fprintf(stderr, "file null\n"); + goto err; + } + buffer.size = fread(buffer.data, 1, buffer.capacity, file); + fclose(file); + if (buffer.size != buffer.capacity) { + fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity); + goto err; + } + + return buffer; +err: + free(buffer.data); + memset(&buffer, 0, sizeof(buffer)); + return buffer; + +} + +data_buffer_t data_buffer_get(data_t const* data) { + data_buffer_t const kEmptyBuffer = {}; + + if (data->type != data_type_file) + return kEmptyBuffer; + + return data_buffer_read(data->path); +} + +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) { + size_t const size = + buffer1.size < buffer2.size ? buffer1.size : buffer2.size; + int const cmp = memcmp(buffer1.data, buffer2.data, size); + if (cmp != 0) + return cmp; + if (buffer1.size < buffer2.size) + return -1; + if (buffer1.size == buffer2.size) + return 0; + assert(buffer1.size > buffer2.size); + return 1; + +} + +void data_buffer_free(data_buffer_t buffer) { + free(buffer.data); +} + +/** + * Initialization and download functions. + */ + +static char* g_data_dir = NULL; + +/* mkdir -p */ +static int ensure_directory_exists(char const* indir) { + char* const dir = strdup(indir); + char* end = dir; + int ret = 0; + if (dir == NULL) { + ret = EINVAL; + goto out; + } + do { + /* Find the next directory level. */ + for (++end; *end != '\0' && *end != '/'; ++end) + ; + /* End the string there, make the directory, and restore the string. */ + char const save = *end; + *end = '\0'; + int const isdir = UTIL_isDirectory(dir); + ret = mkdir(dir, S_IRWXU); + *end = save; + /* Its okay if the directory already exists. */ + if (ret == 0 || (errno == EEXIST && isdir)) + continue; + ret = errno; + fprintf(stderr, "mkdir() failed\n"); + goto out; + } while (*end != '\0'); + + ret = 0; +out: + free(dir); + return ret; +} + +/** Concatenate 3 strings into a new buffer. */ +static char* cat3(char const* str1, char const* str2, char const* str3) { + size_t const size1 = strlen(str1); + size_t const size2 = strlen(str2); + size_t const size3 = strlen(str3); + size_t const size = size1 + size2 + size3 + 1; + char* const dst = (char*)malloc(size); + if (dst == NULL) + return NULL; + strcpy(dst, str1); + strcpy(dst + size1, str2); + strcpy(dst + size1 + size2, str3); + assert(strlen(dst) == size1 + size2 + size3); + return dst; +} + +/** + * State needed by the curl callback. + * It takes data from curl, hashes it, and writes it to the file. + */ +typedef struct { + FILE* file; + XXH64_state_t xxhash64; + int error; +} curl_data_t; + +/** Create the curl state. */ +static curl_data_t curl_data_create(data_t const* data) { + curl_data_t cdata = {}; + + XXH64_reset(&cdata.xxhash64, 0); + + assert(UTIL_isDirectory(g_data_dir)); + + if (data->type == data_type_file) { + /* Decompress the resource and store to the path. */ + char* cmd = cat3("zstd -dqfo '", data->path, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } else { + /* Decompress and extract the resource to the cache directory. */ + char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'"); + if (cmd == NULL) { + cdata.error = ENOMEM; + return cdata; + } + cdata.file = popen(cmd, "w"); + free(cmd); + } + if (cdata.file == NULL) { + cdata.error = errno; + } + + return cdata; +} + +/** Free the curl state. */ +static int curl_data_free(curl_data_t cdata) { + return pclose(cdata.file); +} + +/** curl callback. Updates the hash, and writes to the file. */ +static size_t curl_write(void* data, size_t size, size_t count, void* ptr) { + curl_data_t* cdata = (curl_data_t*)ptr; + size_t const written = fwrite(data, size, count, cdata->file); + XXH64_update(&cdata->xxhash64, data, written * size); + return written; +} + +/** Download a single data object. */ +static int curl_download_datum(CURL* curl, data_t const* data) { + curl_data_t cdata = curl_data_create(data); + int err = EFAULT; + + if (cdata.error != 0) { + err = cdata.error; + goto out; + } + + /* Download the data. */ + if (curl_easy_setopt(curl, CURLOPT_URL, data->url) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0) + goto out; + if (curl_easy_perform(curl) != 0) { + fprintf(stderr, "downloading '%s' failed\n", data->url); + goto out; + } + /* check that the file exists. */ + if (data->type == data_type_file && !UTIL_isRegularFile(data->path)) { + fprintf(stderr, "output file '%s' does not exist\n", data->path); + goto out; + } + if (data->type == data_type_dir && !UTIL_isDirectory(data->path)) { + fprintf(stderr, "output directory '%s' does not exist\n", data->path); + goto out; + } + /* Check that the hash matches. */ + if (XXH64_digest(&cdata.xxhash64) != data->xxhash64) { + fprintf( + stderr, + "checksum does not match: %llx != %llx\n", + (unsigned long long)XXH64_digest(&cdata.xxhash64), + (unsigned long long)data->xxhash64); + goto out; + } + + err = 0; +out: + if (err != 0) + fprintf(stderr, "downloading '%s' failed\n", data->name); + int const close_err = curl_data_free(cdata); + if (close_err != 0 && err == 0) { + fprintf(stderr, "failed to write data for '%s'\n", data->name); + err = close_err; + } + return err; +} + +/** Download all the data. */ +static int curl_download_data(data_t const* const* data) { + if (curl_global_init(CURL_GLOBAL_ALL) != 0) + return EFAULT; + + curl_data_t cdata = {}; + CURL* curl = curl_easy_init(); + int err = EFAULT; + + if (curl == NULL) + return EFAULT; + + if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0) + goto out; + if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0) + goto out; + + assert(data != NULL); + for (; *data != NULL; ++data) { + if (curl_download_datum(curl, *data) != 0) + goto out; + } + + err = 0; +out: + curl_easy_cleanup(curl); + curl_global_cleanup(); + return err; +} + +/** Fill the path member variable of the data objects. */ +static int data_create_paths(data_t* const* data, char const* dir) { + size_t const dirlen = strlen(dir); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* const datum = *data; + datum->path = cat3(dir, "/", datum->name); + if (datum->path == NULL) + return ENOMEM; + } + return 0; +} + +/** Free the path member variable of the data objects. */ +static void data_free_paths(data_t* const* data) { + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t* datum = *data; + free((void*)datum->path); + datum->path = NULL; + } +} + +static char const kStampName[] = "STAMP"; + +static void xxh_update_le(XXH64_state_t* state, uint64_t data) { + if (!MEM_isLittleEndian()) + data = MEM_swap64(data); + XXH64_update(state, &data, sizeof(data)); +} + +/** Hash the data to create the stamp. */ +static uint64_t stamp_hash(data_t const* const* data) { + XXH64_state_t state; + + XXH64_reset(&state, 0); + assert(data != NULL); + for (; *data != NULL; ++data) { + data_t const* datum = *data; + /* We don't care about the URL that we fetch from. */ + /* The path is derived from the name. */ + XXH64_update(&state, datum->name, strlen(datum->name)); + xxh_update_le(&state, datum->xxhash64); + xxh_update_le(&state, datum->type); + } + return XXH64_digest(&state); +} + +/** Check if the stamp matches the stamp in the cache directory. */ +static int stamp_check(char const* dir, data_t const* const* data) { + char* stamp = cat3(dir, "/", kStampName); + uint64_t const expected = stamp_hash(data); + XXH64_canonical_t actual; + FILE* stampfile = NULL; + int matches = 0; + + if (stamp == NULL) + goto out; + if (!UTIL_isRegularFile(stamp)) { + fprintf(stderr, "stamp does not exist: recreating the data cache\n"); + goto out; + } + + stampfile = fopen(stamp, "rb"); + if (stampfile == NULL) { + fprintf(stderr, "could not open stamp: recreating the data cache\n"); + goto out; + } + + size_t b; + if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) { + fprintf(stderr, "invalid stamp: recreating the data cache\n"); + goto out; + } + + matches = (expected == XXH64_hashFromCanonical(&actual)); + if (matches) + fprintf(stderr, "stamp matches: reusing the cached data\n"); + else + fprintf(stderr, "stamp does not match: recreating the data cache\n"); + +out: + free(stamp); + if (stampfile != NULL) + fclose(stampfile); + return matches; +} + +/** On success write a new stamp, on failure delete the old stamp. */ +static int +stamp_write(char const* dir, data_t const* const* data, int const data_err) { + char* stamp = cat3(dir, "/", kStampName); + FILE* stampfile = NULL; + int err = EIO; + + if (stamp == NULL) + return ENOMEM; + + if (data_err != 0) { + err = data_err; + goto out; + } + XXH64_canonical_t hash; + + XXH64_canonicalFromHash(&hash, stamp_hash(data)); + + stampfile = fopen(stamp, "wb"); + if (stampfile == NULL) + goto out; + if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1) + goto out; + err = 0; + fprintf(stderr, "stamped new data cache\n"); +out: + if (err != 0) + /* Ignore errors. */ + unlink(stamp); + free(stamp); + if (stampfile != NULL) + fclose(stampfile); + return err; +} + +int data_init(char const* dir) { + int err; + + if (dir == NULL) + return EINVAL; + + /* This must be first to simplify logic. */ + err = ensure_directory_exists(dir); + if (err != 0) + return err; + + /* Save the cache directory. */ + g_data_dir = strdup(dir); + if (g_data_dir == NULL) + return ENOMEM; + + err = data_create_paths(g_data, dir); + if (err != 0) + return err; + + /* If the stamp matches then we are good to go. + * This must be called before any modifications to the data cache. + * After this point, we MUST call stamp_write() to update the STAMP, + * since we've updated the data cache. + */ + if (stamp_check(dir, data)) + return 0; + + err = curl_download_data(data); + if (err != 0) + goto out; + +out: + /* This must be last, since it must know if data_init() succeeded. */ + stamp_write(dir, data, err); + return err; +} + +void data_finish(void) { + data_free_paths(g_data); + free(g_data_dir); + g_data_dir = NULL; +} diff --git a/tests/regression/data.h b/tests/regression/data.h new file mode 100644 index 000000000..d75f645b1 --- /dev/null +++ b/tests/regression/data.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef DATA_H +#define DATA_H + +#include +#include + +typedef enum { + data_type_file = 1, /**< This data is a file. *.zst */ + data_type_dir = 2, /**< This data is a directory. *.tar.zst */ +} data_type_t; + +typedef struct { + char const* url; /**< Where to get this resource. */ + uint64_t xxhash64; /**< Hash of the url contents. */ + char const* name; /**< The logical name of the resource (no extension). */ + data_type_t type; /**< The type of this resource. */ + char const* path; /**< The path of the unpacked resource (derived). */ + size_t size; +} data_t; + +/** + * The NULL-terminated list of data objects. + */ +extern data_t const* const* data; + +/** + * Initializes the data module and downloads the data necessary. + * Caches the downloads in dir. We add a stamp file in the directory after + * a successful download. If a stamp file already exists, and matches our + * current data stamp, we will use the cached data without downloading. + * + * @param dir The directory to cache the downloaded data into. + * + * @returns 0 on success. + */ +int data_init(char const* dir); + +/** + * Must be called at exit to free resources allocated by data_init(). + */ +void data_finish(void); + +typedef struct { + uint8_t* data; + size_t size; + size_t capacity; +} data_buffer_t; + +/** + * Read the file that data points to into a buffer. + * NOTE: data must be a file, not a directory. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_get(data_t const* data); + +/** + * Read the contents of filename into a buffer. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_read(char const* filename); + +/** + * Create a buffer with the specified capacity. + * + * @returns The buffer, which is NULL on failure. + */ +data_buffer_t data_buffer_create(size_t capacity); + +/** + * Calls memcmp() on the contents [0, size) of both buffers. + */ +int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2); + +/** + * Frees an allocated buffer. + */ +void data_buffer_free(data_buffer_t buffer); + + +#endif diff --git a/tests/regression/levels.h b/tests/regression/levels.h new file mode 100644 index 000000000..f96689075 --- /dev/null +++ b/tests/regression/levels.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef LEVEL +# error LEVEL(x) must be defined +#endif +#ifndef FAST_LEVEL +# error FAST_LEVEL(x) must be defined +#endif + +/** + * The levels are chosen to trigger every strategy in every source size, + * as well as some fast levels and the default level. + * If you change the compression levels, you should probably update these. + */ + +FAST_LEVEL(5) + +FAST_LEVEL(3) + +FAST_LEVEL(1) +LEVEL(0) +LEVEL(1) + +LEVEL(3) +LEVEL(4) +LEVEL(5) +LEVEL(6) +LEVEL(7) + +LEVEL(9) + +LEVEL(13) + +LEVEL(16) + +LEVEL(19) diff --git a/tests/regression/method.c b/tests/regression/method.c new file mode 100644 index 000000000..c19a377a7 --- /dev/null +++ b/tests/regression/method.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "method.h" + +#include +#include + +#include + +static char const* g_zstdcli = NULL; + +void method_set_zstdcli(char const* zstdcli) { + g_zstdcli = zstdcli; +} + +/** + * Macro to get a pointer of type, given ptr, which is a member variable with + * the given name, member. + * + * method_state_t* base = ...; + * simple_state_t* state = container_of(base, simple_state_t, base); + */ +#define container_of(ptr, type, member) \ + ((type*)(char*)(ptr)-offsetof(type, member)) + +/** State to reuse the same buffers between compression calls. */ +typedef struct { + method_state_t base; + data_buffer_t buffer; /**< The constant input data buffer. */ + data_buffer_t compressed; /**< The compressed data buffer. */ + data_buffer_t decompressed; /**< The decompressed data buffer. */ +} simple_state_t; + +static method_state_t* simple_create(data_t const* data) { + simple_state_t* state = (simple_state_t*)calloc(1, sizeof(simple_state_t)); + if (state == NULL) + return NULL; + state->base.data = data; + state->buffer = data_buffer_get(data); + state->compressed = + data_buffer_create(ZSTD_compressBound(state->buffer.size)); + state->decompressed = data_buffer_create(state->buffer.size); + return &state->base; +} + +static void simple_destroy(method_state_t* base) { + if (base == NULL) + return; + simple_state_t* state = container_of(base, simple_state_t, base); + free(state); +} + +static result_t simple_compress(method_state_t* base, config_t const* config) { + if (base == NULL) + return result_error(result_error_system_error); + simple_state_t* state = container_of(base, simple_state_t, base); + + if (base->data->type != data_type_file) + return result_error(result_error_skip); + + if (state->buffer.data == NULL || state->compressed.data == NULL || + state->decompressed.data == NULL) { + return result_error(result_error_system_error); + } + + /* If the config doesn't specify a level, skip. */ + int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return result_error(result_error_skip); + + /* Compress, decompress, and check the result. */ + state->compressed.size = ZSTD_compress( + state->compressed.data, + state->compressed.capacity, + state->buffer.data, + state->buffer.size, + level); + if (ZSTD_isError(state->compressed.size)) + return result_error(result_error_compression_error); + + state->decompressed.size = ZSTD_decompress( + state->decompressed.data, + state->decompressed.capacity, + state->compressed.data, + state->compressed.size); + if (ZSTD_isError(state->decompressed.size)) + return result_error(result_error_decompression_error); + if (data_buffer_compare(state->buffer, state->decompressed)) + return result_error(result_error_round_trip_error); + + result_data_t data; + data.total_size = state->compressed.size; + return result_data(data); +} + +/** Generic state creation function. */ +static method_state_t* method_state_create(data_t const* data) { + method_state_t* state = (method_state_t*)malloc(sizeof(method_state_t)); + if (state == NULL) + return NULL; + state->data = data; + return state; +} + +static void method_state_destroy(method_state_t* state) { + free(state); +} + +#define MAX_OUT 32 + +static result_t cli_file_compress( + method_state_t* state, + config_t const* config) { + if (config->cli_args == NULL) + return result_error(result_error_skip); + + if (g_zstdcli == NULL) + return result_error(result_error_system_error); + + /* '' -r '' | wc -c */ + char cmd[1024]; + size_t const cmd_size = snprintf( + cmd, + sizeof(cmd), + "'%s' -cqr %s '%s' | wc -c", + g_zstdcli, + config->cli_args, + state->data->path); + if (cmd_size >= sizeof(cmd)) { + fprintf(stderr, "command too large: %s\n", cmd); + return result_error(result_error_system_error); + } + FILE* zstd = popen(cmd, "r"); + if (zstd == NULL) { + fprintf(stderr, "failed to popen command: %s\n", cmd); + return result_error(result_error_system_error); + } + + /* Read the total compressed size. */ + char out[MAX_OUT + 1]; + size_t const out_size = fread(out, 1, MAX_OUT, zstd); + out[out_size] = '\0'; + int const zstd_ret = pclose(zstd); + if (zstd_ret != 0) { + fprintf(stderr, "zstd failed with command: %s\n", cmd); + return result_error(result_error_compression_error); + } + if (out_size == MAX_OUT) { + fprintf(stderr, "wc -c produced more bytes than expected: %s\n", out); + return result_error(result_error_system_error); + } + + result_data_t data; + data.total_size = atoll(out); + return result_data(data); +} + +method_t const simple = { + .name = "simple", + .create = simple_create, + .compress = simple_compress, + .destroy = simple_destroy, +}; + +method_t const cli_file = { + .name = "cli file", + .create = method_state_create, + .compress = cli_file_compress, + .destroy = method_state_destroy, +}; + +static method_t const* g_methods[] = { + &simple, + &cli_file, + NULL, +}; + +method_t const* const* methods = g_methods; diff --git a/tests/regression/method.h b/tests/regression/method.h new file mode 100644 index 000000000..d70b776b1 --- /dev/null +++ b/tests/regression/method.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef METHOD_H +#define METHOD_H + +#include + +#include "data.h" +#include "config.h" +#include "result.h" + +/** + * The base class for state that methods keep. + * All derived method state classes must have a member of this type. + */ +typedef struct { + data_t const* data; +} method_state_t; + +/** + * A method that compresses the data using config. + */ +typedef struct { + char const* name; /**< The identifier for this method in the results. */ + /** + * Creates a state that must contain a member variable of method_state_t, + * and returns a pointer to that member variable. + * + * This method can be used to do expensive work that only depends on the + * data, like loading the data file into a buffer. + */ + method_state_t* (*create)(data_t const* data); + /** + * Compresses the data in the state using the given config. + * + * @param state A pointer to the state returned by create(). + * + * @returns The total compressed size on success, or an error code. + */ + result_t (*compress)(method_state_t* state, config_t const* config); + /** + * Frees the state. + */ + void (*destroy)(method_state_t* state); +} method_t; + +/** + * Set the zstd cli path. Must be called before any methods are used. + */ +void method_set_zstdcli(char const* zstdcli); + +/** + * A NULL-terminated list of methods. + */ +extern method_t const* const* methods; + +#endif diff --git a/tests/regression/result.c b/tests/regression/result.c new file mode 100644 index 000000000..31439b08c --- /dev/null +++ b/tests/regression/result.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "result.h" + +char const* result_get_error_string(result_t result) { + switch (result_get_error(result)) { + case result_error_ok: + return "okay"; + case result_error_skip: + return "skip"; + case result_error_system_error: + return "system error"; + case result_error_compression_error: + return "compression error"; + case result_error_decompression_error: + return "decompression error"; + case result_error_round_trip_error: + return "round trip error"; + } +} diff --git a/tests/regression/result.h b/tests/regression/result.h new file mode 100644 index 000000000..8c80cf85a --- /dev/null +++ b/tests/regression/result.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef RESULT_H +#define RESULT_H + +#include + +/** + * The error type enum. + */ +typedef enum { + result_error_ok, /**< No error. */ + result_error_skip, /**< This method was skipped. */ + result_error_system_error, /**< Some internal error happened. */ + result_error_compression_error, /**< Compression failed. */ + result_error_decompression_error, /**< Decompression failed. */ + result_error_round_trip_error, /**< Data failed to round trip. */ +} result_error_t; + +/** + * The success type. + */ +typedef struct { + size_t total_size; /**< The total compressed size. */ +} result_data_t; + +/** + * The result type. + * Do not access the member variables directory, use the helper functions. + */ +typedef struct { + result_error_t internal_error; + result_data_t internal_data; +} result_t; + +/** + * Create a result of the error type. + */ +static result_t result_error(result_error_t error); +/** + * Create a result of the success type. + */ +static result_t result_data(result_data_t data); + +/** + * Check if the result is an error or skip. + */ +static int result_is_error(result_t result); +/** + * Check if the result error is skip. + */ +static int result_is_skip(result_t result); +/** + * Get the result error or okay. + */ +static result_error_t result_get_error(result_t result); +/** + * Get the result data. The result MUST be checked with result_is_error() first. + */ +static result_data_t result_get_data(result_t result); + +static result_t result_error(result_error_t error) { + result_t result = { + .internal_error = error, + }; + return result; +} + +static result_t result_data(result_data_t data) { + result_t result = { + .internal_error = result_error_ok, + .internal_data = data, + }; + return result; +} + +static int result_is_error(result_t result) { + return result_get_error(result) != result_error_ok; +} + +static int result_is_skip(result_t result) { + return result_get_error(result) == result_error_skip; +} + +static result_error_t result_get_error(result_t result) { + return result.internal_error; +} + +char const* result_get_error_string(result_t result); + +static result_data_t result_get_data(result_t result) { + return result.internal_data; +} + +#endif diff --git a/tests/regression/results.csv b/tests/regression/results.csv new file mode 100644 index 000000000..8ed689d26 --- /dev/null +++ b/tests/regression/results.csv @@ -0,0 +1,43 @@ +Data, Config, Method, Total compressed size +silesia.tar, level -5, simple, 106176430 +silesia.tar, level -3, simple, 98476550 +silesia.tar, level -1, simple, 87206767 +silesia.tar, level 0, simple, 66996953 +silesia.tar, level 1, simple, 73658303 +silesia.tar, level 3, simple, 66996953 +silesia.tar, level 4, simple, 65996020 +silesia.tar, level 5, simple, 64421326 +silesia.tar, level 6, simple, 62388673 +silesia.tar, level 7, simple, 61159525 +silesia.tar, level 9, simple, 60214921 +silesia.tar, level 13, simple, 58428642 +silesia.tar, level 16, simple, 56363759 +silesia.tar, level 19, simple, 53274173 +silesia, level -5, cli file, 106202112 +silesia, level -3, cli file, 98518660 +silesia, level -1, cli file, 87226203 +silesia, level 0, cli file, 67049190 +silesia, level 1, cli file, 73676282 +silesia, level 3, cli file, 67049190 +silesia, level 4, cli file, 66090040 +silesia, level 5, cli file, 64503721 +silesia, level 6, cli file, 62446177 +silesia, level 7, cli file, 61217029 +silesia, level 9, cli file, 60282841 +silesia, level 13, cli file, 58480658 +silesia, level 16, cli file, 56414170 +silesia, level 19, cli file, 53365292 +silesia.tar, level -5, cli file, 106250113 +silesia.tar, level -3, cli file, 98550747 +silesia.tar, level -1, cli file, 87227322 +silesia.tar, level 0, cli file, 67111168 +silesia.tar, level 1, cli file, 73694374 +silesia.tar, level 3, cli file, 67111168 +silesia.tar, level 4, cli file, 66154079 +silesia.tar, level 5, cli file, 64546998 +silesia.tar, level 6, cli file, 62458454 +silesia.tar, level 7, cli file, 61231085 +silesia.tar, level 9, cli file, 60310313 +silesia.tar, level 13, cli file, 58517476 +silesia.tar, level 16, cli file, 56448694 +silesia.tar, level 19, cli file, 53444920 diff --git a/tests/regression/test.c b/tests/regression/test.c new file mode 100644 index 000000000..c3063511c --- /dev/null +++ b/tests/regression/test.c @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +#include "config.h" +#include "data.h" +#include "method.h" + +/** Check if a name contains a comma. */ +static int is_name_bad(char const* name) { + if (name == NULL) + return 1; + for (; *name != '\0'; ++name) + if (*name == ',') + return 1; + return 0; +} + +/** Check if any of the names contain a comma. */ +static int are_names_bad() { + for (size_t method = 0; methods[method] != NULL; ++method) + if (is_name_bad(methods[method]->name)) { + fprintf(stderr, "method name %s is bad\n", methods[method]->name); + return 1; + } + for (size_t datum = 0; data[datum] != NULL; ++datum) + if (is_name_bad(data[datum]->name)) { + fprintf(stderr, "data name %s is bad\n", data[datum]->name); + return 1; + } + for (size_t config = 0; configs[config] != NULL; ++config) + if (is_name_bad(configs[config]->name)) { + fprintf(stderr, "config name %s is bad\n", configs[config]->name); + return 1; + } + return 0; +} + +/** Helper macro to print to stderr and a file. */ +#define tprintf(file, ...) \ + do { \ + fprintf(file, __VA_ARGS__); \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) +/** Helper macro to flush stderr and a file. */ +#define tflush(file) \ + do { \ + fflush(file); \ + fflush(stderr); \ + } while (0) + +/** + * Run all the regression tests and record the results table to results and + * stderr progressively. + */ +static int run_all(FILE* results) { + tprintf(results, "Data,\tConfig,\tMethod,\tTotal compressed size\n"); + for (size_t method = 0; methods[method] != NULL; ++method) { + for (size_t datum = 0; data[datum] != NULL; ++datum) { + /* Create the state common to all configs */ + method_state_t* state = methods[method]->create(data[datum]); + for (size_t config = 0; configs[config] != NULL; ++config) { + /* Print the result for the (method, data, config) tuple. */ + result_t const result = + methods[method]->compress(state, configs[config]); + if (result_is_skip(result)) + continue; + tprintf( + results, + "%s,\t%s,\t%s,\t", + data[datum]->name, + configs[config]->name, + methods[method]->name); + if (result_is_error(result)) { + tprintf(results, "%s\n", result_get_error_string(result)); + } else { + tprintf( + results, + "%llu\n", + (unsigned long long)result_get_data(result).total_size); + } + tflush(results); + } + methods[method]->destroy(state); + } + } + return 0; +} + +/** + * Option parsing using getopt. + * When you add a new option update: long_options, long_extras, and + * short_options. + */ + +/** Option variables filled by parse_args. */ +static char const* g_output = NULL; +static char const* g_diff = NULL; +static char const* g_cache = NULL; +static char const* g_zstdcli = NULL; + +typedef enum { + required_option, + optional_option, + help_option, +} option_type; + +/** + * Extra state that we need to keep per-option that we can't store in getopt. + */ +struct option_extra { + int id; /**< The short option name, used as an id. */ + char const* help; /**< The help message. */ + option_type opt_type; /**< The option type: required, optional, or help. */ + char const** value; /**< The value to set or NULL if no_argument. */ +}; + +/** The options. */ +static struct option long_options[] = { + {"cache", required_argument, NULL, 'c'}, + {"diff", required_argument, NULL, 'd'}, + {"help", no_argument, NULL, 'h'}, + {"output", required_argument, NULL, 'o'}, + {"zstd", required_argument, NULL, 'z'}, +}; + +static size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + +/** The extra info for the options. Must be in the same order as the options. */ +static struct option_extra long_extras[] = { + {'c', "the cache directory", required_option, &g_cache}, + {'d', "compare the results to this file", optional_option, &g_diff}, + {'h', "display this message", help_option, NULL}, + {'o', "write the results here", required_option, &g_output}, + {'z', "zstd cli tool", required_option, &g_zstdcli}, +}; + +/** The short options. Must correspond to the options. */ +static char const short_options[] = "c:d:ho:z:"; + +/** Return the help string for the option type. */ +static char const* required_message(option_type opt_type) { + switch (opt_type) { + case required_option: + return "[required]"; + case optional_option: + return "[optional]"; + case help_option: + return ""; + default: + assert(0); + return NULL; + } +} + +/** Print the help for the program. */ +static void print_help(void) { + fprintf(stderr, "regression test runner\n"); + size_t const nargs = sizeof(long_options) / sizeof(long_options[0]); + for (size_t i = 0; i < nargs; ++i) { + /* Short / long - help [option type] */ + fprintf( + stderr, + "-%c / --%s \t- %s %s\n", + long_options[i].val, + long_options[i].name, + long_extras[i].help, + required_message(long_extras[i].opt_type)); + } +} + +/** Parse the arguments. Teturn 0 on success. Print help on failure. */ +static int parse_args(int argc, char** argv) { + int option_index = 0; + int c; + + while (1) { + c = getopt_long(argc, argv, short_options, long_options, &option_index); + if (c == -1) + break; + + int found = 0; + for (size_t i = 0; i < nargs; ++i) { + if (c == long_extras[i].id && long_extras[i].value != NULL) { + *long_extras[i].value = optarg; + found = 1; + break; + } + } + if (found) + continue; + + switch (c) { + case 'h': + case '?': + default: + print_help(); + return 1; + } + } + + int bad = 0; + for (size_t i = 0; i < nargs; ++i) { + if (long_extras[i].opt_type != required_option) + continue; + if (long_extras[i].value == NULL) + continue; + if (*long_extras[i].value != NULL) + continue; + fprintf( + stderr, + "-%c / --%s is a required argument but is not set\n", + long_options[i].val, + long_options[i].name); + bad = 1; + } + if (bad) { + fprintf(stderr, "\n"); + print_help(); + return 1; + } + + return 0; +} + +/** memcmp() the old results file and the new results file. */ +static int diff_results(char const* actual_file, char const* expected_file) { + data_buffer_t const actual = data_buffer_read(actual_file); + data_buffer_t const expected = data_buffer_read(expected_file); + int ret = 1; + + if (actual.data == NULL) { + fprintf(stderr, "failed to open results '%s' for diff\n", actual_file); + goto out; + } + if (expected.data == NULL) { + fprintf( + stderr, + "failed to open previous results '%s' for diff\n", + expected_file); + goto out; + } + + ret = data_buffer_compare(actual, expected); + if (ret != 0) { + fprintf( + stderr, + "actual results '%s' does not match expected results '%s'\n", + actual_file, + expected_file); + } else { + fprintf(stderr, "actual results match expected results\n"); + } +out: + data_buffer_free(actual); + data_buffer_free(expected); + return ret; +} + +int main(int argc, char** argv) { + /* Parse args and validate modules. */ + int ret = parse_args(argc, argv); + if (ret != 0) + return ret; + + if (are_names_bad()) + return 1; + + /* Initialize modules. */ + method_set_zstdcli(g_zstdcli); + ret = data_init(g_cache); + if (ret != 0) { + fprintf(stderr, "data_init() failed with error=%s\n", strerror(ret)); + return 1; + } + + /* Run the regression tests. */ + ret = 1; + FILE* results = fopen(g_output, "w"); + if (results == NULL) { + fprintf(stderr, "Failed to open the output file\n"); + goto out; + } + ret = run_all(results); + fclose(results); + + if (ret != 0) + goto out; + + if (g_diff) + /* Diff the new results with the previous results. */ + ret = diff_results(g_output, g_diff); + +out: + data_finish(); + return ret; +}