From f17652931c4f8cebeecebbc4f4a45c47eb81ffca Mon Sep 17 00:00:00 2001 From: yhoogstrate Date: Tue, 8 Feb 2022 14:06:00 +0100 Subject: [PATCH 01/16] seekable_format no header when compressing empty string to stream --- .../seekable_format/tests/seekable_tests.c | 30 +++++++++++++++++++ contrib/seekable_format/zstdseek_compress.c | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index a482638b9..cf98e734f 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -186,6 +186,36 @@ int main(int argc, const char** argv) } printf("Success!\n"); + + printf("Test %u - check ZSTD magic in compressing empty string: ", testNb++); + { // compressing empty string should return a zstd header + char *data_in = (char *) malloc(255 * sizeof(char)); + assert(data_in != NULL); + data_in = "\0"; + + char *data_out = (char *) malloc(255 * 255 * sizeof(char)); + assert(data_out != NULL); + + ZSTD_seekable_CStream *s = ZSTD_seekable_createCStream(); + ZSTD_seekable_initCStream(s, 1, 1, 1024 * 1024); + + ZSTD_inBuffer input = { data_in, 0, 0 }; + ZSTD_outBuffer output = { data_out, 255*255, 0 }; + + ZSTD_seekable_compressStream(s, &output, &input); + ZSTD_seekable_endStream(s, &output); + + if((((char*)output.dst)[0] != '\x28') | (((char*)output.dst)[1] != '\xb5') | (((char*)output.dst)[2] != '\x2f') | (((char*)output.dst)[3] != '\xfd')) { + printf("%#02x %#02x %#02x %#02x\n", ((char*)output.dst)[0], ((char*)output.dst)[1] , ((char*)output.dst)[2] , ((char*)output.dst)[3] ); + + ZSTD_seekable_freeCStream(s); + goto _test_error; + } + + ZSTD_seekable_freeCStream(s); + } + printf("Success!\n"); + /* TODO: Add more tests */ printf("Finished tests\n"); return 0; diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index 242bd2ac3..7ec9bb577 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -350,7 +350,7 @@ size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output) size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) { - if (!zcs->writingSeekTable && zcs->frameDSize) { + if (!zcs->writingSeekTable) { const size_t endFrame = ZSTD_seekable_endFrame(zcs, output); if (ZSTD_isError(endFrame)) return endFrame; /* return an accurate size hint */ From aece0f258adfbd72401707d39094a477106af07a Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 13 Dec 2022 08:15:16 -0800 Subject: [PATCH 02/16] free memory in test case --- .../seekable_format/tests/seekable_tests.c | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index cf98e734f..1bb2d0e81 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -189,18 +189,18 @@ int main(int argc, const char** argv) printf("Test %u - check ZSTD magic in compressing empty string: ", testNb++); { // compressing empty string should return a zstd header - char *data_in = (char *) malloc(255 * sizeof(char)); - assert(data_in != NULL); - data_in = "\0"; - - char *data_out = (char *) malloc(255 * 255 * sizeof(char)); - assert(data_out != NULL); + size_t const capacity = 255; + char* inBuffer = malloc(capacity); + assert(inBuffer != NULL); + inBuffer[0] = '\0'; + void* const outBuffer = malloc(capacity); + assert(outBuffer != NULL); ZSTD_seekable_CStream *s = ZSTD_seekable_createCStream(); - ZSTD_seekable_initCStream(s, 1, 1, 1024 * 1024); + ZSTD_seekable_initCStream(s, 1, 1, 255); - ZSTD_inBuffer input = { data_in, 0, 0 }; - ZSTD_outBuffer output = { data_out, 255*255, 0 }; + ZSTD_inBuffer input = { .src=inBuffer, .pos=0, .size=0 }; + ZSTD_outBuffer output = { .dst=outBuffer, .pos=0, .size=capacity }; ZSTD_seekable_compressStream(s, &output, &input); ZSTD_seekable_endStream(s, &output); @@ -208,10 +208,14 @@ int main(int argc, const char** argv) if((((char*)output.dst)[0] != '\x28') | (((char*)output.dst)[1] != '\xb5') | (((char*)output.dst)[2] != '\x2f') | (((char*)output.dst)[3] != '\xfd')) { printf("%#02x %#02x %#02x %#02x\n", ((char*)output.dst)[0], ((char*)output.dst)[1] , ((char*)output.dst)[2] , ((char*)output.dst)[3] ); + free(inBuffer); + free(outBuffer); ZSTD_seekable_freeCStream(s); goto _test_error; } + free(inBuffer); + free(outBuffer); ZSTD_seekable_freeCStream(s); } printf("Success!\n"); From 69ec75f0d5e028aa34b3b51f990f38a18a0b7783 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 13 Dec 2022 08:35:20 -0800 Subject: [PATCH 03/16] fix window resizing edge case --- lib/compress/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5bc7a9f3f..0069a7b1b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1370,8 +1370,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, } /* resize windowLog if input is small enough, to use less memory */ - if ( (srcSize < maxWindowResize) - && (dictSize < maxWindowResize) ) { + if ( (srcSize <= maxWindowResize) + && (dictSize <= maxWindowResize) ) { U32 const tSize = (U32)(srcSize + dictSize); static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : From d081d98ae7239385fe5e62d41898ef4b3c5e06ea Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 13 Dec 2022 12:26:34 -0800 Subject: [PATCH 04/16] Fix m68k CI tests on Github Actions seems there is a bug in the qemu version shipped with Ubuntu 22.04 --- .github/workflows/dev-short-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev-short-tests.yml b/.github/workflows/dev-short-tests.yml index a31eec6bb..08df1c07c 100644 --- a/.github/workflows/dev-short-tests.yml +++ b/.github/workflows/dev-short-tests.yml @@ -275,7 +275,7 @@ jobs: qemu-consistency: name: QEMU ${{ matrix.name }} - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed. matrix: From 32bb667138028f82170451a3260caffa51544e48 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 13 Dec 2022 12:45:41 -0800 Subject: [PATCH 05/16] added test to cli-tests --- tests/cli-tests/compression/window-resize.sh | 9 +++++++++ tests/cli-tests/compression/window-resize.sh.stderr.glob | 1 + tests/cli-tests/compression/window-resize.sh.stdout.glob | 3 +++ 3 files changed, 13 insertions(+) create mode 100755 tests/cli-tests/compression/window-resize.sh create mode 100644 tests/cli-tests/compression/window-resize.sh.stderr.glob create mode 100644 tests/cli-tests/compression/window-resize.sh.stdout.glob diff --git a/tests/cli-tests/compression/window-resize.sh b/tests/cli-tests/compression/window-resize.sh new file mode 100755 index 000000000..3b5e6fe24 --- /dev/null +++ b/tests/cli-tests/compression/window-resize.sh @@ -0,0 +1,9 @@ +#!/bin/sh +datagen -g1G > file +zstd --long=31 -1 --single-thread --no-content-size -f file +zstd -l -v file.zst + +# We want to ignore stderr (its outputting "*** zstd command line interface +# 64-bits v1.5.3, by Yann Collet ***") + +rm file file.zst diff --git a/tests/cli-tests/compression/window-resize.sh.stderr.glob b/tests/cli-tests/compression/window-resize.sh.stderr.glob new file mode 100644 index 000000000..eb1ae458f --- /dev/null +++ b/tests/cli-tests/compression/window-resize.sh.stderr.glob @@ -0,0 +1 @@ +... diff --git a/tests/cli-tests/compression/window-resize.sh.stdout.glob b/tests/cli-tests/compression/window-resize.sh.stdout.glob new file mode 100644 index 000000000..313d216e1 --- /dev/null +++ b/tests/cli-tests/compression/window-resize.sh.stdout.glob @@ -0,0 +1,3 @@ +... +Window Size: 1.000 GiB (1073741824 B) +... From 9b9ad5af3f78e517723277262cfd601d00ce4cd9 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 13 Dec 2022 13:52:54 -0800 Subject: [PATCH 06/16] use .ignore for stderr output in window-resize test case --- tests/cli-tests/compression/window-resize.sh.stderr.glob | 1 - tests/cli-tests/compression/window-resize.sh.stderr.ignore | 0 2 files changed, 1 deletion(-) delete mode 100644 tests/cli-tests/compression/window-resize.sh.stderr.glob create mode 100644 tests/cli-tests/compression/window-resize.sh.stderr.ignore diff --git a/tests/cli-tests/compression/window-resize.sh.stderr.glob b/tests/cli-tests/compression/window-resize.sh.stderr.glob deleted file mode 100644 index eb1ae458f..000000000 --- a/tests/cli-tests/compression/window-resize.sh.stderr.glob +++ /dev/null @@ -1 +0,0 @@ -... diff --git a/tests/cli-tests/compression/window-resize.sh.stderr.ignore b/tests/cli-tests/compression/window-resize.sh.stderr.ignore new file mode 100644 index 000000000..e69de29bb From e767d5c7c144ab911842229a9a8fee527d9616e9 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 13 Dec 2022 15:03:23 -0800 Subject: [PATCH 07/16] [contrib][linux-kernel] Fix stack detection for newer gcc Newer gcc versions were getting smart and omitting the `memset()`. Get around this issue by outlining the `memset()` into a different function. This test is still hacky, but it works... --- contrib/linux-kernel/test/test.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/contrib/linux-kernel/test/test.c b/contrib/linux-kernel/test/test.c index 6cd1730bb..67d248e0c 100644 --- a/contrib/linux-kernel/test/test.c +++ b/contrib/linux-kernel/test/test.c @@ -186,11 +186,14 @@ static void __attribute__((noinline)) use(void *x) { asm volatile("" : "+r"(x)); } +static void __attribute__((noinline)) fill_stack(void) { + memset(g_stack, 0x33, 8192); +} + static void __attribute__((noinline)) set_stack(void) { char stack[8192]; g_stack = stack; - memset(g_stack, 0x33, 8192); use(g_stack); } @@ -208,6 +211,7 @@ static void __attribute__((noinline)) check_stack(void) { static void test_stack_usage(test_data_t const *data) { set_stack(); + fill_stack(); test_f2fs(); test_btrfs(data); test_decompress_unzstd(data); From e1e82f74f1f992acb6d98577e167d2e7cfe45f70 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 13 Dec 2022 17:45:05 -0800 Subject: [PATCH 08/16] Reserve two fields in ZSTD_frameHeader --- lib/zstd.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/zstd.h b/lib/zstd.h index 1867efc93..1dff31b4e 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -2573,6 +2573,8 @@ typedef struct { unsigned headerSize; unsigned dictID; unsigned checksumFlag; + unsigned _reserved1; + unsigned _reserved2; } ZSTD_frameHeader; /*! ZSTD_getFrameHeader() : From c43da3d6059a277ab5d76236bdb66ba2b36264a1 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 13 Dec 2022 18:01:32 -0800 Subject: [PATCH 09/16] Fix C90 compat --- lib/decompress/zstd_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 9fe326e73..df831b774 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1549,7 +1549,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) * ZSTD_getFrameHeader(), which will provide a more precise error code. */ unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) { - ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 }; size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); if (ZSTD_isError(hError)) return 0; return zfp.dictID; From 031de3c69ccbf3282ed02fb49369b476730aeca8 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Wed, 28 Sep 2022 16:17:53 -0700 Subject: [PATCH 10/16] meson: make backtrace dependency on execinfo musl libc for example has no such header. Signed-off-by: Rosen Penev --- build/meson/meson_options.txt | 2 +- build/meson/programs/meson.build | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/build/meson/meson_options.txt b/build/meson/meson_options.txt index accf3fa10..f35cd5fc8 100644 --- a/build/meson/meson_options.txt +++ b/build/meson/meson_options.txt @@ -14,7 +14,7 @@ option('legacy_level', type: 'integer', min: 0, max: 7, value: 5, description: 'Support any legacy format: 7 to 1 for v0.7+ to v0.1+') option('debug_level', type: 'integer', min: 0, max: 9, value: 1, description: 'Enable run-time debug. See lib/common/debug.h') -option('backtrace', type: 'boolean', value: false, +option('backtrace', type: 'feature', value: 'disabled', description: 'Display a stack backtrace when execution generates a runtime exception') option('static_runtime', type: 'boolean', value: false, description: 'Link to static run-time libraries on MSVC') diff --git a/build/meson/programs/meson.build b/build/meson/programs/meson.build index 8df3a5dcb..8cee115da 100644 --- a/build/meson/programs/meson.build +++ b/build/meson/programs/meson.build @@ -51,7 +51,8 @@ endif export_dynamic_on_windows = false # explicit backtrace enable/disable for Linux & Darwin -if not use_backtrace +execinfo = cc.has_header('execinfo.h', required: use_backtrace) +if not execinfo.found() zstd_c_args += '-DBACKTRACE_ENABLE=0' elif use_debug and host_machine_os == os_windows # MinGW target zstd_c_args += '-DBACKTRACE_ENABLE=1' From e58a39f84e988e4229067372b4f30601dcfc484b Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Jan 2022 12:37:32 -0800 Subject: [PATCH 11/16] Allow tests to fake stdin/stdout/stderr is a console We've been unable to effectively test cases where stdin/stdout/stderr are consoles, because in our test cases they generally aren't. Allow the command line flags `--fake-std{in,out,err}-is-console` to tell the CLI to pretend that std{in,out,err} is a console. --- programs/fileio.c | 2 +- programs/platform.h | 4 ++++ programs/util.c | 28 ++++++++++++++++++++++++++++ programs/util.h | 14 ++++++++++++++ programs/zstdcli.c | 13 ++++++++----- 5 files changed, 55 insertions(+), 6 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index e80d37011..57192961e 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -3010,7 +3010,7 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis } } if (numFiles == 0) { - if (!IS_CONSOLE(stdin)) { + if (!UTIL_isConsole(stdin)) { DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); } DISPLAYLEVEL(1, "No files given \n"); diff --git a/programs/platform.h b/programs/platform.h index b858e3b48..3ee19580c 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -127,6 +127,10 @@ extern "C" { /*-********************************************* * Detect if isatty() and fileno() are available +* +* Note: Use UTIL_isConsole() for the zstd CLI +* instead, as it allows faking is console for +* testing. ************************************************/ #if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \ || (PLATFORM_POSIX_VERSION >= 200112L) \ diff --git a/programs/util.c b/programs/util.c index 63b3ae176..bdb651074 100644 --- a/programs/util.c +++ b/programs/util.c @@ -288,6 +288,34 @@ int UTIL_isLink(const char* infilename) return 0; } +static int g_fakeStdinIsConsole = 0; +static int g_fakeStderrIsConsole = 0; +static int g_fakeStdoutIsConsole = 0; + +int UTIL_isConsole(FILE* file) +{ + if (file == stdin && g_fakeStdinIsConsole) + return 1; + if (file == stderr && g_fakeStderrIsConsole) + return 1; + if (file == stdout && g_fakeStdoutIsConsole) + return 1; + return IS_CONSOLE(file); +} + +void UTIL_fakeStdinIsConsole(void) +{ + g_fakeStdinIsConsole = 1; +} +void UTIL_fakeStdoutIsConsole(void) +{ + g_fakeStdoutIsConsole = 1; +} +void UTIL_fakeStderrIsConsole(void) +{ + g_fakeStderrIsConsole = 1; +} + U64 UTIL_getFileSize(const char* infilename) { stat_t statbuf; diff --git a/programs/util.h b/programs/util.h index faf8c9f11..cf1738772 100644 --- a/programs/util.h +++ b/programs/util.h @@ -175,6 +175,20 @@ int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); int UTIL_isLink(const char* infilename); int UTIL_isFIFO(const char* infilename); +/** + * Returns with the given file descriptor is a console. + * Allows faking whether stdin/stdout/stderr is a console + * using UTIL_fake*IsConsole(). + */ +int UTIL_isConsole(FILE* file); + +/** + * Pretends that stdin/stdout/stderr is a console for testing. + */ +void UTIL_fakeStdinIsConsole(void); +void UTIL_fakeStdoutIsConsole(void); +void UTIL_fakeStderrIsConsole(void); + #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) U64 UTIL_getFileSize(const char* infilename); U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 362f320a9..cc6bbb935 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -27,8 +27,8 @@ /*-************************************ * Dependencies **************************************/ -#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */ -#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */ +#include "platform.h" /* PLATFORM_POSIX_VERSION */ +#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList, UTIL_isConsole */ #include /* getenv */ #include /* strcmp, strlen */ #include /* fprintf(), stdin, stdout, stderr */ @@ -987,6 +987,9 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; } if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; } if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; } + if (!strcmp(argument, "--fake-stdin-is-console")) { UTIL_fakeStdinIsConsole(); continue; } + if (!strcmp(argument, "--fake-stdout-is-console")) { UTIL_fakeStdoutIsConsole(); continue; } + if (!strcmp(argument, "--fake-stderr-is-console")) { UTIL_fakeStderrIsConsole(); continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT @@ -1437,12 +1440,12 @@ int main(int argCount, const char* argv[]) /* Check if input/output defined as console; trigger an error in this case */ if (!forceStdin && (UTIL_searchFileNamesTable(filenames, stdinmark) != -1) - && IS_CONSOLE(stdin) ) { + && UTIL_isConsole(stdin) ) { DISPLAYLEVEL(1, "stdin is a console, aborting\n"); CLEAN_RETURN(1); } if ( (!outFileName || !strcmp(outFileName, stdoutmark)) - && IS_CONSOLE(stdout) + && UTIL_isConsole(stdout) && (UTIL_searchFileNamesTable(filenames, stdinmark) != -1) && !forceStdout && operation!=zom_decompress ) { @@ -1479,7 +1482,7 @@ int main(int argCount, const char* argv[]) /* No status message in pipe mode (stdin - stdout) */ hasStdout = outFileName && !strcmp(outFileName,stdoutmark); - if ((hasStdout || !IS_CONSOLE(stderr)) && (g_displayLevel==2)) g_displayLevel=1; + if ((hasStdout || !UTIL_isConsole(stderr)) && (g_displayLevel==2)) g_displayLevel=1; /* IO Stream/File */ FIO_setHasStdoutOutput(fCtx, hasStdout); From fbff7827faba30df7c49992cb39dc00ce36c6a06 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 7 Jan 2022 15:07:28 -0800 Subject: [PATCH 12/16] Refactor progress bar & summary line logic * Centralize the logic about whether to print the progress bar or not in the `*_PROGRESS()` macros. * Centralize the logc about whether to print the summary line or not in `FIO_shouldDisplayFileSummary()` and `FIO_shouldDisplayMultipleFileSummary()`. * Make `--progress` work for non-zstd (de)compressors. * Clean up several edge cases in compression and decompression progress printing along the way. E.g. wrong log level, or missing summary line. One thing I don't like about stdout mode, which sets the display level to 1, is that warnings aren't displayed. After this PR, we could change stdout mode from lowering the display level, to defaulting to implied `--no-progress`. But, I think that deserves a separate PR. --- programs/fileio.c | 123 ++++++++++-------- programs/fileio_common.h | 8 ++ tests/cli-tests/progress/no-progress.sh | 46 +++++++ .../progress/no-progress.sh.stderr.glob | 90 +++++++++++++ tests/cli-tests/progress/progress.sh | 41 ++++++ .../progress/progress.sh.stderr.glob | 62 +++++++++ 6 files changed, 318 insertions(+), 52 deletions(-) create mode 100755 tests/cli-tests/progress/no-progress.sh create mode 100644 tests/cli-tests/progress/no-progress.sh.stderr.glob create mode 100755 tests/cli-tests/progress/progress.sh create mode 100644 tests/cli-tests/progress/progress.sh.stderr.glob diff --git a/programs/fileio.c b/programs/fileio.c index 57192961e..4c1331432 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -249,6 +249,18 @@ struct FIO_ctx_s { size_t totalBytesOutput; }; +static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx) +{ + return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3; +} + +static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx) +{ + int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1); + assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0); + return shouldDisplay; +} + /*-************************************* * Parameters: Initialization @@ -1044,14 +1056,16 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no strm.avail_out = (uInt)writeJob->bufferSize; } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + DISPLAYUPDATE_PROGRESS( + "\rRead : %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), + (double)outFileSize/inFileSize*100) } else { - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); - } } + DISPLAYUPDATE_PROGRESS( + "\rRead : %u / %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + } } while (1) { int const ret = deflate(&strm, Z_FINISH); @@ -1141,11 +1155,11 @@ FIO_compressLzmaFrame(cRess_t* ress, strm.avail_out = writeJob->bufferSize; } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (double)outFileSize/inFileSize*100) else - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), (double)outFileSize/inFileSize*100); if (ret == LZMA_STREAM_END) break; @@ -1225,11 +1239,11 @@ FIO_compressLz4Frame(cRess_t* ress, srcFileName, LZ4F_getErrorName(outSize)); outFileSize += outSize; if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (double)outFileSize/inFileSize*100) } else { - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), (double)outFileSize/inFileSize*100); } @@ -1364,32 +1378,34 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced); /* display progress notifications */ + DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */ if (g_display_prefs.displayLevel >= 3) { - DISPLAYUPDATE(3, "\r(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ", - compressionLevel, - buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, - consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, - produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, - cShare ); - } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) { + /* Verbose progress update */ + DISPLAY_PROGRESS( + "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ", + compressionLevel, + buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, + consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, + produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, + cShare ); + } else { /* Require level 2 or forcibly displayed progress counter for summarized updates */ - DISPLAYLEVEL(1, "\r%79s\r", ""); /* Clear out the current displayed line */ if (fCtx->nbFilesTotal > 1) { size_t srcFileNameSize = strlen(srcFileName); /* Ensure that the string we print is roughly the same size each time */ if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; - DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ", + DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); } else { - DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ", + DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); } } - DISPLAYLEVEL(1, "Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); + DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); if (fileSize != UTIL_FILESIZE_UNKNOWN) - DISPLAYLEVEL(2, "/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); - DISPLAYLEVEL(1, " ==> %2.f%%", cShare); + DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); + DISPLAY_PROGRESS(" ==> %2.f%%", cShare); DELAY_NEXT_UPDATE(); } @@ -1555,20 +1571,18 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, /* Status */ fCtx->totalBytesInput += (size_t)readsize; fCtx->totalBytesOutput += (size_t)compressedfilesize; - DISPLAYLEVEL(2, "\r%79s\r", ""); - if (g_display_prefs.displayLevel >= 2 && - !fCtx->hasStdoutOutput && - (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) { UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize); UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize); if (readsize == 0) { - DISPLAYLEVEL(2,"%-20s : (%6.*f%s => %6.*f%s, %s) \n", + DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n", srcFileName, hr_isize.precision, hr_isize.value, hr_isize.suffix, hr_osize.precision, hr_osize.value, hr_osize.suffix, dstFileName); } else { - DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", + DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", srcFileName, (double)compressedfilesize / (double)readsize * 100, hr_isize.precision, hr_isize.value, hr_isize.suffix, @@ -1917,16 +1931,23 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal); } - if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput); UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput); - DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "%3d files compressed :%.2f%% (%6.*f%4s => %6.*f%4s)\n", - fCtx->nbFilesProcessed, - (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, - hr_isize.precision, hr_isize.value, hr_isize.suffix, - hr_osize.precision, hr_osize.value, hr_osize.suffix); + DISPLAY_PROGRESS("\r%79s\r", ""); + if (fCtx->totalBytesInput == 0) { + DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } else { + DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } } FIO_freeCResources(&ress); @@ -2067,7 +2088,6 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, ZSTD_inBuffer inBuff = { ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 }; ZSTD_outBuffer outBuff= { writeJob->buffer, writeJob->bufferSize, 0 }; size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); - const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2; UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize); if (ZSTD_isError(readSizeHint)) { DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", @@ -2085,14 +2105,15 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, size_t srcFileNameSize = strlen(srcFileName); if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; - DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", - fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); + DISPLAYUPDATE_PROGRESS( + "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); } else { - DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", + DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix); } } else { - DISPLAYUPDATE(displayLevel, "\r%-20.20s : %.*f%s... ", + DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ", srcFileName, hrs.precision, hrs.value, hrs.suffix); } @@ -2307,7 +2328,7 @@ FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName) AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); filesize += decodedBytes; hrs = UTIL_makeHumanReadableSize(filesize); - DISPLAYUPDATE(2, "\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); + DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); } if (!nextToLoad) break; @@ -2415,13 +2436,9 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, /* Final Status */ fCtx->totalBytesOutput += (size_t)filesize; - DISPLAYLEVEL(2, "\r%79s\r", ""); - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if ((g_display_prefs.displayLevel >= 2 && fCtx->nbFilesTotal <= 1) || - g_display_prefs.displayLevel >= 3 || - g_display_prefs.progressSetting == FIO_ps_always) { - DISPLAYLEVEL(1, "\r%-20s: %llu bytes \n", srcFileName, filesize); - } + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) + DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize); return 0; } @@ -2730,8 +2747,10 @@ FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); } - if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) - DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + DISPLAY_SUMMARY("%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); + } FIO_freeDResources(ress); return error; diff --git a/programs/fileio_common.h b/programs/fileio_common.h index 282c2f13b..b82d5b6b1 100644 --- a/programs/fileio_common.h +++ b/programs/fileio_common.h @@ -48,6 +48,14 @@ extern UTIL_time_t g_displayClock; if (g_display_prefs.displayLevel>=4) fflush(stderr); \ } } } +#define SHOULD_DISPLAY_SUMMARY() \ + (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) +#define SHOULD_DISPLAY_PROGRESS() \ + (g_display_prefs.progressSetting != FIO_ps_never && SHOULD_DISPLAY_SUMMARY()) +#define DISPLAY_PROGRESS(...) { if (SHOULD_DISPLAY_PROGRESS()) { DISPLAYLEVEL(1, __VA_ARGS__); }} +#define DISPLAYUPDATE_PROGRESS(...) { if (SHOULD_DISPLAY_PROGRESS()) { DISPLAYUPDATE(1, __VA_ARGS__); }} +#define DISPLAY_SUMMARY(...) { if (SHOULD_DISPLAY_SUMMARY()) { DISPLAYLEVEL(1, __VA_ARGS__); } } + #undef MIN /* in case it would be already defined */ #define MIN(a,b) ((a) < (b) ? (a) : (b)) diff --git a/tests/cli-tests/progress/no-progress.sh b/tests/cli-tests/progress/no-progress.sh new file mode 100755 index 000000000..708878f26 --- /dev/null +++ b/tests/cli-tests/progress/no-progress.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +#!/bin/sh + +. "$COMMON/platform.sh" + +set -e + +echo hello > hello +echo world > world + +zstd -q hello world + +println >&2 "Tests cases where progress information should not be printed" + +for args in \ + "" \ + "--fake-stderr-is-console -q" \ + "--fake-stderr-is-console -qq --progress" \ + "--no-progress --fake-stderr-is-console" \ + "--no-progress --fake-stderr-is-console -v" +do + println >&2 "args = $args" + println >&2 "compress file to file" + zstd $args -f hello + println >&2 "compress pipe to pipe" + zstd $args < hello > $INTOVOID + println >&2 "compress pipe to file" + zstd $args < hello -fo hello.zst + println >&2 "compress file to pipe" + zstd $args hello -c > $INTOVOID + println >&2 "compress 2 files" + zstd $args -f hello world + + println >&2 "decompress file to file" + zstd $args -d -f hello.zst + println >&2 "decompress pipe to pipe" + zstd $args -d < hello.zst > $INTOVOID + println >&2 "decompress pipe to file" + zstd $args -d < hello.zst -fo hello + println >&2 "decompress file to pipe" + zstd $args -d hello.zst -c > $INTOVOID + println >&2 "decompress 2 files" + zstd $args -d -f hello.zst world.zst + println >&2 "" +done diff --git a/tests/cli-tests/progress/no-progress.sh.stderr.glob b/tests/cli-tests/progress/no-progress.sh.stderr.glob new file mode 100644 index 000000000..f07ad3803 --- /dev/null +++ b/tests/cli-tests/progress/no-progress.sh.stderr.glob @@ -0,0 +1,90 @@ +Tests cases where progress information should not be printed +args = +compress file to file +compress pipe to pipe +compress pipe to file +compress file to pipe +compress 2 files +decompress file to file +decompress pipe to pipe +decompress pipe to file +decompress file to pipe +decompress 2 files + +args = --fake-stderr-is-console -q +compress file to file +compress pipe to pipe +compress pipe to file +compress file to pipe +compress 2 files +decompress file to file +decompress pipe to pipe +decompress pipe to file +decompress file to pipe +decompress 2 files + +args = --fake-stderr-is-console -qq --progress +compress file to file +compress pipe to pipe +compress pipe to file +compress file to pipe +compress 2 files +decompress file to file +decompress pipe to pipe +decompress pipe to file +decompress file to pipe +decompress 2 files + +args = --no-progress --fake-stderr-is-console +compress file to file +hello*hello.zst* +compress pipe to pipe +compress pipe to file +*stdin*hello.zst* +compress file to pipe +compress 2 files +2 files compressed* +decompress file to file +hello.zst* +decompress pipe to pipe +decompress pipe to file +*stdin* +decompress file to pipe +decompress 2 files +2 files decompressed* + +args = --no-progress --fake-stderr-is-console -v +compress file to file +*zstd* +hello*hello.zst* +compress pipe to pipe +*zstd* +*stdin*stdout* +compress pipe to file +*zstd* +*stdin*hello.zst* +compress file to pipe +*zstd* +*hello*stdout* +compress 2 files +*zstd* +*hello*hello.zst* +*world*world.zst* +2 files compressed* +decompress file to file +*zstd* +hello.zst* +decompress pipe to pipe +*zstd* +*stdin* +decompress pipe to file +*zstd* +*stdin* +decompress file to pipe +*zstd* +hello.zst* +decompress 2 files +*zstd* +hello.zst* +world.zst* +2 files decompressed* diff --git a/tests/cli-tests/progress/progress.sh b/tests/cli-tests/progress/progress.sh new file mode 100755 index 000000000..eb464993a --- /dev/null +++ b/tests/cli-tests/progress/progress.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +. "$COMMON/platform.sh" + +set -e + +println >&2 "Tests cases where progress information should be printed" + +echo hello > hello +echo world > world + +zstd -q hello world + +for args in \ + "--progress" \ + "--fake-stderr-is-console" \ + "--progress --fake-stderr-is-console -q"; do + println >&2 "args = $args" + println >&2 "compress file to file" + zstd $args -f hello + println >&2 "compress pipe to pipe" + zstd $args < hello > $INTOVOID + println >&2 "compress pipe to file" + zstd $args < hello -fo hello.zst + println >&2 "compress file to pipe" + zstd $args hello -c > $INTOVOID + println >&2 "compress 2 files" + zstd $args -f hello world + + println >&2 "decompress file to file" + zstd $args -d -f hello.zst + println >&2 "decompress pipe to pipe" + zstd $args -d < hello.zst > $INTOVOID + println >&2 "decompress pipe to file" + zstd $args -d < hello.zst -fo hello + println >&2 "decompress file to pipe" + zstd $args -d hello.zst -c > $INTOVOID + println >&2 "decompress 2 files" + zstd $args -d -f hello.zst world.zst + println >&2 "" +done diff --git a/tests/cli-tests/progress/progress.sh.stderr.glob b/tests/cli-tests/progress/progress.sh.stderr.glob new file mode 100644 index 000000000..ca620d3c2 --- /dev/null +++ b/tests/cli-tests/progress/progress.sh.stderr.glob @@ -0,0 +1,62 @@ +Tests cases where progress information should be printed +args = --progress +compress file to file +*Read:*hello*hello.zst* +compress pipe to pipe +*Read:*stdin*stdout* +compress pipe to file +*Read:*stdin*hello.zst* +compress file to pipe +*Read:*hello*stdout* +compress 2 files +*Read*2 files compressed* +decompress file to file +*hello.zst*hello.zst* +decompress pipe to pipe +*stdin*stdin* +decompress pipe to file +*stdin*stdin* +decompress file to pipe +*hello.zst*hello.zst* +decompress 2 files +*hello.zst*2 files decompressed* + +args = --fake-stderr-is-console +compress file to file +*Read:*hello*hello.zst* +compress pipe to pipe +compress pipe to file +*Read:*stdin*hello.zst* +compress file to pipe +compress 2 files +*Read*2 files compressed* +decompress file to file +*hello.zst*hello.zst* +decompress pipe to pipe +decompress pipe to file +*stdin*stdin* +decompress file to pipe +decompress 2 files +*hello.zst*2 files decompressed* + +args = --progress --fake-stderr-is-console -q +compress file to file +*Read:*hello*hello.zst* +compress pipe to pipe +*Read:*stdin*stdout* +compress pipe to file +*Read:*stdin*hello.zst* +compress file to pipe +*Read:*hello*stdout* +compress 2 files +*Read*2 files compressed* +decompress file to file +*hello.zst*hello.zst* +decompress pipe to pipe +*stdin*stdin* +decompress pipe to file +*stdin*stdin* +decompress file to pipe +*hello.zst*hello.zst* +decompress 2 files +*hello.zst*2 files decompressed* From a78c91ae59b9487fc32224b67c4a854dc3720367 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Sun, 28 Nov 2021 21:54:24 -0500 Subject: [PATCH 13/16] Use proper unaligned access attributes Instead of using packed attribute hack, just use aligned attribute. It improves code generation on armv6 and armv7, and slightly improves code generation on aarch64. GCC generates identical code to regular aligned access on ARMv6 for all versions between 4.5 and trunk, except GCC 5 which is buggy and generates the same (bad) code as packed access: https://gcc.godbolt.org/z/hq37rz7sb --- contrib/linux-kernel/Makefile | 1 - lib/common/mem.h | 49 ++++++++++++----------------------- lib/legacy/zstd_v01.c | 30 +++++++++------------ lib/legacy/zstd_v02.c | 32 ++++++++++------------- lib/legacy/zstd_v03.c | 32 ++++++++++------------- lib/legacy/zstd_v04.c | 32 ++++++++++------------- lib/legacy/zstd_v05.c | 34 ++++++++++-------------- lib/legacy/zstd_v06.c | 30 +++++++++------------ lib/legacy/zstd_v07.c | 30 +++++++++------------ tests/fuzz/fuzz.h | 3 +-- 10 files changed, 106 insertions(+), 167 deletions(-) diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile index baa1f24c6..f80ee8653 100644 --- a/contrib/linux-kernel/Makefile +++ b/contrib/linux-kernel/Makefile @@ -34,7 +34,6 @@ libzstd: -DFSE_STATIC_LINKING_ONLY \ -DHUF_STATIC_LINKING_ONLY \ -DXXH_STATIC_LINKING_ONLY \ - -DMEM_FORCE_MEMORY_ACCESS=0 \ -D__GNUC__ \ -D__linux__=1 \ -DSTATIC_BMI2=0 \ diff --git a/lib/common/mem.h b/lib/common/mem.h index 4b10f7c1f..493782f6f 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -133,21 +133,15 @@ MEM_STATIC size_t MEM_swapST(size_t in); /*-************************************************************** * Memory I/O Implementation *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -190,30 +184,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; +typedef __attribute__((aligned(1))) size_t unalignArch; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index 23caaef56..3a8b6bddd 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -190,21 +190,15 @@ typedef signed long long S64; /**************************************************************** * Memory I/O *****************************************************************/ -/* FSE_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* FSE_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define FSE_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -229,13 +223,13 @@ static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +static U16 FSE_read16(const void* ptr) { return *(const unalign16*)ptr; } +static U32 FSE_read32(const void* ptr) { return *(const unalign32*)ptr; } +static U64 FSE_read64(const void* ptr) { return *(const unalign64*)ptr; } #else diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 6eadd3948..05766d203 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -115,21 +115,15 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -155,15 +149,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index 72fc9140d..15cf5f3d7 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -116,21 +116,15 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -156,15 +150,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index a2d281eb9..d4594292d 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -87,21 +87,15 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * It can generate buggy code on targets depending on alignment. + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -127,15 +121,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 216c67d01..06aec20a0 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -106,21 +106,15 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -148,17 +142,17 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index b224355d4..66f256a53 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -108,21 +108,15 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -148,15 +142,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index c12a091cb..71c99ab63 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -268,21 +268,15 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -308,15 +302,15 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } #else diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h index 810daa2ce..6f3fb2994 100644 --- a/tests/fuzz/fuzz.h +++ b/tests/fuzz/fuzz.h @@ -26,8 +26,7 @@ * @param MEM_FORCE_MEMORY_ACCESS: * This flag controls how the zstd library accesses unaligned memory. * It can be undefined, or 0 through 2. If it is undefined, it selects - * the method to use based on the compiler. If testing with UBSAN set - * MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method. + * the method to use based on the compiler. * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION * This is the canonical flag to enable deterministic builds for fuzzing. * Changes to zstd for fuzzing are gated behind this define. From 15f32ad74ccf1f10efc93fdc4a180e7eba3e387e Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 14 Dec 2022 15:17:05 -0800 Subject: [PATCH 14/16] [fileio] Separate parameter adaption from display update rate Split the logic for parameter adaption from the logic to update the display rate. This decouples the two updates, so changes to display updates don't affect parameter adaption. Also add a test case that checks that parameter adaption actually happens. This fixes Issue #3353, where --adapt is broken when --no-progress is passed. --- programs/fileio.c | 189 ++++++++++++++------------- programs/fileio_common.h | 4 +- tests/cli-tests/compression/adapt.sh | 8 ++ 3 files changed, 108 insertions(+), 93 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 4c1331432..15a0d5387 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1301,6 +1301,9 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, unsigned inputPresented = 0; unsigned inputBlocked = 0; unsigned lastJobID = 0; + UTIL_time_t lastAdaptTime = UTIL_getTime(); + U64 const adaptEveryMicro = REFRESH_RATE; + UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize); DISPLAYLEVEL(6, "compression using zstd format \n"); @@ -1369,14 +1372,106 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, compressedfilesize += outBuff.pos; } - /* display notification; and adapt compression level */ - if (READY_FOR_UPDATE()) { + /* adaptive mode : statistics measurement and speed correction */ + if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) { + ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); + + lastAdaptTime = UTIL_getTime(); + + /* check output speed */ + if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ + + unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; + assert(zfp.produced >= previous_zfp_update.produced); + assert(prefs->nbWorkers >= 1); + + /* test if compression is blocked + * either because output is slow and all buffers are full + * or because input is slow and no job can start while waiting for at least one buffer to be filled. + * note : exclude starting part, since currentJobID > 1 */ + if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ + && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ + ) { + DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") + speedChange = slower; + } + + previous_zfp_update = zfp; + + if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ + && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ + ) { + DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); + speedChange = slower; + } + flushWaiting = 0; + } + + /* course correct only if there is at least one new job completed */ + if (zfp.currentJobID > lastJobID) { + DISPLAYLEVEL(6, "compression level adaptation check \n") + + /* check input speed */ + if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ + if (inputBlocked <= 0) { + DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); + speedChange = slower; + } else if (speedChange == noChange) { + unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; + unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; + unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; + previous_zfp_correction = zfp; + assert(inputPresented > 0); + DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", + inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, + (unsigned)newlyIngested, (unsigned)newlyConsumed, + (unsigned)newlyFlushed, (unsigned)newlyProduced); + if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ + && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ + && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ + ) { + DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", + newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); + speedChange = faster; + } + } + inputBlocked = 0; + inputPresented = 0; + } + + if (speedChange == slower) { + DISPLAYLEVEL(6, "slower speed , higher compression \n") + compressionLevel ++; + if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); + if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; + compressionLevel += (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + if (speedChange == faster) { + DISPLAYLEVEL(6, "faster speed , lighter compression \n") + compressionLevel --; + if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; + compressionLevel -= (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + speedChange = noChange; + + lastJobID = zfp.currentJobID; + } /* if (zfp.currentJobID > lastJobID) */ + } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */ + + /* display notification */ + if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) { ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed); UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed); UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced); + DELAY_NEXT_UPDATE(); + /* display progress notifications */ DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */ if (g_display_prefs.displayLevel >= 3) { @@ -1406,96 +1501,8 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); DISPLAY_PROGRESS(" ==> %2.f%%", cShare); - DELAY_NEXT_UPDATE(); } - - /* adaptive mode : statistics measurement and speed correction */ - if (prefs->adaptiveMode) { - - /* check output speed */ - if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ - - unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; - unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; - assert(zfp.produced >= previous_zfp_update.produced); - assert(prefs->nbWorkers >= 1); - - /* test if compression is blocked - * either because output is slow and all buffers are full - * or because input is slow and no job can start while waiting for at least one buffer to be filled. - * note : exclude starting part, since currentJobID > 1 */ - if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ - && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ - ) { - DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") - speedChange = slower; - } - - previous_zfp_update = zfp; - - if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ - && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ - ) { - DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); - speedChange = slower; - } - flushWaiting = 0; - } - - /* course correct only if there is at least one new job completed */ - if (zfp.currentJobID > lastJobID) { - DISPLAYLEVEL(6, "compression level adaptation check \n") - - /* check input speed */ - if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ - if (inputBlocked <= 0) { - DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); - speedChange = slower; - } else if (speedChange == noChange) { - unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; - unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; - unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; - unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; - previous_zfp_correction = zfp; - assert(inputPresented > 0); - DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", - inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, - (unsigned)newlyIngested, (unsigned)newlyConsumed, - (unsigned)newlyFlushed, (unsigned)newlyProduced); - if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ - && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ - && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ - ) { - DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", - newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); - speedChange = faster; - } - } - inputBlocked = 0; - inputPresented = 0; - } - - if (speedChange == slower) { - DISPLAYLEVEL(6, "slower speed , higher compression \n") - compressionLevel ++; - if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); - if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; - compressionLevel += (compressionLevel == 0); /* skip 0 */ - ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); - } - if (speedChange == faster) { - DISPLAYLEVEL(6, "faster speed , lighter compression \n") - compressionLevel --; - if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; - compressionLevel -= (compressionLevel == 0); /* skip 0 */ - ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); - } - speedChange = noChange; - - lastJobID = zfp.currentJobID; - } /* if (zfp.currentJobID > lastJobID) */ - } /* if (g_adaptiveMode) */ - } /* if (READY_FOR_UPDATE()) */ + } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */ } /* while ((inBuff.pos != inBuff.size) */ } while (directive != ZSTD_e_end); diff --git a/programs/fileio_common.h b/programs/fileio_common.h index b82d5b6b1..aec2e8d56 100644 --- a/programs/fileio_common.h +++ b/programs/fileio_common.h @@ -38,11 +38,11 @@ extern FIO_display_prefs_t g_display_prefs; extern UTIL_time_t g_displayClock; #define REFRESH_RATE ((U64)(SEC_TO_MICRO / 6)) -#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > REFRESH_RATE) +#define READY_FOR_UPDATE() (UTIL_clockSpanMicro(g_displayClock) > REFRESH_RATE || g_display_prefs.displayLevel >= 4) #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } #define DISPLAYUPDATE(l, ...) { \ if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \ - if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \ + if (READY_FOR_UPDATE()) { \ DELAY_NEXT_UPDATE(); \ DISPLAY(__VA_ARGS__); \ if (g_display_prefs.displayLevel>=4) fflush(stderr); \ diff --git a/tests/cli-tests/compression/adapt.sh b/tests/cli-tests/compression/adapt.sh index 564e955b5..30b9afaa0 100755 --- a/tests/cli-tests/compression/adapt.sh +++ b/tests/cli-tests/compression/adapt.sh @@ -4,3 +4,11 @@ set -e # Test --adapt zstd -f file --adapt -c | zstd -t + +datagen -g100M > file100M + +# Pick parameters to force fast adaptation, even on slow systems +zstd --adapt -vvvv -19 --zstd=wlog=10 file100M -o /dev/null 2>&1 | grep -q "faster speed , lighter compression" + +# Adaption still happens with --no-progress +zstd --no-progress --adapt -vvvv -19 --zstd=wlog=10 file100M -o /dev/null 2>&1 | grep -q "faster speed , lighter compression" From f31b83ff34236b4c8ec7dc5332c52a7e67952215 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 14 Dec 2022 17:00:54 -0800 Subject: [PATCH 15/16] [decompress] Fix nullptr addition & improve fuzzer Fix an instance of `NULL + 0` in `ZSTD_decompressStream()`. Also, improve our `stream_decompress` fuzzer to pass `NULL` in/out buffers to `ZSTD_decompressStream()`, and fix 2 issues that were immediately surfaced. Fixes #3351 --- lib/decompress/zstd_decompress.c | 9 +++++++-- lib/legacy/zstd_v06.c | 3 ++- lib/legacy/zstd_v07.c | 3 ++- tests/fuzz/stream_decompress.c | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index df831b774..e95b8822f 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -2058,6 +2058,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + assert(istart != NULL); ip = istart + cSize; op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ zds->expected = 0; @@ -2143,6 +2144,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + assert(ip != NULL); ip += neededInSize; /* Function modifies the stage so we must break */ break; @@ -2166,8 +2168,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "should never happen"); loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); } - ip += loadedSize; - zds->inPos += loadedSize; + if (loadedSize != 0) { + /* ip may be NULL */ + ip += loadedSize; + zds->inPos += loadedSize; + } if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 66f256a53..1456250b6 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -4029,7 +4029,8 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd, size_t const toLoad = hSize - zbd->lhSize; /* if hSize!=0, hSize > zbd->lhSize */ if (ZSTDv06_isError(hSize)) return hSize; if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ - memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); + if (ip != NULL) + memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); zbd->lhSize += iend-ip; *dstCapacityPtr = 0; return (hSize - zbd->lhSize) + ZSTDv06_blockHeaderSize; /* remaining header bytes + next block header */ diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index 71c99ab63..9cfba8de9 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -4411,7 +4411,8 @@ size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd, if (hSize != 0) { size_t const toLoad = hSize - zbd->lhSize; /* if hSize!=0, hSize > zbd->lhSize */ if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ - memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); + if (ip != NULL) + memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); zbd->lhSize += iend-ip; *dstCapacityPtr = 0; return (hSize - zbd->lhSize) + ZSTDv07_blockHeaderSize; /* remaining header bytes + next block header */ diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index e0cdd34d9..86a39b8c9 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -99,14 +99,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) while (size > 0) { ZSTD_inBuffer in = makeInBuffer(&src, &size, producer); - while (in.pos != in.size) { + do { + size_t const rc = ZSTD_decompressStream(dstream, &out, &in); + if (ZSTD_isError(rc)) goto error; if (out.pos == out.size) { if (stableOutBuffer) goto error; out = makeOutBuffer(producer, buf, bufSize); } - size_t const rc = ZSTD_decompressStream(dstream, &out, &in); - if (ZSTD_isError(rc)) goto error; - } + } while (in.pos != in.size); } error: From 728e73ebb49e316233cc79f8afe79209eb2a5e90 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 14 Dec 2022 16:07:22 -0800 Subject: [PATCH 16/16] [legacy] Remove FORCE_MEMORY_ACCESS and only use memcpy Delete unaligned memory access code from the legacy codebase by removing all the non-memcpy functions. We don't care about speed at all for this codebase, only simplicity. --- lib/legacy/zstd_v01.c | 33 ------------------------------- lib/legacy/zstd_v02.c | 42 --------------------------------------- lib/legacy/zstd_v03.c | 43 ---------------------------------------- lib/legacy/zstd_v04.c | 42 --------------------------------------- lib/legacy/zstd_v05.c | 46 ------------------------------------------- lib/legacy/zstd_v06.c | 42 --------------------------------------- lib/legacy/zstd_v07.c | 41 -------------------------------------- 7 files changed, 289 deletions(-) diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index 3a8b6bddd..3a0733f7e 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -190,19 +190,6 @@ typedef signed long long S64; /**************************************************************** * Memory I/O *****************************************************************/ -/* FSE_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define FSE_FORCE_MEMORY_ACCESS 1 -# endif -#endif - static unsigned FSE_32bits(void) { @@ -215,24 +202,6 @@ static unsigned FSE_isLittleEndian(void) return one.c[0]; } -#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) - -static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -static U16 FSE_read16(const void* ptr) { return *(const unalign16*)ptr; } -static U32 FSE_read32(const void* ptr) { return *(const unalign32*)ptr; } -static U64 FSE_read64(const void* ptr) { return *(const unalign64*)ptr; } - -#else - static U16 FSE_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -248,8 +217,6 @@ static U64 FSE_read64(const void* memPtr) U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } -#endif /* FSE_FORCE_MEMORY_ACCESS */ - static U16 FSE_readLE16(const void* memPtr) { if (FSE_isLittleEndian()) diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 05766d203..817aaa68f 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -115,18 +115,6 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -137,33 +125,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -184,9 +145,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index 15cf5f3d7..858eb848d 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -116,18 +116,6 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -138,33 +126,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -185,10 +146,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index d4594292d..084a67b1d 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -87,18 +87,6 @@ extern "C" { /**************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -109,33 +97,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -156,9 +117,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 06aec20a0..e48895a5c 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -106,18 +106,6 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } @@ -128,37 +116,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -189,9 +146,6 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - - MEM_STATIC U16 MEM_readLE16(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 1456250b6..ca68cb432 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -108,18 +108,6 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } @@ -130,33 +118,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -177,9 +138,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index 9cfba8de9..274af788b 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -268,18 +268,6 @@ extern "C" { /*-************************************************************** * Memory I/O *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: - * Method 0 : always use `memcpy()`. Safe and portable. - * Method 1 : Use compiler extension to set unaligned access. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * Default : method 1 if supported, else method 0 - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# ifdef __GNUC__ -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } @@ -290,33 +278,6 @@ MEM_STATIC unsigned MEM_isLittleEndian(void) return one.c[0]; } -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -typedef __attribute__((aligned(1))) U16 unalign16; -typedef __attribute__((aligned(1))) U32 unalign32; -typedef __attribute__((aligned(1))) U64 unalign64; - -MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - MEM_STATIC U16 MEM_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; @@ -337,8 +298,6 @@ MEM_STATIC void MEM_write16(void* memPtr, U16 value) memcpy(memPtr, &value, sizeof(value)); } -#endif /* MEM_FORCE_MEMORY_ACCESS */ - MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */