From 1df9f36c6c6cea08778d45a4adaf60e2433439a3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 9 Mar 2023 17:48:35 -0800 Subject: [PATCH] Improved seekable format ingestion speed for small frame size As reported by @P-E-Meunier in https://github.com/facebook/zstd/issues/2662#issuecomment-1443836186, seekable format ingestion speed can be particularly slow when the selected `FRAME_SIZE` is very small, especially in combination with the recent row_hash compression mode. The specific scenario mentioned was `pijul`, using frame sizes of 256 bytes and level 10. This is improved in this PR, by providing approximate parameter adaptation to the compression process. Tested locally on an M1 laptop, ingestion of `enwik8` using `pijul` parameters went from 35 sec (before this PR) to 2.5 sec (with this PR). For the specific corner case of a file full of zeroes, this is even more pronounced, going from 45 sec to 0.5 sec. These benefits are unrelated to (and come on top of) other improvement efforts currently being made by @yoniko for the row_hash compression method specifically. The `seekable_compress` test program has been updated to allow setting the compression level, in order to produce these performance results. 
--- .../examples/parallel_compression.c | 2 +- .../examples/parallel_processing.c | 2 +- .../examples/seekable_compression.c | 17 ++++++++++------- .../examples/seekable_decompression.c | 2 +- contrib/seekable_format/tests/seekable_tests.c | 2 +- contrib/seekable_format/zstd_seekable.h | 4 ++-- contrib/seekable_format/zstdseek_compress.c | 2 ++ 7 files changed, 18 insertions(+), 13 deletions(-) diff --git a/contrib/seekable_format/examples/parallel_compression.c b/contrib/seekable_format/examples/parallel_compression.c index 0ec9fbd20..4e06fae32 100644 --- a/contrib/seekable_format/examples/parallel_compression.c +++ b/contrib/seekable_format/examples/parallel_compression.c @@ -25,7 +25,7 @@ #include "pool.h" // use zstd thread pool for demo -#include "zstd_seekable.h" +#include "../zstd_seekable.h" static void* malloc_orDie(size_t size) { diff --git a/contrib/seekable_format/examples/parallel_processing.c b/contrib/seekable_format/examples/parallel_processing.c index b1709db77..356561e5a 100644 --- a/contrib/seekable_format/examples/parallel_processing.c +++ b/contrib/seekable_format/examples/parallel_processing.c @@ -29,7 +29,7 @@ #include "pool.h" // use zstd thread pool for demo -#include "zstd_seekable.h" +#include "../zstd_seekable.h" #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) diff --git a/contrib/seekable_format/examples/seekable_compression.c b/contrib/seekable_format/examples/seekable_compression.c index 182b46f64..c3d227dd0 100644 --- a/contrib/seekable_format/examples/seekable_compression.c +++ b/contrib/seekable_format/examples/seekable_compression.c @@ -13,7 +13,7 @@ #define ZSTD_STATIC_LINKING_ONLY #include // presumes zstd library is installed -#include "zstd_seekable.h" +#include "../zstd_seekable.h" static void* malloc_orDie(size_t size) { @@ -112,20 +112,23 @@ static char* createOutFilename_orDie(const char* filename) return (char*)outSpace; } -int main(int argc, const char** argv) { +#define CLEVEL_DEFAULT 5 +int main(int argc, const char** argv) +{ const char* const exeName = argv[0]; - if (argc!=3) { - printf("wrong arguments\n"); - printf("usage:\n"); - printf("%s FILE FRAME_SIZE\n", exeName); + if (argc<3 || argc>4) { + printf("wrong arguments \n"); + printf("usage: \n"); + printf("%s FILE FRAME_SIZE [LEVEL] \n", exeName); return 1; } { const char* const inFileName = argv[1]; unsigned const frameSize = (unsigned)atoi(argv[2]); + int const cLevel = (argc==4) ? atoi(argv[3]) : CLEVEL_DEFAULT; char* const outFileName = createOutFilename_orDie(inFileName); - compressFile_orDie(inFileName, outFileName, 5, frameSize); + compressFile_orDie(inFileName, outFileName, cLevel, frameSize); free(outFileName); } diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c index 2c4f3ba0c..7edbca87d 100644 --- a/contrib/seekable_format/examples/seekable_decompression.c +++ b/contrib/seekable_format/examples/seekable_decompression.c @@ -16,7 +16,7 @@ #include // presumes zstd library is installed #include -#include "zstd_seekable.h" +#include "../zstd_seekable.h" #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c index 1bb2d0e81..c2e1619f4 100644 --- a/contrib/seekable_format/tests/seekable_tests.c +++ b/contrib/seekable_format/tests/seekable_tests.c @@ -4,7 +4,7 @@ #include #include -#include "zstd_seekable.h" +#include "../zstd_seekable.h" /* Basic unit tests for zstd seekable format */ int main(int argc, const char** argv) diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index d2807cfbd..ef2957588 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h @@ -15,8 +15,8 @@ extern "C" { #define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U -/* Limit the maximum size to avoid any potential issues storing the compressed size */ -#define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U +/* Limit maximum size to avoid potential issues storing the compressed size */ +#define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x40000000U /*-**************************************************************************** * Seekable Format diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c index 113f6f99c..4997807d2 100644 --- a/contrib/seekable_format/zstdseek_compress.c +++ b/contrib/seekable_format/zstdseek_compress.c @@ -230,6 +230,8 @@ size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* const BYTE* const inBase = (const BYTE*) input->src + input->pos; size_t inLen = input->size - input->pos; + assert(zcs->maxFrameSize < INT_MAX); + ZSTD_CCtx_setParameter(zcs->cstream, ZSTD_c_srcSizeHint, (int)zcs->maxFrameSize); inLen = MIN(inLen, (size_t)(zcs->maxFrameSize - zcs->frameDSize)); /* if we haven't finished flushing the last frame, don't start writing a new one */