diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 000000000..506647a00 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,16 @@ +env: + CIRRUS_CLONE_DEPTH: 1 + ARCH: amd64 + +task: + freebsd_instance: + matrix: + image: freebsd-12-0-release-amd64 + image: freebsd-11-2-release-amd64 + install_script: + - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf + - pkg upgrade -y + - pkg install -y gmake coreutils + script: | + MOREFLAGS="-Werror" gmake -j all + gmake shortest diff --git a/CHANGELOG b/CHANGELOG index 2a3942278..0c09b4ad3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +dev +api : Rename ZSTD_CCtxParam_getParameter to ZSTD_CCtxParams_getParameter +api : Rename ZSTD_CCtxParam_setParameter to ZSTD_CCtxParams_setParameter + v1.3.8 perf: better decompression speed on large files (+7%) and cold dictionaries (+15%) perf: slightly better compression ratio at high compression modes diff --git a/README.md b/README.md index 4b6d19e7e..65dcd1821 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww [![Build Status][travisDevBadge]][travisLink] [![Build status][AppveyorDevBadge]][AppveyorLink] [![Build status][CircleDevBadge]][CircleLink] +[![Build status][CirrusDevBadge]][CirrusLink] [travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite" [travisLink]: https://travis-ci.org/facebook/zstd @@ -21,6 +22,8 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww [AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0 [CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite" [CircleLink]: https://circleci.com/gh/facebook/zstd +[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev +[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd ## 
Benchmarks diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index b99bb699c..508bee378 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -149,7 +149,8 @@ if (ZSTD_BUILD_SHARED) libzstd_shared PROPERTIES OUTPUT_NAME zstd - SOVERSION ${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}) + VERSION ${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH} + SOVERSION ${zstd_VERSION_MAJOR}) endif () if (ZSTD_BUILD_STATIC) diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build index a02bd2d7a..f8014c625 100644 --- a/build/meson/lib/meson.build +++ b/build/meson/lib/meson.build @@ -98,7 +98,7 @@ if use_debug if cc_id == compiler_gcc or cc_id == compiler_clang libzstd_debug_cflags = ['-Wstrict-aliasing=1', '-Wswitch-enum', '-Wdeclaration-after-statement', '-Wstrict-prototypes', - '-Wundef', '-Wpointer-arith', '-Wformat-security', '-Wvla', + '-Wundef', '-Wpointer-arith', '-Wvla', '-Wformat=2', '-Winit-self', '-Wfloat-equal', '-Wwrite-strings', '-Wredundant-decls', '-Wmissing-prototypes', '-Wc++-compat'] endif diff --git a/contrib/adaptive-compression/Makefile b/contrib/adaptive-compression/Makefile index c26efcd28..2c6867f5c 100644 --- a/contrib/adaptive-compression/Makefile +++ b/contrib/adaptive-compression/Makefile @@ -13,7 +13,7 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef -Wformat-security \ + -Wstrict-prototypes -Wundef \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls CFLAGS += $(DEBUGFLAGS) diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile index 541f3969b..6fc382563 100644 --- a/contrib/largeNbDicts/Makefile +++ b/contrib/largeNbDicts/Makefile @@ -18,7 +18,7 @@ CFLAGS ?= -O3 CFLAGS += -std=gnu99 DEBUGFLAGS= -Wall -Wextra 
-Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum \ - -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wstrict-prototypes -Wundef -Wpointer-arith \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile index ace1294f8..c1d2c4cc4 100644 --- a/doc/educational_decoder/Makefile +++ b/doc/educational_decoder/Makefile @@ -7,7 +7,7 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef -Wformat-security \ + -Wstrict-prototypes -Wundef \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls CFLAGS += $(DEBUGFLAGS) diff --git a/examples/Makefile b/examples/Makefile index 25a0a62c2..cd995f2f8 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -25,28 +25,28 @@ all: simple_compression simple_decompression \ $(LIB) : $(MAKE) -C ../lib libzstd.a -simple_compression : simple_compression.c utils.h $(LIB) +simple_compression : simple_compression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -simple_decompression : simple_decompression.c utils.h $(LIB) +simple_decompression : simple_decompression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -multiple_simple_compression : multiple_simple_compression.c utils.h $(LIB) +multiple_simple_compression : multiple_simple_compression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -dictionary_compression : dictionary_compression.c utils.h $(LIB) +dictionary_compression : dictionary_compression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -dictionary_decompression : dictionary_decompression.c utils.h $(LIB) +dictionary_decompression : 
dictionary_decompression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -streaming_compression : streaming_compression.c utils.h $(LIB) +streaming_compression : streaming_compression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -multiple_streaming_compression : multiple_streaming_compression.c utils.h $(LIB) +multiple_streaming_compression : multiple_streaming_compression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ -streaming_decompression : streaming_decompression.c utils.h $(LIB) +streaming_decompression : streaming_decompression.c common.h $(LIB) $(CC) $(CPPFLAGS) $(CFLAGS) $< $(LIB) $(LDFLAGS) -o $@ streaming_memory_usage : streaming_memory_usage.c $(LIB) diff --git a/examples/utils.h b/examples/common.h similarity index 82% rename from examples/utils.h rename to examples/common.h index 77c7a4f0c..a714cbb72 100644 --- a/examples/utils.h +++ b/examples/common.h @@ -11,15 +11,15 @@ /* * This header file has common utility functions used in examples. */ -#ifndef UTILS_H -#define UTILS_H +#ifndef COMMON_H +#define COMMON_H #include // malloc, free, exit #include // fprintf, perror, fopen, etc. -#include // strlen, strcat, memset, strerror +#include // strerror #include // errno -#include // assert #include // stat +#include /* * Define the returned error code from utility functions. @@ -34,7 +34,34 @@ typedef enum { ERROR_saveFile = 7, ERROR_malloc = 8, ERROR_largeFile = 9, -} UTILS_ErrorCode; +} COMMON_ErrorCode; + +/*! CHECK + * Check that the condition holds. If it doesn't print a message and die. + */ +#define CHECK(cond, ...) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, \ + "%s:%d CHECK(%s) failed: ", \ + __FILE__, \ + __LINE__, \ + #cond); \ + fprintf(stderr, "" __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(1); \ + } \ + } while (0) + +/*! CHECK_ZSTD + * Check the zstd error code and die if an error occurred after printing a + * message. + */ +#define CHECK_ZSTD(fn, ...) 
\ + do { \ + size_t const err = (fn); \ + CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err)); \ + } while (0) /*! fsize_orDie() : * Get the size of a given file path. @@ -153,7 +180,7 @@ static void* malloc_orDie(size_t size) static size_t loadFile_orDie(const char* fileName, void* buffer, size_t bufferSize) { size_t const fileSize = fsize_orDie(fileName); - assert(fileSize <= bufferSize); + CHECK(fileSize <= bufferSize, "File too large!"); FILE* const inFile = fopen_orDie(fileName, "rb"); size_t const readSize = fread(buffer, 1, fileSize, inFile); diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c index 3c4a5bd1e..9efdb785c 100644 --- a/examples/dictionary_compression.c +++ b/examples/dictionary_compression.c @@ -7,13 +7,11 @@ * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ -#include // malloc, exit #include // printf -#include // strerror -#include // errno -#include // stat +#include // free +#include // memset, strcat #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() /* createDict() : `dictFileName` is supposed to have been created using `zstd --train` */ @@ -23,10 +21,7 @@ static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel) printf("loading dictionary %s \n", dictFileName); void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize); ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, cLevel); - if (!cdict) { - fprintf(stderr, "ZSTD_createCDict error \n"); - exit(7); - } + CHECK(cdict != NULL, "ZSTD_createCDict() failed!"); free(dictBuffer); return cdict; } @@ -39,13 +34,16 @@ static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdi size_t const cBuffSize = ZSTD_compressBound(fSize); void* const cBuff = malloc_orDie(cBuffSize); + /* Compress using the dictionary. 
+ * This function writes the dictionary id, and content size into the header. + * But, it doesn't use a checksum. You can control these options using the + * advanced API: ZSTD_CCtx_setParameter(), ZSTD_CCtx_refCDict(), + * and ZSTD_compress2(). + */ ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - if (cctx==NULL) { fprintf(stderr, "ZSTD_createCCtx() error \n"); exit(10); } + CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); size_t const cSize = ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict); - if (ZSTD_isError(cSize)) { - fprintf(stderr, "error compressing %s : %s \n", fname, ZSTD_getErrorName(cSize)); - exit(7); - } + CHECK_ZSTD(cSize); saveFile_orDie(oname, cBuff, cSize); diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c index 243e22236..f683bbb43 100644 --- a/examples/dictionary_decompression.c +++ b/examples/dictionary_decompression.c @@ -9,15 +9,10 @@ */ - -#include // malloc, exit #include // printf -#include // strerror -#include // errno -#include // stat -#define ZSTD_STATIC_LINKING_ONLY // ZSTD_findDecompressedSize +#include // free #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() /* createDict() : `dictFileName` is supposed to have been created using `zstd --train` */ @@ -27,7 +22,7 @@ static ZSTD_DDict* createDict_orDie(const char* dictFileName) printf("loading dictionary %s \n", dictFileName); void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize); ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize); - if (ddict==NULL) { fprintf(stderr, "ZSTD_createDDict error \n"); exit(5); } + CHECK(ddict != NULL, "ZSTD_createDDict() failed!"); free(dictBuffer); return ddict; } @@ -36,24 +31,40 @@ static void decompress(const char* fname, const ZSTD_DDict* ddict) { size_t cSize; void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize); - unsigned long long const rSize = 
ZSTD_findDecompressedSize(cBuff, cSize); - if (rSize==ZSTD_CONTENTSIZE_ERROR) { - fprintf(stderr, "%s : it was not compressed by zstd.\n", fname); - exit(5); - } else if (rSize==ZSTD_CONTENTSIZE_UNKNOWN) { - fprintf(stderr, "%s : original size unknown \n", fname); - exit(6); - } - + /* Read the content size from the frame header. For simplicity we require + * that it is always present. By default, zstd will write the content size + * in the header when it is known. If you can't guarantee that the frame + * content size is always written into the header, either use streaming + * decompression, or ZSTD_decompressBound(). + */ + unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize); + CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname); + CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname); void* const rBuff = malloc_orDie((size_t)rSize); + /* Check that the dictionary ID matches. + * If a non-zstd dictionary is used, then both will be zero. + * By default zstd always writes the dictionary ID into the frame. + * Zstd will check if there is a dictionary ID mismatch as well. + */ + unsigned const expectedDictID = ZSTD_getDictID_fromDDict(ddict); + unsigned const actualDictID = ZSTD_getDictID_fromFrame(cBuff, cSize); + CHECK(actualDictID == expectedDictID, + "DictID mismatch: expected %u got %u", + expectedDictID, + actualDictID); + + /* Decompress using the dictionary. + * If you need to control the decompression parameters, then use the + * advanced API: ZSTD_DCtx_setParameter(), ZSTD_DCtx_refDDict(), and + * ZSTD_decompressDCtx(). 
+ */ ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - if (dctx==NULL) { fprintf(stderr, "ZSTD_createDCtx() error \n"); exit(10); } + CHECK(dctx != NULL, "ZSTD_createDCtx() failed!"); size_t const dSize = ZSTD_decompress_usingDDict(dctx, rBuff, rSize, cBuff, cSize, ddict); - if (dSize != rSize) { - fprintf(stderr, "error decoding %s : %s \n", fname, ZSTD_getErrorName(dSize)); - exit(7); - } + CHECK_ZSTD(dSize); + /* When zstd knows the content size, it will error if it doesn't match. */ + CHECK(dSize == rSize, "Impossible because zstd will check this condition!"); /* success */ printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize); diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c index b9bb29a9d..51c9ec725 100644 --- a/examples/multiple_simple_compression.c +++ b/examples/multiple_simple_compression.c @@ -8,13 +8,11 @@ * You may select, at your option, one of the above-listed licenses. */ -#include // malloc, free, exit -#include // fprintf, perror, fopen, etc. -#include // strlen, strcat, memset, strerror -#include // errno -#include // stat +#include // printf +#include // free +#include // memcpy, strlen #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() typedef struct { void* fBuffer; @@ -52,7 +50,7 @@ static resources createResources_orDie(int argc, const char** argv, char **ofn, ress.fBuffer = malloc_orDie(ress.fBufferSize); ress.cBuffer = malloc_orDie(ress.cBufferSize); ress.cctx = ZSTD_createCCtx(); - if (ress.cctx==NULL) { fprintf(stderr, "ZSTD_createCCtx() error \n"); exit(10); } + CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!"); return ress; } @@ -69,16 +67,17 @@ static void compressFile_orDie(resources ress, const char* fname, const char* on { size_t fSize = loadFile_orDie(fname, ress.fBuffer, ress.fBufferSize); + /* Compress using the context. 
+ * If you need more control over parameters, use the advanced API: + * ZSTD_CCtx_setParameter(), and ZSTD_compress2(). + */ size_t const cSize = ZSTD_compressCCtx(ress.cctx, ress.cBuffer, ress.cBufferSize, ress.fBuffer, fSize, 1); - if (ZSTD_isError(cSize)) { - fprintf(stderr, "error compressing %s : %s \n", fname, ZSTD_getErrorName(cSize)); - exit(8); - } + CHECK_ZSTD(cSize); saveFile_orDie(oname, ress.cBuffer, cSize); /* success */ - // printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname); + printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname); } int main(int argc, const char** argv) @@ -102,7 +101,7 @@ int main(int argc, const char** argv) for (argNb = 1; argNb < argc; argNb++) { const char* const inFilename = argv[argNb]; size_t const inFilenameLen = strlen(inFilename); - assert(inFilenameLen + 5 <= outFilenameBufferLen); + CHECK(inFilenameLen + 5 <= outFilenameBufferLen, "File name too long!"); memcpy(outFilename, inFilename, inFilenameLen); memcpy(outFilename+inFilenameLen, ".zst", 5); compressFile_orDie(ress, inFilename, outFilename); diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c index 442ff40ae..ad98b1bd1 100644 --- a/examples/multiple_streaming_compression.c +++ b/examples/multiple_streaming_compression.c @@ -14,66 +14,80 @@ * All structures and buffers will be created only once, * and shared across all compression operations */ -#include // malloc, exit -#include // fprintf, perror, feof -#include // strerror -#include // errno -#define ZSTD_STATIC_LINKING_ONLY // streaming API defined as "experimental" for the time being +#include // printf +#include // free +#include // memset, strcat #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() typedef struct { void* buffIn; void* buffOut; size_t buffInSize; size_t buffOutSize; - ZSTD_CStream* cstream; -} resources ; 
+ ZSTD_CCtx* cctx; +} resources; -static resources createResources_orDie() +static resources createResources_orDie(int cLevel) { resources ress; ress.buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */ ress.buffOutSize= ZSTD_CStreamOutSize(); /* can always flush a full block */ ress.buffIn = malloc_orDie(ress.buffInSize); ress.buffOut= malloc_orDie(ress.buffOutSize); - ress.cstream = ZSTD_createCStream(); - if (ress.cstream==NULL) { fprintf(stderr, "ZSTD_createCStream() error \n"); exit(10); } + ress.cctx = ZSTD_createCCtx(); + CHECK(ress.cctx != NULL, "ZSTD_createCCtx() failed!"); + + /* Set any compression parameters you want here. + * They will persist for every compression operation. + * Here we set the compression level, and enable the checksum. + */ + CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); + CHECK_ZSTD( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, 1) ); return ress; } static void freeResources(resources ress) { - ZSTD_freeCStream(ress.cstream); + ZSTD_freeCCtx(ress.cctx); free(ress.buffIn); free(ress.buffOut); } -static void compressFile_orDie(resources ress, const char* fname, const char* outName, int cLevel) +static void compressFile_orDie(resources ress, const char* fname, const char* outName) { + // Open the input and output files. FILE* const fin = fopen_orDie(fname, "rb"); FILE* const fout = fopen_orDie(outName, "wb"); - size_t const initResult = ZSTD_initCStream(ress.cstream, cLevel); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } + /* Reset the context to a clean state to start a new compression operation. + * The parameters are sticky, so we keep the compression level and extra + * parameters that we set in createResources_orDie(). 
+ */ + CHECK_ZSTD( ZSTD_CCtx_reset(ress.cctx, ZSTD_reset_session_only) ); + + size_t const toRead = ress.buffInSize; + size_t read; + while ( (read = fread_orDie(ress.buffIn, toRead, fin)) ) { + /* This loop is the same as streaming_compression.c. + * See that file for detailed comments. + */ + int const lastChunk = (read < toRead); + ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; - size_t read, toRead = ress.buffInSize; - while( (read = fread_orDie(ress.buffIn, toRead, fin)) ) { ZSTD_inBuffer input = { ress.buffIn, read, 0 }; - while (input.pos < input.size) { + int finished; + do { ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 }; - toRead = ZSTD_compressStream(ress.cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */ - if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); } - if (toRead > ress.buffInSize) toRead = ress.buffInSize; /* Safely handle when `buffInSize` is manually changed to a smaller value */ + size_t const remaining = ZSTD_compressStream2(ress.cctx, &output, &input, mode); + CHECK_ZSTD(remaining); fwrite_orDie(ress.buffOut, output.pos, fout); - } + finished = lastChunk ? 
(remaining == 0) : (input.pos == input.size); + } while (!finished); + CHECK(input.pos == input.size, + "Impossible: zstd only returns 0 when the input is completely consumed!"); } - ZSTD_outBuffer output = { ress.buffOut, ress.buffOutSize, 0 }; - size_t const remainingToFlush = ZSTD_endStream(ress.cstream, &output); /* close frame */ - if (remainingToFlush) { fprintf(stderr, "not fully flushed"); exit(13); } - fwrite_orDie(ress.buffOut, output.pos, fout); - fclose_orDie(fout); fclose_orDie(fin); } @@ -89,7 +103,8 @@ int main(int argc, const char** argv) return 1; } - resources const ress = createResources_orDie(); + int const cLevel = 7; + resources const ress = createResources_orDie(cLevel); void* ofnBuffer = NULL; size_t ofnbSize = 0; @@ -106,7 +121,7 @@ int main(int argc, const char** argv) memset(ofnBuffer, 0, ofnSize); strcat(ofnBuffer, ifn); strcat(ofnBuffer, ".zst"); - compressFile_orDie(ress, ifn, ofnBuffer, 7); + compressFile_orDie(ress, ifn, ofnBuffer); } freeResources(ress); diff --git a/examples/simple_compression.c b/examples/simple_compression.c index 829cbd7d8..019a143d4 100644 --- a/examples/simple_compression.c +++ b/examples/simple_compression.c @@ -8,13 +8,11 @@ * You may select, at your option, one of the above-listed licenses. */ -#include // malloc, free, exit -#include // fprintf, perror, fopen, etc. -#include // strlen, strcat, memset, strerror -#include // errno -#include // stat +#include // printf +#include // free +#include // strlen, strcat, memset #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() static void compress_orDie(const char* fname, const char* oname) { @@ -23,11 +21,12 @@ static void compress_orDie(const char* fname, const char* oname) size_t const cBuffSize = ZSTD_compressBound(fSize); void* const cBuff = malloc_orDie(cBuffSize); + /* Compress. + * If you are doing many compressions, you may want to reuse the context. 
+ * See the multiple_simple_compression.c example. + */ size_t const cSize = ZSTD_compress(cBuff, cBuffSize, fBuff, fSize, 1); - if (ZSTD_isError(cSize)) { - fprintf(stderr, "error compressing %s : %s \n", fname, ZSTD_getErrorName(cSize)); - exit(8); - } + CHECK_ZSTD(cSize); saveFile_orDie(oname, cBuff, cSize); diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c index e7949376c..1aa57c7b0 100644 --- a/examples/simple_decompression.c +++ b/examples/simple_decompression.c @@ -8,37 +8,36 @@ * You may select, at your option, one of the above-listed licenses. */ -#include // malloc, exit #include // printf -#include // strerror -#include // errno -#include // stat -#define ZSTD_STATIC_LINKING_ONLY // ZSTD_findDecompressedSize +#include // free #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() static void decompress(const char* fname) { size_t cSize; void* const cBuff = mallocAndLoadFile_orDie(fname, &cSize); - unsigned long long const rSize = ZSTD_findDecompressedSize(cBuff, cSize); - if (rSize==ZSTD_CONTENTSIZE_ERROR) { - fprintf(stderr, "%s : it was not compressed by zstd.\n", fname); - exit(5); - } else if (rSize==ZSTD_CONTENTSIZE_UNKNOWN) { - fprintf(stderr, - "%s : original size unknown. Use streaming decompression instead.\n", fname); - exit(6); - } + /* Read the content size from the frame header. For simplicity we require + * that it is always present. By default, zstd will write the content size + * in the header when it is known. If you can't guarantee that the frame + * content size is always written into the header, either use streaming + * decompression, or ZSTD_decompressBound(). 
+ */ + unsigned long long const rSize = ZSTD_getFrameContentSize(cBuff, cSize); + CHECK(rSize != ZSTD_CONTENTSIZE_ERROR, "%s: not compressed by zstd!", fname); + CHECK(rSize != ZSTD_CONTENTSIZE_UNKNOWN, "%s: original size unknown!", fname); void* const rBuff = malloc_orDie((size_t)rSize); + /* Decompress. + * If you are doing many decompressions, you may want to reuse the context + * and use ZSTD_decompressDCtx(). If you want to set advanced parameters, + * use ZSTD_DCtx_setParameter(). + */ size_t const dSize = ZSTD_decompress(rBuff, rSize, cBuff, cSize); - - if (dSize != rSize) { - fprintf(stderr, "error decoding %s : %s \n", fname, ZSTD_getErrorName(dSize)); - exit(7); - } + CHECK_ZSTD(dSize); + /* When zstd knows the content size, it will error if it doesn't match. */ + CHECK(dSize == rSize, "Impossible because zstd will check this condition!"); /* success */ printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize); diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index e056f86f0..d1353a684 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -9,54 +9,79 @@ */ -#include // malloc, free, exit -#include // fprintf, perror, feof, fopen, etc. -#include // strlen, memset, strcat +#include // printf +#include // free +#include // memset, strcat, strlen #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() + static void compressFile_orDie(const char* fname, const char* outName, int cLevel) { + /* Open the input and output files. */ FILE* const fin = fopen_orDie(fname, "rb"); FILE* const fout = fopen_orDie(outName, "wb"); - size_t const buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */ + /* Create the input and output buffers. + * They may be any size, but we recommend using these functions to size them. + * Performance will only suffer significantly for very tiny buffers. 
+ */ + size_t const buffInSize = ZSTD_CStreamInSize(); void* const buffIn = malloc_orDie(buffInSize); - size_t const buffOutSize = ZSTD_CStreamOutSize(); /* can always flush a full block */ + size_t const buffOutSize = ZSTD_CStreamOutSize(); void* const buffOut = malloc_orDie(buffOutSize); - ZSTD_CStream* const cstream = ZSTD_createCStream(); - if (cstream==NULL) { fprintf(stderr, "ZSTD_createCStream() error \n"); exit(10); } - size_t const initResult = ZSTD_initCStream(cstream, cLevel); - if (ZSTD_isError(initResult)) { - fprintf(stderr, "ZSTD_initCStream() error : %s \n", - ZSTD_getErrorName(initResult)); - exit(11); - } + /* Create the context. */ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); - size_t read, toRead = buffInSize; - while( (read = fread_orDie(buffIn, toRead, fin)) ) { + /* Set any parameters you want. + * Here we set the compression level, and enable the checksum. + */ + CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel) ); + CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ); + + /* This loop reads from the input file, compresses that entire chunk, + * and writes all output produced to the output file. + */ + size_t const toRead = buffInSize; + size_t read; + while ((read = fread_orDie(buffIn, toRead, fin))) { + /* Select the flush mode. + * If the read may not be finished (read == toRead) we use + * ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end. + * Zstd optimizes the case where the first flush mode is ZSTD_e_end, + * since it knows it is compressing the entire source in one pass. + */ + int const lastChunk = (read < toRead); + ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue; + /* Set the input buffer to what we just read. + * We compress until the input buffer is empty, each time flushing the + * output.
+ */ ZSTD_inBuffer input = { buffIn, read, 0 }; - while (input.pos < input.size) { + int finished; + do { + /* Compress into the output buffer and write all of the output to + * the file so we can reuse the buffer next iteration. + */ ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - toRead = ZSTD_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */ - if (ZSTD_isError(toRead)) { - fprintf(stderr, "ZSTD_compressStream() error : %s \n", - ZSTD_getErrorName(toRead)); - exit(12); - } - if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/ + size_t const remaining = ZSTD_compressStream2(cctx, &output , &input, mode); + CHECK_ZSTD(remaining); fwrite_orDie(buffOut, output.pos, fout); - } + /* If we're on the last chunk we're finished when zstd returns 0, + * which means it has consumed all the input AND finished the frame. + * Otherwise, we're finished when we've consumed all the input. + */ + finished = lastChunk ?
(remaining == 0) : (input.pos == input.size); + } while (!finished); + CHECK(input.pos == input.size, + "Impossible: zstd only returns 0 when the input is completely consumed!"); } - ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - size_t const remainingToFlush = ZSTD_endStream(cstream, &output); /* close frame */ - if (remainingToFlush) { fprintf(stderr, "not fully flushed"); exit(13); } - fwrite_orDie(buffOut, output.pos, fout); - - ZSTD_freeCStream(cstream); + ZSTD_freeCCtx(cctx); fclose_orDie(fout); - fclose_orDie(fin); free(buffIn); + fclose_orDie(fin); + free(buffIn); free(buffOut); } diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index 13c3c509e..bcd861b75 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -9,12 +9,10 @@ */ -#include // malloc, exit -#include // fprintf, perror, feof -#include // strerror -#include // errno +#include // fprintf +#include // free #include // presumes zstd library is installed -#include "utils.h" +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() static void decompressFile_orDie(const char* fname) { @@ -25,26 +23,40 @@ static void decompressFile_orDie(const char* fname) size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */ void* const buffOut = malloc_orDie(buffOutSize); - ZSTD_DStream* const dstream = ZSTD_createDStream(); - if (dstream==NULL) { fprintf(stderr, "ZSTD_createDStream() error \n"); exit(10); } + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + CHECK(dctx != NULL, "ZSTD_createDCtx() failed!"); - /* In more complex scenarios, a file may consist of multiple appended frames (ex : pzstd). - * The following example decompresses only the first frame. 
- * It is compatible with other provided streaming examples */ - size_t const initResult = ZSTD_initDStream(dstream); - if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_initDStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } - size_t read, toRead = initResult; + /* This loop assumes that the input file is one or more concatenated zstd + * streams. This example won't work if there is trailing non-zstd data at + * the end, but streaming decompression in general handles this case. + * ZSTD_decompressStream() returns 0 exactly when the frame is completed, + * and doesn't consume input after the frame. + */ + size_t const toRead = buffInSize; + size_t read; while ( (read = fread_orDie(buffIn, toRead, fin)) ) { ZSTD_inBuffer input = { buffIn, read, 0 }; + /* Given a valid frame, zstd won't consume the last byte of the frame + * until it has flushed all of the decompressed data of the frame. + * Therefore, instead of checking if the return code is 0, we can + * just check if input.pos < input.size. + */ while (input.pos < input.size) { ZSTD_outBuffer output = { buffOut, buffOutSize, 0 }; - toRead = ZSTD_decompressStream(dstream, &output , &input); /* toRead : size of next compressed block */ - if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_decompressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); } + /* The return code is zero if the frame is complete, but there may + * be multiple frames concatenated together. Zstd will automatically + * reset the context when a frame is complete. Still, calling + * ZSTD_DCtx_reset() can be useful to reset the context to a clean + * state, for instance if the last decompression call returned an + * error.
+ */ + size_t const ret = ZSTD_decompressStream(dctx, &output , &input); + CHECK_ZSTD(ret); fwrite_orDie(buffOut, output.pos, fout); } } - ZSTD_freeDStream(dstream); + ZSTD_freeDCtx(dctx); fclose_orDie(fin); fclose_orDie(fout); free(buffIn); diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c index 5e7e13e82..26835788a 100644 --- a/examples/streaming_memory_usage.c +++ b/examples/streaming_memory_usage.c @@ -16,9 +16,10 @@ /*=== Dependencies ===*/ -#include /* printf */ +#include // printf #define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" +#include // presumes zstd library is installed +#include "common.h" // Helper functions, CHECK(), and CHECK_ZSTD() /*=== functions ===*/ @@ -61,90 +62,75 @@ int main(int argc, char const *argv[]) { char const dataToCompress[INPUT_SIZE] = "abcde"; char compressedData[COMPRESSED_SIZE]; char decompressedData[INPUT_SIZE]; - ZSTD_CStream* const cstream = ZSTD_createCStream(); - if (cstream==NULL) { - printf("Level %i : ZSTD_CStream Memory allocation failure \n", compressionLevel); - return 1; - } + /* the ZSTD_CCtx_params structure is a way to save parameters and use + * them across multiple contexts. We use them here so we can call the + * function ZSTD_estimateCStreamSize_usingCCtxParams(). 
+ */ + ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams(); + CHECK(cctxParams != NULL, "ZSTD_createCCtxParams() failed!"); - /* forces compressor to use maximum memory size for given compression level, - * by not providing any information on input size */ - ZSTD_parameters params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - if (wLog) { /* special mode : specific wLog */ - printf("Using custom compression parameter : level 1 + wLog=%u \n", wLog); - params = ZSTD_getParams(1 /*compressionLevel*/, - 1 << wLog /*estimatedSrcSize*/, - 0 /*no dictionary*/); - size_t const error = ZSTD_initCStream_advanced(cstream, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN); - if (ZSTD_isError(error)) { - printf("ZSTD_initCStream_advanced error : %s \n", ZSTD_getErrorName(error)); - return 1; - } - } else { - size_t const error = ZSTD_initCStream(cstream, compressionLevel); - if (ZSTD_isError(error)) { - printf("ZSTD_initCStream error : %s \n", ZSTD_getErrorName(error)); - return 1; - } - } + /* Set the compression level. */ + CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, compressionLevel) ); + /* Set the window log. + * The value 0 means use the default window log, which is equivalent to + * not setting it. + */ + CHECK_ZSTD( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, wLog) ); + /* Force the compressor to allocate the maximum memory size for a given + * level by not providing the pledged source size, or calling + * ZSTD_compressStream2() with ZSTD_e_end. 
+ */ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + CHECK(cctx != NULL, "ZSTD_createCCtx() failed!"); + CHECK_ZSTD( ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams) ); size_t compressedSize; - { ZSTD_inBuffer inBuff = { dataToCompress, sizeof(dataToCompress), 0 }; + { + ZSTD_inBuffer inBuff = { dataToCompress, sizeof(dataToCompress), 0 }; ZSTD_outBuffer outBuff = { compressedData, sizeof(compressedData), 0 }; - size_t const cError = ZSTD_compressStream(cstream, &outBuff, &inBuff); - if (ZSTD_isError(cError)) { - printf("ZSTD_compressStream error : %s \n", ZSTD_getErrorName(cError)); - return 1; - } - size_t const fError = ZSTD_endStream(cstream, &outBuff); - if (ZSTD_isError(fError)) { - printf("ZSTD_endStream error : %s \n", ZSTD_getErrorName(fError)); - return 1; - } + CHECK_ZSTD( ZSTD_compressStream(cctx, &outBuff, &inBuff) ); + size_t const remaining = ZSTD_endStream(cctx, &outBuff); + CHECK_ZSTD(remaining); + CHECK(remaining == 0, "Frame not flushed!"); compressedSize = outBuff.pos; } - ZSTD_DStream* dstream = ZSTD_createDStream(); - if (dstream==NULL) { - printf("Level %i : ZSTD_DStream Memory allocation failure \n", compressionLevel); - return 1; - } - { size_t const error = ZSTD_initDStream(dstream); - if (ZSTD_isError(error)) { - printf("ZSTD_initDStream error : %s \n", ZSTD_getErrorName(error)); - return 1; - } - } - /* forces decompressor to use maximum memory size, as decompressed size is not known */ + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + CHECK(dctx != NULL, "ZSTD_createDCtx() failed!"); + /* Set the maximum allowed window log. + * The value 0 means use the default window log, which is equivalent to + * not setting it. + */ + CHECK_ZSTD( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, wLog) ); + /* forces decompressor to use maximum memory size, since the + * decompressed size is not stored in the frame header. 
+ */ { ZSTD_inBuffer inBuff = { compressedData, compressedSize, 0 }; ZSTD_outBuffer outBuff = { decompressedData, sizeof(decompressedData), 0 }; - size_t const dResult = ZSTD_decompressStream(dstream, &outBuff, &inBuff); - if (ZSTD_isError(dResult)) { - printf("ZSTD_decompressStream error : %s \n", ZSTD_getErrorName(dResult)); - return 1; - } - if (dResult != 0) { - printf("ZSTD_decompressStream error : unfinished decompression \n"); - return 1; - } - if (outBuff.pos != sizeof(dataToCompress)) { - printf("ZSTD_decompressStream error : incorrect decompression \n"); - return 1; - } + size_t const remaining = ZSTD_decompressStream(dctx, &outBuff, &inBuff); + CHECK_ZSTD(remaining); + CHECK(remaining == 0, "Frame not complete!"); + CHECK(outBuff.pos == sizeof(dataToCompress), "Bad decompression!"); } - size_t const cstreamSize = ZSTD_sizeof_CStream(cstream); - size_t const cstreamEstimatedSize = wLog ? - ZSTD_estimateCStreamSize_usingCParams(params.cParams) : - ZSTD_estimateCStreamSize(compressionLevel); - size_t const dstreamSize = ZSTD_sizeof_DStream(dstream); + size_t const cstreamSize = ZSTD_sizeof_CStream(cctx); + size_t const cstreamEstimatedSize = ZSTD_estimateCStreamSize_usingCCtxParams(cctxParams); + size_t const dstreamSize = ZSTD_sizeof_DStream(dctx); + size_t const dstreamEstimatedSize = ZSTD_estimateDStreamSize_fromFrame(compressedData, compressedSize); - printf("Level %2i : Compression Mem = %5u KB (estimated : %5u KB) ; Decompression Mem = %4u KB \n", + CHECK(cstreamSize <= cstreamEstimatedSize, "Compression mem (%u) > estimated (%u)", + (unsigned)cstreamSize, (unsigned)cstreamEstimatedSize); + CHECK(dstreamSize <= dstreamEstimatedSize, "Decompression mem (%u) > estimated (%u)", + (unsigned)dstreamSize, (unsigned)dstreamEstimatedSize); + + printf("Level %2i : Compression Mem = %5u KB (estimated : %5u KB) ; Decompression Mem = %4u KB (estimated : %5u KB)\n", compressionLevel, - (unsigned)(cstreamSize>>10), (unsigned)(cstreamEstimatedSize>>10), 
(unsigned)(dstreamSize>>10)); + (unsigned)(cstreamSize>>10), (unsigned)(cstreamEstimatedSize>>10), + (unsigned)(dstreamSize>>10), (unsigned)(dstreamEstimatedSize>>10)); - ZSTD_freeDStream(dstream); - ZSTD_freeCStream(cstream); + ZSTD_freeDCtx(dctx); + ZSTD_freeCCtx(cctx); + ZSTD_freeCCtxParams(cctxParams); if (wLog) break; /* single test */ } return 0; diff --git a/lib/Makefile b/lib/Makefile index 386583e72..404f5b692 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,7 +25,7 @@ endif CFLAGS ?= -O3 DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wstrict-prototypes -Wundef -Wpointer-arith \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls -Wmissing-prototypes -Wc++-compat CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) @@ -56,6 +56,7 @@ ZSTD_FORCE_DECOMPRESS_SHORT ?= 0 ZSTD_FORCE_DECOMPRESS_LONG ?= 0 ZSTD_NO_INLINE ?= 0 ZSTD_STRIP_ERROR_STRINGS ?= 0 +ZSTD_LEGACY_MULTITHREADED_API ?= 0 ifeq ($(ZSTD_LIB_COMPRESSION), 0) ZSTD_LIB_DICTBUILDER = 0 @@ -107,6 +108,10 @@ ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0) CFLAGS += -DZSTD_STRIP_ERROR_STRINGS endif +ifneq ($(ZSTD_LEGACY_MULTITHREADED_API), 0) + CFLAGS += -DZSTD_LEGACY_MULTITHREADED_API +endif + ifneq ($(ZSTD_LEGACY_SUPPORT), 0) ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') @@ -151,8 +156,7 @@ ifneq (,$(filter Windows%,$(OS))) LIBZSTD = dll\libzstd.dll $(LIBZSTD): $(ZSTD_FILES) @echo compiling dynamic library $(LIBVER) - @$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -shared $^ -o $@ - dlltool -D $@ -d dll\libzstd.def -l dll\libzstd.lib + $(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.lib -shared $^ -o $@ else diff --git a/lib/README.md b/lib/README.md index 0c9cd6d58..792729b1f 100644 --- a/lib/README.md +++ b/lib/README.md @@ -31,8 +31,6 @@ note that 
it's necessary to request the `-pthread` flag during link stage. Multithreading capabilities are exposed via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592). -This API is still labelled experimental, -but is expected to become "stable" in the near future. #### API @@ -110,6 +108,10 @@ The file structure is designed to make this selection manually achievable for an which removes the error messages that are otherwise returned by `ZSTD_getErrorName`. +- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1` + will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in + the shared library, which is now hidden by default. + #### Windows : using MinGW+MSYS to create DLL diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index edeb74b9c..31f756ab5 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -53,8 +53,50 @@ extern "C" { #undef MAX #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) -#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ -#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. In order to do that + * (particularly, printing the conditional that failed), this can't just wrap + * RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) 
\ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) \ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); /*-************************************* @@ -200,6 +242,17 @@ typedef struct { U32 longLengthPos; } seqStore_t; +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ef55f0323..4a9f6b7c8 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -103,12 +103,31 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) return cctx; } +/** + * Clears and frees all of the dictionaries in the CCtx. 
+ */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) { assert(cctx != NULL); assert(cctx->staticSize == 0); ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; - ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL; + ZSTD_clearAllDicts(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; #endif @@ -117,7 +136,8 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) { if (cctx==NULL) return 0; /* support free on NULL */ - if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); ZSTD_freeCCtxContent(cctx); ZSTD_free(cctx, cctx->customMem); return 0; @@ -139,7 +159,7 @@ size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) { if (cctx==NULL) return 0; /* support sizeof on NULL */ return sizeof(*cctx) + cctx->workSpaceSize - + ZSTD_sizeof_CDict(cctx->cdictLocal) + + ZSTD_sizeof_localDict(cctx->localDict) + ZSTD_sizeof_mtctx(cctx); } @@ -195,7 +215,7 @@ size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) } size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { - if (!cctxParams) { return ERROR(GENERIC); } + RETURN_ERROR_IF(!cctxParams, GENERIC); memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->compressionLevel = compressionLevel; cctxParams->fParams.contentSizeFlag = 1; @@ -204,8 +224,8 @@ size_t 
ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { - if (!cctxParams) { return ERROR(GENERIC); } - CHECK_F( ZSTD_checkCParams(params.cParams) ); + RETURN_ERROR_IF(!cctxParams, GENERIC); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->cParams = params.cParams; cctxParams->fParams = params.fParams; @@ -359,6 +379,12 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + default: { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; return boundError; @@ -378,10 +404,22 @@ static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) return 1; } -#define BOUNDCHECK(cParam, val) { \ - if (!ZSTD_cParam_withinBounds(cParam,val)) { \ - return ERROR(parameter_outOfBound); \ -} } +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. 
+ */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound); \ +} static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) @@ -413,6 +451,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_ldmBucketSizeLog: case ZSTD_c_ldmHashRateLog: case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: default: return 0; } @@ -425,18 +464,17 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) if (ZSTD_isUpdateAuthorized(param)) { cctx->cParamsChanged = 1; } else { - return ERROR(stage_wrong); + RETURN_ERROR(stage_wrong); } } switch(param) { - case ZSTD_c_format : - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; case ZSTD_c_compressionLevel: - if (cctx->cdict) return ERROR(stage_wrong); - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - case ZSTD_c_windowLog: case ZSTD_c_hashLog: case ZSTD_c_chainLog: @@ -444,49 +482,32 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_minMatch: case ZSTD_c_targetLength: case ZSTD_c_strategy: - if (cctx->cdict) return ERROR(stage_wrong); - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: case ZSTD_c_contentSizeFlag: case ZSTD_c_checksumFlag: case ZSTD_c_dictIDFlag: - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - - case 
ZSTD_c_forceMaxWindow : /* Force back-references to remain < windowSize, - * even when referencing into Dictionary content. - * default : 0 when using a CDict, 1 when using a Prefix */ - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - + case ZSTD_c_forceMaxWindow: case ZSTD_c_forceAttachDict: - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - - case ZSTD_c_nbWorkers: - if ((value!=0) && cctx->staticSize) { - return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ - } - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - + case ZSTD_c_literalCompressionMode: case ZSTD_c_jobSize: case ZSTD_c_overlapLog: case ZSTD_c_rsyncable: - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - case ZSTD_c_enableLongDistanceMatching: case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmBucketSizeLog: - case ZSTD_c_ldmHashRateLog: - if (cctx->cdict) return ERROR(stage_wrong); - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + break; - default: return ERROR(parameter_unsupported); + default: RETURN_ERROR(parameter_unsupported); } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); } -size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, - ZSTD_cParameter param, int value) +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) { - DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%i, %i)", (int)param, value); + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); switch(param) { case ZSTD_c_format : @@ -495,11 +516,9 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, return (size_t)CCtxParams->format; case ZSTD_c_compressionLevel : { - int cLevel = value; - if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); - if (cLevel < ZSTD_minCLevel()) cLevel = ZSTD_minCLevel(); - if (cLevel) { /* 0 : does not change 
current level */ - CCtxParams->compressionLevel = cLevel; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + if (value) { /* 0 : does not change current level */ + CCtxParams->compressionLevel = value; } if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; return 0; /* return type (size_t) cannot represent negative values */ @@ -573,33 +592,55 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, return CCtxParams->attachDictPref; } + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + case ZSTD_c_nbWorkers : #ifndef ZSTD_MULTITHREAD - if (value!=0) return ERROR(parameter_unsupported); + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; #endif case ZSTD_c_jobSize : #ifndef ZSTD_MULTITHREAD - return ERROR(parameter_unsupported); + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; #else - return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); + /* Adjust to the minimum non-default value. 
*/ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; #endif case ZSTD_c_overlapLog : #ifndef ZSTD_MULTITHREAD - return ERROR(parameter_unsupported); + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; #else - return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapLog, value); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; #endif case ZSTD_c_rsyncable : #ifndef ZSTD_MULTITHREAD - return ERROR(parameter_unsupported); + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; #else - return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_rsyncable, value); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_rsyncable, &value)); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; #endif case ZSTD_c_enableLongDistanceMatching : @@ -625,21 +666,21 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, return CCtxParams->ldmParams.bucketSizeLog; case ZSTD_c_ldmHashRateLog : - if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) - return ERROR(parameter_outOfBound); + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound); CCtxParams->ldmParams.hashRateLog = value; return CCtxParams->ldmParams.hashRateLog; - default: return ERROR(parameter_unsupported); + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) { - return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value); + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); } -size_t ZSTD_CCtxParam_getParameter( +size_t ZSTD_CCtxParams_getParameter( ZSTD_CCtx_params* 
CCtxParams, ZSTD_cParameter param, int* value) { switch(param) @@ -686,6 +727,9 @@ size_t ZSTD_CCtxParam_getParameter( case ZSTD_c_forceAttachDict : *value = CCtxParams->attachDictPref; break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; case ZSTD_c_nbWorkers : #ifndef ZSTD_MULTITHREAD assert(CCtxParams->nbWorkers == 0); @@ -694,7 +738,7 @@ size_t ZSTD_CCtxParam_getParameter( break; case ZSTD_c_jobSize : #ifndef ZSTD_MULTITHREAD - return ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); #else assert(CCtxParams->jobSize <= INT_MAX); *value = (int)CCtxParams->jobSize; @@ -702,14 +746,14 @@ size_t ZSTD_CCtxParam_getParameter( #endif case ZSTD_c_overlapLog : #ifndef ZSTD_MULTITHREAD - return ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); #else *value = CCtxParams->overlapLog; break; #endif case ZSTD_c_rsyncable : #ifndef ZSTD_MULTITHREAD - return ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); #else *value = CCtxParams->rsyncable; break; @@ -729,7 +773,7 @@ size_t ZSTD_CCtxParam_getParameter( case ZSTD_c_ldmHashRateLog : *value = CCtxParams->ldmParams.hashRateLog; break; - default: return ERROR(parameter_unsupported); + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; } @@ -745,8 +789,8 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) { DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); - if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); - if (cctx->cdict) return ERROR(stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->cdict, stage_wrong); cctx->requestedParams = *params; return 0; @@ -755,33 +799,71 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* 
cctx, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); - if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; return 0; } +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( + &cctx->requestedParams, 0, dl->dictSize); + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. */ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + cParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation); + cctx->cdict = dl->cdict; + return 0; +} + size_t ZSTD_CCtx_loadDictionary_advanced( ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); - if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); - ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */ - if (dict==NULL || dictSize==0) { /* 
no dictionary mode */ - cctx->cdictLocal = NULL; - cctx->cdict = NULL; + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; } else { - ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); - cctx->cdictLocal = ZSTD_createCDict_advanced( - dict, dictSize, - dictLoadMethod, dictContentType, - cParams, cctx->customMem); - cctx->cdict = cctx->cdictLocal; - if (cctx->cdictLocal == NULL) - return ERROR(memory_allocation); + void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation); + memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; return 0; } @@ -801,9 +883,10 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { - if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + /* Free the existing local cdict (if any) to save memory. 
*/ + ZSTD_clearAllDicts(cctx); cctx->cdict = cdict; - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* exclusive */ return 0; } @@ -815,8 +898,8 @@ size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSiz size_t ZSTD_CCtx_refPrefix_advanced( ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); - cctx->cdict = NULL; /* prefix discards any prior cdict */ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + ZSTD_clearAllDicts(cctx); cctx->prefixDict.dict = prefix; cctx->prefixDict.dictSize = prefixSize; cctx->prefixDict.dictContentType = dictContentType; @@ -834,8 +917,8 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); - cctx->cdict = NULL; + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + ZSTD_clearAllDicts(cctx); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } return 0; @@ -974,8 +1057,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { - /* Estimate CCtx size is supported for single-threaded compression only. 
*/ - if (params->nbWorkers > 0) { return ERROR(GENERIC); } + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); { ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(params, 0, 0); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); @@ -1023,10 +1105,12 @@ size_t ZSTD_estimateCCtxSize(int compressionLevel) size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) { - if (params->nbWorkers > 0) { return ERROR(GENERIC); } - { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); - size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, 0, 0); + size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; size_t const streamingSize = inBuffSize + outBuffSize; @@ -1368,13 +1452,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB", zc->workSpaceSize >> 10, neededSpace >> 10); - /* static cctx : no resize, error out */ - if (zc->staticSize) return ERROR(memory_allocation); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); zc->workSpaceSize = 0; ZSTD_free(zc->workSpace, zc->customMem); zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); - if (zc->workSpace == NULL) return ERROR(memory_allocation); + RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); 
zc->workSpaceSize = neededSpace; zc->workSpaceOversizedDuration = 0; @@ -1645,7 +1729,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_buffered_policy_e zbuff) { DEBUGLOG(5, "ZSTD_copyCCtx_internal"); - if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; @@ -1778,7 +1862,8 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) { U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall); MEM_writeLE24(dst, cBlockHeader24); memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); return ZSTD_blockHeaderSize + srcSize; @@ -1789,7 +1874,7 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void BYTE* const ostart = (BYTE* const)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); switch(flSize) { @@ -1879,7 +1964,7 @@ static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } - if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); { HUF_repeat repeat = prevHuf->repeatMode; int const preferRepeat = strategy < ZSTD_lazy ? 
srcSize <= 1024 : 0; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; @@ -2051,21 +2136,17 @@ static size_t ZSTD_fseBitCost( unsigned s; FSE_CState_t cstate; FSE_initCState(&cstate, ctable); - if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { - DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, + "Repeat FSE_CTable has maxSymbolValue %u < %u", ZSTD_getFSEMaxSymbolValue(ctable), max); - return ERROR(GENERIC); - } for (s = 0; s <= max; ++s) { unsigned const tableLog = cstate.stateLog; unsigned const badCost = (tableLog + 1) << kAccuracyLog; unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); if (count[s] == 0) continue; - if (bitCost >= badCost) { - DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); - return ERROR(GENERIC); - } + RETURN_ERROR_IF(bitCost >= badCost, GENERIC, + "Repeat FSE_CTable has Prob[%u] == 0", s); cost += count[s] * bitCost; } return cost >> kAccuracyLog; @@ -2081,7 +2162,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, BYTE wksp[FSE_NCOUNTBOUND]; S16 norm[MaxSeq + 1]; const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); } @@ -2187,15 +2268,15 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, switch (type) { case set_rle: - CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max)); - if (dstCapacity==0) return ERROR(dstSize_tooSmall); + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); *op = codeTable[0]; return 1; case set_repeat: memcpy(nextCTable, prevCTable, prevCTableSize); return 0; case set_basic: - CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : 
could be pre-calculated */ + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ return 0; case set_compressed: { S16 norm[MaxSeq + 1]; @@ -2206,14 +2287,14 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, nbSeq_1--; } assert(nbSeq_1 > 1); - CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return NCountSize; - CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + FORWARD_IF_ERROR(NCountSize); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); return NCountSize; } } - default: return assert(0), ERROR(GENERIC); + default: assert(0); RETURN_ERROR(GENERIC); } } @@ -2230,7 +2311,9 @@ ZSTD_encodeSequences_body( FSE_CState_t stateOffsetBits; FSE_CState_t stateLitLength; - CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */ + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", (int)(blockStream.endPtr - blockStream.startPtr), (unsigned)dstCapacity); @@ -2304,7 +2387,7 @@ ZSTD_encodeSequences_body( FSE_flushCState(&blockStream, &stateLitLength); { size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); return streamSize; } } @@ -2369,6 +2452,21 @@ static size_t ZSTD_encodeSequences( sequences, nbSeq, longOffsets); } +static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch 
(cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + /* ZSTD_compressSequences_internal(): * actually compresses both literals and sequences */ MEM_STATIC size_t @@ -2404,22 +2502,22 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; - int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); size_t const cSize = ZSTD_compressLiterals( &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, disableLiteralCompression, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), op, dstCapacity, literals, litSize, workspace, wkspSize, bmi2); - if (ZSTD_isError(cSize)) - return cSize; + FORWARD_IF_ERROR(cSize); assert(cSize <= dstCapacity); op += cSize; } /* Sequences Header */ - if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall); if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; else if (nbSeq < LONGNBSEQ) @@ -2453,7 +2551,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), workspace, wkspSize); - if (ZSTD_isError(countSize)) return countSize; + FORWARD_IF_ERROR(countSize); if (LLtype == set_compressed) lastNCount = op; op += countSize; @@ -2475,7 +2573,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 
prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), workspace, wkspSize); - if (ZSTD_isError(countSize)) return countSize; + FORWARD_IF_ERROR(countSize); if (Offtype == set_compressed) lastNCount = op; op += countSize; @@ -2495,7 +2593,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), workspace, wkspSize); - if (ZSTD_isError(countSize)) return countSize; + FORWARD_IF_ERROR(countSize); if (MLtype == set_compressed) lastNCount = op; op += countSize; @@ -2510,7 +2608,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, CTable_LitLength, llCodeTable, sequences, nbSeq, longOffsets, bmi2); - if (ZSTD_isError(bitstreamSize)) return bitstreamSize; + FORWARD_IF_ERROR(bitstreamSize); op += bitstreamSize; /* zstd versions <= 1.3.4 mistakenly report corruption when * FSE_readNCount() recieves a buffer < 4 bytes. 
@@ -2553,7 +2651,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) return 0; /* block not compressed */ - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); /* Check compressibility */ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); @@ -2642,7 +2740,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, goto out; /* don't even attempt compression below a certain srcSize */ } ZSTD_resetSeqStore(&(zc->seqStore)); - ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */ + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; /* a gap between an attached dict and the current window is not safe, * they must remain adjacent, @@ -2680,7 +2781,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ldmSeqStore.seq = zc->ldmSequences; ldmSeqStore.capacity = zc->maxNbLdmSequences; /* Updates ldmSeqStore.size */ - CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, &zc->appliedParams.ldmParams, src, srcSize)); /* Updates ldmSeqStore.pos */ @@ -2753,8 +2854,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, ZSTD_matchState_t* const ms = &cctx->blockState.matchState; U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) - return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; if 
(ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { @@ -2775,11 +2877,11 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, { size_t cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); if (cSize == 0) { /* block is not compressible */ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); } else { U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); MEM_writeLE24(op, cBlockHeader24); @@ -2816,7 +2918,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, size_t pos=0; assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); - if (dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); @@ -2852,7 +2954,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, */ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) { - if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall); { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ MEM_writeLE24(dst, cBlockHeader24); return ZSTD_blockHeaderSize; @@ -2861,10 +2963,9 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) { - if (cctx->stage != ZSTDcs_init) - return ERROR(stage_wrong); - if (cctx->appliedParams.ldmParams.enableLdm) - return ERROR(parameter_unsupported); + RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, 
stage_wrong); + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + parameter_unsupported); cctx->externSeqStore.seq = seq; cctx->externSeqStore.size = nbSeq; cctx->externSeqStore.capacity = nbSeq; @@ -2883,12 +2984,13 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", cctx->stage, (unsigned)srcSize); - if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ + RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, + "missing init (ZSTD_compressBegin)"); if (frame && (cctx->stage==ZSTDcs_init)) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); - if (ZSTD_isError(fhSize)) return fhSize; + FORWARD_IF_ERROR(fhSize); dstCapacity -= fhSize; dst = (char*)dst + fhSize; cctx->stage = ZSTDcs_ongoing; @@ -2923,17 +3025,18 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, { size_t const cSize = frame ? 
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) { - DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); - return ERROR(srcSize_wrong); - } + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); } return cSize + fhSize; } @@ -2958,7 +3061,7 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); } @@ -3021,9 +3124,9 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, NOTE: This behavior is not standard and could be improved in the future. 
*/ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { U32 s; - if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); for (s = 0; s <= maxSymbolValue; ++s) { - if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); } return 0; } @@ -3061,53 +3164,56 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, { unsigned maxSymbolValue = 255; size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); - if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); - if (maxSymbolValue < 255) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); dictPtr += hufHeaderSize; } { unsigned offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ /* fill all offset symbols to avoid garbage at end of table */ - CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, - offcodeNCount, MaxOff, offcodeLog, - workspace, HUF_WORKSPACE_SIZE), - dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted); dictPtr += offcodeHeaderSize; } { short 
matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); /* Every match length code must have non-zero probability */ - CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); - CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, - matchlengthNCount, matchlengthMaxValue, matchlengthLog, - workspace, HUF_WORKSPACE_SIZE), - dictionary_corrupted); + FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); /* Every literal length code must have non-zero probability */ - CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); - CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, - litlengthNCount, litlengthMaxValue, litlengthLog, - workspace, HUF_WORKSPACE_SIZE), - dictionary_corrupted); + 
FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted); dictPtr += litlengthHeaderSize; } - if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); bs->rep[0] = MEM_readLE32(dictPtr+0); bs->rep[1] = MEM_readLE32(dictPtr+4); bs->rep[2] = MEM_readLE32(dictPtr+8); @@ -3120,19 +3226,19 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ } /* All offset values <= dictContentSize + 128 KB must be representable */ - CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); /* All repCodes must be <= dictContentSize and != 0*/ { U32 u; for (u=0; u<3; u++) { - if (bs->rep[u] == 0) return ERROR(dictionary_corrupted); - if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); } } bs->entropy.huf.repeatMode = HUF_repeat_valid; bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; - CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); return dictID; } } @@ -3162,8 +3268,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, DEBUGLOG(4, "raw content dictionary detected"); return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); } - if 
(dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_wrong); + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); assert(0); /* impossible */ } @@ -3190,13 +3295,13 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_continue, zbuff) ); { size_t const dictID = ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); - if (ZSTD_isError(dictID)) return dictID; + FORWARD_IF_ERROR(dictID); assert(dictID <= (size_t)(U32)-1); cctx->dictID = (U32)dictID; } @@ -3213,7 +3318,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, { DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); /* compression parameters verification and optimization */ - CHECK_F( ZSTD_checkCParams(params.cParams) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, @@ -3261,12 +3366,12 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) size_t fhSize = 0; DEBUGLOG(4, "ZSTD_writeEpilogue"); - if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); /* special case : empty frame */ if (cctx->stage == ZSTDcs_init) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); - if (ZSTD_isError(fhSize)) return fhSize; + FORWARD_IF_ERROR(fhSize); dstCapacity -= fhSize; op += fhSize; cctx->stage = ZSTDcs_ongoing; @@ -3275,7 +3380,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->stage != ZSTDcs_ending) { /* 
write one last empty block, make it the "last" block */ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - if (dstCapacity<4) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); MEM_writeLE32(op, cBlockHeader24); op += ZSTD_blockHeaderSize; dstCapacity -= ZSTD_blockHeaderSize; @@ -3283,7 +3388,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->appliedParams.fParams.checksumFlag) { U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - if (dstCapacity<4) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); MEM_writeLE32(op, checksum); op += 4; @@ -3301,18 +3406,20 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 1 /* last chunk */); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - if (ZSTD_isError(endResult)) return endResult; + FORWARD_IF_ERROR(endResult); assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); DEBUGLOG(4, "end of frame : controlling src size"); - if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) { - DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); - return ERROR(srcSize_wrong); - } } + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } return cSize + endResult; } @@ -3340,7 +3447,7 @@ 
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, ZSTD_parameters params) { DEBUGLOG(4, "ZSTD_compress_advanced"); - CHECK_F(ZSTD_checkCParams(params.cParams)); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); return ZSTD_compress_internal(cctx, dst, dstCapacity, src, srcSize, @@ -3357,7 +3464,7 @@ size_t ZSTD_compress_advanced_internal( ZSTD_CCtx_params params) { DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); - CHECK_F( ZSTD_compressBegin_internal(cctx, + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, params, srcSize, ZSTDb_not_buffered) ); return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); @@ -3441,7 +3548,7 @@ static size_t ZSTD_initCDict_internal( void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); cdict->dictBuffer = internalBuffer; cdict->dictContent = internalBuffer; - if (!internalBuffer) return ERROR(memory_allocation); + RETURN_ERROR_IF(!internalBuffer, memory_allocation); memcpy(internalBuffer, dictBuffer, dictSize); } cdict->dictContentSize = dictSize; @@ -3467,7 +3574,7 @@ static size_t ZSTD_initCDict_internal( &cdict->cBlockState, &cdict->matchState, ¶ms, cdict->dictContent, cdict->dictContentSize, dictContentType, ZSTD_dtlm_full, cdict->workspace); - if (ZSTD_isError(dictID)) return dictID; + FORWARD_IF_ERROR(dictID); assert(dictID <= (size_t)(U32)-1); cdict->dictID = (U32)dictID; } @@ -3597,7 +3704,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); - if (cdict==NULL) return ERROR(dictionary_wrong); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ZSTD_getCParamsFromCDict(cdict); /* Increase window log to fit the entire dictionary and source if the @@ -3633,7 +3740,7 @@ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, 
const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) { - CHECK_F (ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } @@ -3701,7 +3808,7 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - CHECK_F( ZSTD_compressBegin_internal(cctx, + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, ZSTD_dtlm_fast, cdict, params, pledgedSrcSize, @@ -3719,13 +3826,17 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, /* ZSTD_resetCStream(): * pledgedSrcSize == 0 means "unknown" */ -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) { - ZSTD_CCtx_params params = zcs->requestedParams; + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); - if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; - params.fParams.contentSizeFlag = 1; - return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + return 0; } /*! 
ZSTD_initCStream_internal() : @@ -3737,32 +3848,18 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_internal"); - params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + zcs->requestedParams = params; assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - if (dict && dictSize >= 8) { - DEBUGLOG(4, "loading dictionary of size %u", (unsigned)dictSize); - if (zcs->staticSize) { /* static CCtx : never uses malloc */ - /* incompatible with internal cdict creation */ - return ERROR(memory_allocation); - } - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - params.cParams, zcs->customMem); - zcs->cdict = zcs->cdictLocal; - if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); } else { - if (cdict) { - params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ - } - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = NULL; - zcs->cdict = cdict; + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); } - - return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); + return 0; } /* ZSTD_initCStream_usingCDict_advanced() : @@ -3773,22 +3870,20 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); - if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ - { ZSTD_CCtx_params params = 
zcs->requestedParams; - params.cParams = ZSTD_getCParamsFromCDict(cdict); - params.fParams = fParams; - return ZSTD_initCStream_internal(zcs, - NULL, 0, cdict, - params, pledgedSrcSize); - } + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + return 0; } /* note : cdict must outlive compression session */ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) { - ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); - return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + return 0; } @@ -3798,33 +3893,53 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) + ZSTD_parameters params, unsigned long long pss) { - DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", - (unsigned)pledgedSrcSize, params.fParams.contentSizeFlag); - CHECK_F( ZSTD_checkCParams(params.cParams) ); - if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. 
+ */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); - return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, zcs->requestedParams, pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + return 0; } size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel); - return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, zcs->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN); + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + return 0; } size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) { - U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */ - ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel); - return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, zcs->requestedParams, pledgedSrcSize); + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? 
ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + return 0; } size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) { DEBUGLOG(4, "ZSTD_initCStream"); - return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + return 0; } /*====== Compression ======*/ @@ -3848,10 +3963,10 @@ static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, * internal function for all *compressStream*() variants * non-static, because can be called from zstdmt_compress.c * @return : hint size for next input */ -size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode) +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) { const char* const istart = (const char*)input->src; const char* const iend = istart + input->size; @@ -3874,8 +3989,7 @@ size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, switch(zcs->streamStage) { case zcss_init: - /* call ZSTD_initCStream() first ! 
*/ - return ERROR(init_missing); + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); case zcss_load: if ( (flushMode == ZSTD_e_end) @@ -3885,7 +3999,7 @@ size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, size_t const cSize = ZSTD_compressEnd(zcs, op, oend-op, ip, iend-ip); DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); ip = iend; op += cSize; zcs->frameEnded = 1; @@ -3926,7 +4040,7 @@ size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, zcs->inBuff + zcs->inToCompress, iSize) : ZSTD_compressContinue(zcs, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); - if (ZSTD_isError(cSize)) return cSize; + FORWARD_IF_ERROR(cSize); zcs->frameEnded = lastBlock; /* prepare next block */ zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; @@ -4002,7 +4116,7 @@ static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - CHECK_F( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); return ZSTD_nextInputSizeHint_MTorST(zcs); } @@ -4014,14 +4128,15 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, { DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); /* check conditions */ - if (output->pos > output->size) return ERROR(GENERIC); - if (input->pos > input->size) return ERROR(GENERIC); + RETURN_ERROR_IF(output->pos > output->size, GENERIC); + RETURN_ERROR_IF(input->pos > input->size, GENERIC); assert(cctx!=NULL); /* transparent initialization stage */ if (cctx->streamStage == zcss_init) { ZSTD_CCtx_params params = cctx->requestedParams; ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. 
*/ memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); @@ -4040,11 +4155,11 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", params.nbWorkers); cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); - if (cctx->mtctx == NULL) return ERROR(memory_allocation); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); } /* mt compression */ DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); - CHECK_F( ZSTDMT_initCStream_internal( + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( cctx->mtctx, prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); @@ -4052,7 +4167,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, cctx->appliedParams.nbWorkers = params.nbWorkers; } else #endif - { CHECK_F( ZSTD_resetCStream_internal(cctx, + { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); @@ -4064,20 +4179,30 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, /* compression stage */ #ifdef ZSTD_MULTITHREAD if (cctx->appliedParams.nbWorkers > 0) { + int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + size_t flushMin; + assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); if (cctx->cParamsChanged) { ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); cctx->cParamsChanged = 0; } - { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + do { + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); if ( ZSTD_isError(flushMin) || (endOp == ZSTD_e_end && flushMin == 0) ) { /* 
compression completed */ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } - DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); - return flushMin; - } } + FORWARD_IF_ERROR(flushMin); + } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. + */ + assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); + return flushMin; + } #endif - CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) ); + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); DEBUGLOG(5, "completed ZSTD_compressStream2"); return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ } @@ -4108,10 +4233,10 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, dst, dstCapacity, &oPos, src, srcSize, &iPos, ZSTD_e_end); - if (ZSTD_isError(result)) return result; + FORWARD_IF_ERROR(result); if (result != 0) { /* compression not completed, due to lack of output space */ assert(oPos == dstCapacity); - return ERROR(dstSize_tooSmall); + RETURN_ERROR(dstSize_tooSmall); } assert(iPos == srcSize); /* all input is expected consumed */ return oPos; @@ -4133,7 +4258,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { ZSTD_inBuffer input = { NULL, 0, 0 }; size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); - CHECK_F( remainingToFlush ); + FORWARD_IF_ERROR( remainingToFlush ); if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ /* single thread mode : attempt to calculate remaining to flush more precisely */ { size_t const lastBlockSize = zcs->frameEnded ? 
0 : ZSTD_BLOCKHEADERSIZE; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 29bca5985..78b53550b 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -54,6 +54,14 @@ typedef struct ZSTD_prefixDict_s { ZSTD_dictContentType_e dictContentType; } ZSTD_prefixDict; +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + typedef struct { U32 CTable[HUF_CTABLE_SIZE_U32(255)]; HUF_repeat repeatMode; @@ -107,6 +115,7 @@ typedef struct { U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; } optState_t; typedef struct { @@ -188,6 +197,7 @@ struct ZSTD_CCtx_params_s { * 1<cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { + U32 const maxRep = (U32)(ip0 - prefixStart); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip1 < 
ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2-offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = ip2[-1] == repMatch[-1] ? 1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { + size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 0; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; + ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + ip1 = ip0 + 1; + + if (ip0 
<= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + while ( (ip0 <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ip1 = ip0 + 1; + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return iend - anchor; +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32 const mls = cParams->minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const 
hashTable = ms->hashTable; @@ -64,46 +212,26 @@ size_t ZSTD_compressBlock_fast_generic( U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? - dms->hashTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixStartIndex - (U32)(dictEnd - dictBase) : - 0; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); - const U32 dictHLog = dictMode == ZSTD_dictMatchState ? 
- dictCParams->hashLog : hlog; - - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + const U32 dictHLog = dictCParams->hashLog; /* otherwise, we would get index underflow when translating a dict index * into a local index */ - assert(dictMode != ZSTD_dictMatchState - || prefixStartIndex >= (U32)(dictEnd - dictBase)); + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); /* init */ ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const maxRep = (U32)(ip - prefixStart); - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ @@ -113,50 +241,37 @@ size_t ZSTD_compressBlock_fast_generic( U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixStartIndex) ? + const BYTE* repMatch = (repIndex < prefixStartIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; hashTable[h] = current; /* update hash table */ - if ( (dictMode == ZSTD_dictMatchState) - && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else if ( (matchIndex <= prefixStartIndex) ) { - if (dictMode == ZSTD_dictMatchState) { - size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); - U32 const dictMatchIndex = dictHashTable[dictHash]; - const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= dictStartIndex || - MEM_read32(dictMatch) != MEM_read32(ip)) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a dict match */ - U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; - while (((ip>anchor) & (dictMatch>dictStart)) - && (ip[-1] == dictMatch[-1])) { - ip--; dictMatch--; mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - } else { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { assert(stepSize >= 1); ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = 
offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -185,41 +300,27 @@ size_t ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? 
+ dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; anchor = ip; - continue; /* faster when present ... (?) */ - } } } } + continue; + } + break; + } + } + } /* save reps for next block */ rep[0] = offset_1 ? offset_1 : offsetSaved; @@ -229,28 +330,6 @@ size_t ZSTD_compressBlock_fast_generic( return iend - anchor; } - -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; - assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); - } -} - size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -262,13 +341,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + return 
ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); } } diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 44de6e97f..cf2f70b11 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price) } #endif +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} + static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) { - optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); @@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, const BYTE* const src, size_t const srcSize, int const optLevel) { + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); optPtr->priceType = zop_dynamic; @@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, /* huffman table presumed generated by dictionary */ optPtr->priceType = zop_dynamic; - assert(optPtr->litFreq != NULL); - 
optPtr->litSum = 0; - { unsigned lit; + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); @@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, } else { /* not a dictionary */ assert(optPtr->litFreq != NULL); - { unsigned lit = MaxLit; + if (compressedLiterals) { + unsigned lit = MaxLit; HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); } - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); { unsigned ll; for (ll=0; ll<=MaxLL; ll++) @@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, } else { /* new block : re-use previous statistics, scaled down */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); @@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, int optLevel) { if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal.
*/ + if (optPtr->priceType == zop_predef) return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ @@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr, U32 offsetCode, U32 matchLength) { /* literals */ - { U32 u; + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; for (u=0; u < litLength; u++) optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; @@ -1108,7 +1123,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) /* used in 2-pass strategy */ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) { - optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 2cbd6ffad..7e2c78984 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -672,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription) if (ZSTD_isError(initError)) JOB_ERROR(initError); } else { /* srcStart points at reloaded section */ U64 const pledgedSrcSize = job->firstJob ? 
job->fullFrameSize : job->src.size; - { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); } { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, @@ -864,11 +864,7 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) { * Internal use only */ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) { - if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX; - params->nbWorkers = nbWorkers; - params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT; - params->jobSize = 0; - return nbWorkers; + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); } ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) @@ -986,26 +982,13 @@ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, { case ZSTDMT_p_jobSize : DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value); - if ( value != 0 /* default */ - && value < ZSTDMT_JOBSIZE_MIN) - value = ZSTDMT_JOBSIZE_MIN; - assert(value >= 0); - if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX; - params->jobSize = value; - return value; - + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value); case ZSTDMT_p_overlapLog : DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value); - if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN; - if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX; - params->overlapLog = value; - return value; - + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value); case ZSTDMT_p_rsyncable : - value = (value != 0); - params->rsyncable = value; - return value; - + DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value); + return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value); 
default : return ERROR(parameter_unsupported); } @@ -1021,32 +1004,29 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, { switch (parameter) { case ZSTDMT_p_jobSize: - assert(mtctx->params.jobSize <= INT_MAX); - *value = (int)(mtctx->params.jobSize); - break; + return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value); case ZSTDMT_p_overlapLog: - *value = mtctx->params.overlapLog; - break; + return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value); case ZSTDMT_p_rsyncable: - *value = mtctx->params.rsyncable; - break; + return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value); default: return ERROR(parameter_unsupported); } - return 0; } /* Sets parameters relevant to the compression job, * initializing others to default values. */ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) { - ZSTD_CCtx_params jobParams; - memset(&jobParams, 0, sizeof(jobParams)); - - jobParams.cParams = params.cParams; - jobParams.fParams = params.fParams; - jobParams.compressionLevel = params.compressionLevel; - + ZSTD_CCtx_params jobParams = params; + /* Clear parameters related to multithreading */ + jobParams.forceWindow = 0; + jobParams.nbWorkers = 0; + jobParams.jobSize = 0; + jobParams.overlapLog = 0; + jobParams.rsyncable = 0; + memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t)); + memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem)); return jobParams; } @@ -1056,7 +1036,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) { if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); - CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); if (mtctx->bufPool == NULL) return ERROR(memory_allocation); mtctx->cctxPool 
= ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); @@ -1263,7 +1243,7 @@ static size_t ZSTDMT_compress_advanced_internal( if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) return ERROR(memory_allocation); - CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ { unsigned u; for (u=0; uparams.nbWorkers) - CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) ); + FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; @@ -1987,7 +1967,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, assert(input->pos <= input->size); if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ - return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); + return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp); } if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { @@ -2051,7 +2031,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ size_t const jobSize = mtctx->inBuff.filled; assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); - CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); } /* check for potential compressed data ready to be flushed */ @@ -2065,7 +2045,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); + FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, 
ZSTD_e_continue) ); /* recommended next input size : fill current input buffer */ return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ @@ -2082,7 +2062,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", (U32)srcSize, (U32)endFrame); - CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); } /* check if there is any data available to flush */ diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index ee771681f..bae830e28 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -17,10 +17,21 @@ /* Note : This is an internal API. - * Some methods are still exposed (ZSTDLIB_API), + * These APIs used to be exposed with ZSTDLIB_API, * because it used to be the only way to invoke MT compression. * Now, it's recommended to use ZSTD_compress_generic() instead. - * These methods will stop being exposed in a future version */ + * + * If you depend on these APIs and can't switch, then define + * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library. + * However, we may completely remove these functions in a future + * release, so please switch soon. 
+ */ + +#ifdef ZSTD_LEGACY_MULTITHREADED_API +# define ZSTDMT_API ZSTDLIB_API +#else +# define ZSTDMT_API +#endif /* === Dependencies === */ #include /* size_t */ @@ -40,17 +51,17 @@ /* === Memory management === */ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; -ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); -ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, +ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); +ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem); -ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); +ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); -ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); +ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); /* === Simple one-pass compression function === */ -ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, +ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); @@ -59,31 +70,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, /* === Streaming functions === */ -ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); -ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ +ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); +ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. 
Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ -ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); -ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); +ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ /* === Advanced functions and parameters === */ -ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_parameters params, - int overlapLog); +ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters params, + int overlapLog); -ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, +ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ ZSTD_parameters params, unsigned long 
long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, +ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, const ZSTD_CDict* cdict, ZSTD_frameParameters fparams, unsigned long long pledgedSrcSize); /* note : zero means empty */ @@ -101,12 +112,12 @@ typedef enum { * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value); +ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value); /* ZSTDMT_getMTCtxParameter() : * Query the ZSTDMT_CCtx for a parameter value. * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value); +ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value); /*! 
ZSTDMT_compressStream_generic() : @@ -116,7 +127,7 @@ ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter * 0 if fully flushed * or an error code * note : needs to be init using any ZSTD_initCStream*() variant */ -ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, +ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input, ZSTD_EndDirective endOp); diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c index 2ad044068..0af3d23bf 100644 --- a/lib/decompress/zstd_ddict.c +++ b/lib/decompress/zstd_ddict.c @@ -105,9 +105,9 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); /* load entropy tables */ - CHECK_E( ZSTD_loadDEntropy(&ddict->entropy, - ddict->dictContent, ddict->dictSize), - dictionary_corrupted ); + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted); ddict->entropyPresent = 1; return 0; } @@ -133,7 +133,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ /* parse dictionary content */ - CHECK_F( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); return 0; } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index feef1ef67..d8f14882d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -150,7 +150,7 @@ ZSTD_DCtx* ZSTD_createDCtx(void) size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) { if (dctx==NULL) return 0; /* support free on NULL */ - if (dctx->staticSize) return ERROR(memory_allocation); /* not compatible with static DCtx */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); { ZSTD_customMem const cMem = 
dctx->customMem; ZSTD_freeDDict(dctx->ddictLocal); dctx->ddictLocal = NULL; @@ -203,7 +203,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { size_t const minInputSize = ZSTD_startingInputLength(format); - if (srcSize < minInputSize) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; @@ -238,7 +238,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ if (srcSize < minInputSize) return minInputSize; - if (src==NULL) return ERROR(GENERIC); /* invalid parameter */ + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); if ( (format != ZSTD_f_zstd1_magicless) && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { @@ -251,7 +251,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s zfhPtr->frameType = ZSTD_skippableFrame; return 0; } - return ERROR(prefix_unknown); + RETURN_ERROR(prefix_unknown); } /* ensure there is enough `srcSize` to fully read/decode frame header */ @@ -269,14 +269,13 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s U64 windowSize = 0; U32 dictID = 0; U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; - if ((fhdByte & 0x08) != 0) - return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */ + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); if (!singleSegment) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if (windowLog > ZSTD_WINDOWLOG_MAX) - return ERROR(frameParameter_windowTooLarge); + 
RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); windowSize = (1ULL << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } @@ -348,12 +347,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; U32 sizeU32; - if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) - return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); - if ((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32) - return ERROR(frameParameter_unsupported); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported); return skippableHeaderSize + sizeU32; } @@ -428,13 +426,89 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he { size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ - if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ - if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) - return ERROR(dictionary_wrong); + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. 
+ */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong); +#endif if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); return 0; } +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content 
checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} /** ZSTD_findFrameCompressedSize() : * compatible with legacy mode @@ -443,53 +517,34 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he * @return : the compressed size of the frame starting at `src` */ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) - return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); -#endif - if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) - && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) { - return readSkippableFrameSize(src, srcSize); - } else { - const BYTE* ip = (const BYTE*)src; - const BYTE* const ipstart = ip; - size_t remainingSize = srcSize; - ZSTD_frameHeader zfh; - - /* Extract Frame Header */ - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(ret)) return ret; - if (ret > 0) return ERROR(srcSize_wrong); - } - - ip += zfh.headerSize; - remainingSize -= zfh.headerSize; - - /* Loop on each block */ - while (1) { - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - - if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) - return ERROR(srcSize_wrong); - - ip += ZSTD_blockHeaderSize + cBlockSize; - remainingSize -= ZSTD_blockHeaderSize + cBlockSize; - - if (blockProperties.lastBlock) break; - } - - if (zfh.checksumFlag) { /* Final frame content checksum */ - if (remainingSize < 4) return ERROR(srcSize_wrong); - ip += 4; - } - - return ip - ipstart; 
- } + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; } +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + /*-************************************************************* * Frame decoding @@ -522,9 +577,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, DEBUGLOG(5, "ZSTD_copyRawBlock"); if (dst == NULL) { if (srcSize == 0) return 0; - return ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null); } - if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); memcpy(dst, src, srcSize); return srcSize; } @@ -535,9 +590,9 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, { if (dst == NULL) { if (regenSize == 0) return 0; - return ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null); } - if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); memset(dst, b, regenSize); return regenSize; } @@ -560,15 +615,16 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", 
(int)*srcSizePtr); /* check */ - if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, + srcSize_wrong); /* Frame Header */ { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; - if (remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } @@ -581,7 +637,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += ZSTD_blockHeaderSize; remainingSrcSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSrcSize) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); switch(blockProperties.blockType) { @@ -596,7 +652,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, break; case bt_reserved : default: - return ERROR(corruption_detected); + RETURN_ERROR(corruption_detected); } if (ZSTD_isError(decodedSize)) return decodedSize; @@ -609,15 +665,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, } if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { - if ((U64)(op-ostart) != dctx->fParams.frameContentSize) { - return ERROR(corruption_detected); - } } + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected); + } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); U32 checkRead; - if (remainingSrcSize<4) return ERROR(checksum_wrong); + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); checkRead = MEM_readLE32(ip); - if (checkRead != checkCalc) 
return ERROR(checksum_wrong); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); ip += 4; remainingSrcSize -= 4; } @@ -652,8 +708,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, size_t decodedSize; size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); if (ZSTD_isError(frameSize)) return frameSize; - /* legacy support is not compatible with static dctx */ - if (dctx->staticSize) return ERROR(memory_allocation); + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); if (ZSTD_isError(decodedSize)) return decodedSize; @@ -676,7 +732,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, size_t const skippableSize = readSkippableFrameSize(src, srcSize); if (ZSTD_isError(skippableSize)) return skippableSize; - if (srcSize < skippableSize) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong); src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; @@ -685,29 +741,29 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ - CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); } else { /* this will initialize correctly with no dict if dict == NULL, so * use this in all cases but ddict */ - CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); } ZSTD_checkContinuity(dctx, dst); { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); - if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) - && (moreThan1Frame==1) ) { - /* at least one frame successfully completed, - * but following bytes are garbage : - * it's more likely to be a srcSize error, - * specifying more bytes than compressed size of 
frame(s). - * This error message replaces ERROR(prefix_unknown), - * which would be confusing, as the first header is actually correct. - * Note that one could be unlucky, it might be a corruption error instead, - * happening right at the place where we expect zstd magic bytes. - * But this is _much_ less likely than a srcSize field error. */ - return ERROR(srcSize_wrong); - } + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "at least one frame successfully completed, but following " + "bytes are garbage: it's more likely to be a srcSize error, " + "specifying more bytes than compressed size of frame(s). This " + "error message replaces ERROR(prefix_unknown), which would be " + "confusing, as the first header is actually correct. Note that " + "one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic " + "bytes. But this is _much_ less likely than a srcSize field " + "error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); dst = (BYTE*)dst + res; @@ -716,7 +772,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, moreThan1Frame = 1; } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); return (BYTE*)dst - (BYTE*)dststart; } @@ -732,7 +788,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, dctx->ddict); } @@ -741,7 +797,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; ZSTD_DCtx* const 
dctx = ZSTD_createDCtx(); - if (dctx==NULL) return ERROR(memory_allocation); + RETURN_ERROR_IF(dctx==NULL, memory_allocation); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; @@ -791,8 +847,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ - if (srcSize != dctx->expected) - return ERROR(srcSize_wrong); /* not allowed */ + RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed"); if (dstCapacity) ZSTD_checkContinuity(dctx, dst); switch (dctx->stage) @@ -817,7 +872,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeFrameHeader: assert(src != NULL); memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -867,7 +922,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c break; case bt_reserved : /* should never happen */ default: - return ERROR(corruption_detected); + RETURN_ERROR(corruption_detected); } if (ZSTD_isError(rSize)) return rSize; DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); @@ -876,10 +931,10 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); - if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { - if (dctx->decodedSize != dctx->fParams.frameContentSize) { - return ERROR(corruption_detected); - } } + RETURN_ERROR_IF( + dctx->fParams.frameContentSize 
!= ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected); if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; @@ -900,7 +955,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); U32 const check32 = MEM_readLE32(src); DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - if (check32 != h32) return ERROR(checksum_wrong); + RETURN_ERROR_IF(check32 != h32, checksum_wrong); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -921,7 +976,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c default: assert(0); /* impossible */ - return ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC); /* some compiler require default to do something */ } } @@ -945,7 +1000,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - if (dictSize <= 8) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ dictPtr += 8; /* skip header = magic + dictID */ @@ -964,16 +1019,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, dictPtr, dictEnd - dictPtr, workspace, workspaceSize); #endif - if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff, offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); - if (offcodeMaxValue > 
MaxOff) return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, @@ -984,9 +1039,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, @@ -997,9 +1052,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); - if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, 
litlengthMaxValue, LL_base, LL_bits, @@ -1007,12 +1062,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, dictPtr += litlengthHeaderSize; } - if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); { int i; size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; - if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, + dictionary_corrupted); entropy->rep[i] = rep; } } @@ -1030,7 +1086,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict /* load entropy tables */ { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); dict = (const char*)dict + eSize; dictSize -= eSize; } @@ -1064,9 +1120,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - CHECK_F( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); if (dict && dictSize) - CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted); return 0; } @@ -1085,7 +1143,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) DEBUGLOG(4, "DDict is %s", dctx->ddictIsCold ? 
"~cold~" : "hot!"); } - CHECK_F( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); if (ddict) { /* NULL ddict is equivalent to no dictionary */ ZSTD_copyDDictParameters(dctx, ddict); } @@ -1176,11 +1234,11 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); ZSTD_freeDDict(dctx->ddictLocal); if (dict && dictSize >= 8) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - if (dctx->ddictLocal == NULL) return ERROR(memory_allocation); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); } else { dctx->ddictLocal = NULL; } @@ -1217,7 +1275,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di DEBUGLOG(4, "ZSTD_initDStream_usingDict"); zds->streamStage = zdss_init; zds->noForwardProgress = 0; - CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); return ZSTD_FRAMEHEADERSIZE_PREFIX; } @@ -1254,7 +1312,7 @@ size_t ZSTD_resetDStream(ZSTD_DStream* dctx) size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); dctx->ddict = ddict; return 0; } @@ -1267,9 +1325,9 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); size_t const min = (size_t)1 << bounds.lowerBound; size_t const max = (size_t)1 << bounds.upperBound; - if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); - if (maxWindowSize < min) return ERROR(parameter_outOfBound); - if (maxWindowSize > max) return ERROR(parameter_outOfBound); + 
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); dctx->maxWindowSize = maxWindowSize; return 0; } @@ -1311,15 +1369,15 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) } #define CHECK_DBOUNDS(p,v) { \ - if (!ZSTD_dParam_withinBounds(p, v)) \ - return ERROR(parameter_outOfBound); \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ } size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { - if (dctx->streamStage != zdss_init) return ERROR(stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); switch(dParam) { case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); dctx->maxWindowSize = ((size_t)1) << value; return 0; @@ -1329,7 +1387,7 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value return 0; default:; } - return ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported); } size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) @@ -1340,8 +1398,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - if (dctx->streamStage != zdss_init) - return ERROR(stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); dctx->format = ZSTD_f_zstd1; dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; } @@ -1360,7 +1417,8 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; - if ((unsigned long long)minRBSize != neededSize) return 
ERROR(frameParameter_windowTooLarge); + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge); return minRBSize; } @@ -1378,9 +1436,9 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) ZSTD_frameHeader zfh; size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); if (ZSTD_isError(err)) return err; - if (err>0) return ERROR(srcSize_wrong); - if (zfh.windowSize > windowSizeMax) - return ERROR(frameParameter_windowTooLarge); + RETURN_ERROR_IF(err>0, srcSize_wrong); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge); return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); } @@ -1406,16 +1464,16 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB U32 someMoreWork = 1; DEBUGLOG(5, "ZSTD_decompressStream"); - if (input->pos > input->size) { /* forbidden */ - DEBUGLOG(5, "in: pos: %u vs size: %u", - (U32)input->pos, (U32)input->size); - return ERROR(srcSize_wrong); - } - if (output->pos > output->size) { /* forbidden */ - DEBUGLOG(5, "out: pos: %u vs size: %u", - (U32)output->pos, (U32)output->size); - return ERROR(dstSize_tooSmall); - } + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. 
out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); while (someMoreWork) { @@ -1430,8 +1488,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) if (zds->legacyVersion) { - /* legacy support is incompatible with static dctx */ - if (zds->staticSize) return ERROR(memory_allocation); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); if (hint==0) zds->streamStage = zdss_init; return hint; @@ -1446,9 +1504,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL; size_t const dictSize = zds->ddict ? 
ZSTD_DDict_dictSize(zds->ddict) : 0; DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); - /* legacy support is incompatible with static dctx */ - if (zds->staticSize) return ERROR(memory_allocation); - CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, dict, dictSize)); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; @@ -1495,13 +1553,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB /* Consume header (see ZSTDds_decodeFrameHeader) */ DEBUGLOG(4, "Consume header"); - CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, zds->ddict)); if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { - CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); zds->expected = ZSTD_blockHeaderSize; zds->stage = ZSTDds_decodeBlockHeader; } @@ -1511,7 +1569,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB (U32)(zds->fParams.windowSize >>10), (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); @@ -1525,14 +1584,15 @@ size_t 
ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (zds->staticSize) { /* static DCtx */ DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx)) - return ERROR(memory_allocation); + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation); } else { ZSTD_free(zds->inBuff, zds->customMem); zds->inBuffSize = 0; zds->outBuffSize = 0; zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - if (zds->inBuff == NULL) return ERROR(memory_allocation); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); } zds->inBuffSize = neededInBuffSize; zds->outBuff = zds->inBuff + zds->inBuffSize; @@ -1574,7 +1634,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (isSkipFrame) { loadedSize = MIN(toLoad, (size_t)(iend-ip)); } else { - if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); } ip += loadedSize; @@ -1615,7 +1677,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB default: assert(0); /* impossible */ - return ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC); /* some compiler require default to do something */ } } /* result */ @@ -1624,8 +1686,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if ((ip==istart) && (op==ostart)) { /* no forward progress */ zds->noForwardProgress ++; if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - if (op==oend) return ERROR(dstSize_tooSmall); - if (ip==iend) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(op==oend, dstSize_tooSmall); + RETURN_ERROR_IF(ip==iend, 
srcSize_wrong); assert(0); } } else { diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 32baad9fb..4418c51db 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); + { U32 const cBlockHeader = MEM_readLE24(src); U32 const cSize = cBlockHeader >> 3; bpPtr->lastBlock = cBlockHeader & 1; bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); bpPtr->origSize = cSize; /* only useful for RLE */ if (bpPtr->blockType == bt_rle) return 1; - if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); return cSize; } } @@ -78,7 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { - if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); @@ -86,11 +87,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, switch(litEncType) { case set_repeat: - if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); /* fall-through */ case set_compressed: - if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); { size_t lhSize, litSize, 
litCSize; U32 singleStream=0; U32 const lhlCode = (istart[0] >> 2) & 3; @@ -118,8 +119,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + (istart[4] << 10); break; } - if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); - if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); /* prefetch huffman table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -157,7 +158,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } - if (HUF_isError(hufSuccess)) return ERROR(corruption_detected); + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; @@ -187,7 +188,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - if (litSize+lhSize > srcSize) return ERROR(corruption_detected); + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; @@ -216,17 +217,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, case 3: lhSize = 3; litSize = MEM_readLE24(istart) >> 4; - if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } - if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; } default: - return ERROR(corruption_detected); /* impossible */ + RETURN_ERROR(corruption_detected, 
"impossible"); } } } @@ -436,8 +437,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb switch(type) { case set_rle : - if (!srcSize) return ERROR(srcSize_wrong); - if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + RETURN_ERROR_IF(!srcSize, srcSize_wrong); + RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; U32 const nbBits = nbAdditionalBits[symbol]; @@ -449,7 +450,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb *DTablePtr = defaultTable; return 0; case set_repeat: - if (!flagRepeatTable) return ERROR(corruption_detected); + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); /* prefetch FSE table if used */ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { const void* const pStart = *DTablePtr; @@ -461,15 +462,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb { unsigned tableLog; S16 norm[MaxSeq+1]; size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) return ERROR(corruption_detected); - if (tableLog > maxLog) return ERROR(corruption_detected); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); *DTablePtr = DTableSpace; return headerSize; } - default : /* impossible */ + default : assert(0); - return ERROR(GENERIC); + RETURN_ERROR(GENERIC, "impossible"); } } @@ -483,28 +484,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ - if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); /* SeqHead */ nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; - if (srcSize != 1) return ERROR(srcSize_wrong); + 
RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); return 1; } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { - if (ip+2 > iend) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; } else { - if (ip >= iend) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(ip >= iend, srcSize_wrong); nbSeq = ((nbSeq-0x80)<<8) + *ip++; } } *nbSeqPtr = nbSeq; /* FSE table descriptors */ - if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ + RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -517,7 +518,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq); - if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); ip += llhSize; } @@ -527,7 +528,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq); - if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); ip += ofhSize; } @@ -537,7 +538,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq); - if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); ip += mlhSize; } } @@ -590,8 +591,8 @@ size_t ZSTD_execSequenceLast7(BYTE* op, const BYTE* match = oLitEnd - sequence.offset; /* check */ - if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must fit within dstBuffer */ - if (iLitEnd > litLimit) return ERROR(corruption_detected); 
/* try to read beyond literal buffer */ + RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); /* copy literals */ while (op < oLitEnd) *op++ = *(*litPtr)++; @@ -599,7 +600,7 @@ size_t ZSTD_execSequenceLast7(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - base)) { /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); match = dictEnd - (base-match); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -631,8 +632,8 @@ size_t ZSTD_execSequence(BYTE* op, const BYTE* match = oLitEnd - sequence.offset; /* check */ - if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); + RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* copy Literals */ @@ -645,8 +646,7 @@ size_t ZSTD_execSequence(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - if (sequence.offset > (size_t)(oLitEnd - virtualStart)) - return ERROR(corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -712,8 +712,8 @@ size_t 
ZSTD_execSequenceLong(BYTE* op, const BYTE* match = sequence.match; /* check */ - if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); + RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); /* copy Literals */ @@ -726,7 +726,7 @@ size_t ZSTD_execSequenceLong(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; @@ -911,7 +911,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, seqState_t seqState; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); @@ -927,14 +929,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (nbSeq) return ERROR(corruption_detected); + RETURN_ERROR_IF(nbSeq, corruption_detected); /* save reps 
for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); memcpy(op, litPtr, lastLLSize); op += lastLLSize; } @@ -1066,7 +1068,9 @@ ZSTD_decompressSequencesLong_body( seqState.pos = (size_t)(op-prefixStart); seqState.dictEnd = dictEnd; assert(iend >= ip); - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); @@ -1076,7 +1080,7 @@ ZSTD_decompressSequencesLong_body( sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset); PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ } - if (seqNb (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); memcpy(op, litPtr, lastLLSize); op += lastLLSize; } @@ -1240,7 +1244,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); diff --git a/lib/dictBuilder/cover.c 
b/lib/dictBuilder/cover.c index b55bfb510..ed5a02f98 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -627,6 +627,39 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, return 1; } +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) +{ + const double ratio = (double)nbDmers / maxDictSize; + if (ratio >= 10) { + return; + } + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and up to 100x the " + "size of the dictionary!\n", (U32)maxDictSize, + (U32)nbDmers, ratio); +} + +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, + U32 nbDmers, U32 k, U32 passes) +{ + const U32 minEpochSize = k * 10; + COVER_epoch_info_t epochs; + epochs.num = MAX(1, maxDictSize / k / passes); + epochs.size = nbDmers / epochs.num; + if (epochs.size >= minEpochSize) { + assert(epochs.size * epochs.num <= nbDmers); + return epochs; + } + epochs.size = MIN(minEpochSize, nbDmers); + epochs.num = nbDmers / epochs.size; + assert(epochs.size * epochs.num <= nbDmers); + return epochs; +} + /** * Given the prepared context build the dictionary. */ @@ -636,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, ZDICT_cover_params_t parameters) { BYTE *const dict = (BYTE *)dictBuffer; size_t tail = dictBufferCapacity; - /* Divide the data up into epochs of equal size. - * We will select at least one segment from each epoch. - */ - const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4)); - const unsigned epochSize = (U32)(ctx->suffixSize / epochs); + /* Divide the data into epochs. We will select one segment from each epoch. 
*/ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); + const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); + size_t zeroScoreRun = 0; size_t epoch; DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", - epochs, epochSize); + (U32)epochs.num, (U32)epochs.size); /* Loop through the epochs until there are no more segments or the dictionary * is full. */ - for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { - const U32 epochBegin = (U32)(epoch * epochSize); - const U32 epochEnd = epochBegin + epochSize; + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; size_t segmentSize; /* Select a segment */ COVER_segment_t segment = COVER_selectSegment( ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); - /* If the segment covers no dmers, then we are out of content */ + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, so continue for some time. 
+ */ if (segment.score == 0) { - break; + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; } + zeroScoreRun = 0; /* Trim the segment if necessary and if it is too small then we are done */ segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); if (segmentSize < parameters.d) { @@ -706,6 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( parameters.d, parameters.splitPoint)) { return ERROR(GENERIC); } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); COVER_ctx_destroy(&ctx); @@ -977,6 +1017,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( unsigned k; COVER_best_t best; POOL_ctx *pool = NULL; + int warned = 0; /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { @@ -1019,6 +1060,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( POOL_free(pool); return ERROR(GENERIC); } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); + warned = 1; + } /* Loop through k reusing the same context */ for (k = kMinK; k <= kMaxK; k += kStepSize) { /* Prepare the arguments */ diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h index 82e2e1cea..27e6fb7a3 100644 --- a/lib/dictBuilder/cover.h +++ b/lib/dictBuilder/cover.h @@ -38,6 +38,35 @@ typedef struct { U32 score; } COVER_segment_t; +/** + * Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. 
+ * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + /** * Checks total compressed size of a dictionary */ diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index c289c0690..6cf37026f 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ -386,29 +386,35 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, { BYTE *const dict = (BYTE *)dictBuffer; size_t tail = dictBufferCapacity; - /* Divide the data up into epochs of equal size. - * We will select at least one segment from each epoch. - */ - const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k)); - const unsigned epochSize = (U32)(ctx->nbDmers / epochs); + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); + const size_t maxZeroScoreRun = 10; + size_t zeroScoreRun = 0; size_t epoch; DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", - epochs, epochSize); + (U32)epochs.num, (U32)epochs.size); /* Loop through the epochs until there are no more segments or the dictionary * is full. 
*/ - for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { - const U32 epochBegin = (U32)(epoch * epochSize); - const U32 epochEnd = epochBegin + epochSize; + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; size_t segmentSize; /* Select a segment */ COVER_segment_t segment = FASTCOVER_selectSegment( ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); - /* If the segment covers no dmers, then we are out of content */ + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, so continue for some time. + */ if (segment.score == 0) { - break; + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; } + zeroScoreRun = 0; /* Trim the segment if necessary and if it is too small then we are done */ segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); @@ -564,6 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, DISPLAYLEVEL(1, "Failed to initialize context\n"); return ERROR(GENERIC); } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); /* Build the dictionary */ DISPLAYLEVEL(2, "Building dictionary\n"); { @@ -616,6 +623,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( unsigned k; COVER_best_t best; POOL_ctx *pool = NULL; + int warned = 0; /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); @@ -664,6 +672,10 @@ ZDICT_optimizeTrainFromBuffer_fastCover( POOL_free(pool); return ERROR(GENERIC); } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); + warned = 1; + } /* Loop through k reusing the same context */ for (k = kMinK; k <= kMaxK; k += kStepSize) { /* Prepare the arguments */ diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index d57d59f01..e22973173 100644 --- a/lib/dictBuilder/zdict.h +++ 
b/lib/dictBuilder/zdict.h @@ -46,7 +46,12 @@ extern "C" { * The resulting dictionary will be saved into `dictBuffer`. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) * or an error code, which can be tested with ZDICT_isError(). - * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. + * Note: Dictionary training will fail if there are not enough samples to construct a + * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). + * If dictionary training fails, you should use zstd without a dictionary, as the dictionary + * would've been ineffective anyways. If you believe your samples would benefit from a dictionary + * please open an issue with details, and we can look into it. + * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. * In general, it's recommended to provide a few thousands samples, though this can vary a lot. @@ -110,6 +115,7 @@ typedef struct { * The resulting dictionary will be saved into `dictBuffer`. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. @@ -133,8 +139,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. * * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). 
- * On success `*parameters` contains the parameters selected. + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. */ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( @@ -151,7 +158,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( * The resulting dictionary will be saved into `dictBuffer`. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) * or an error code, which can be tested with ZDICT_isError(). - * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory . + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. * In general, it's recommended to provide a few thousands samples, though this can vary a lot. @@ -175,9 +183,10 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, * If accel is zero, default value of 1 is used. * * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). - * On success `*parameters` contains the parameters selected. - * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread. + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. 
+ * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. */ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, @@ -195,7 +204,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. * * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), - * or an error code, which can be tested by ZDICT_isError(). + * or an error code, which can be tested by ZDICT_isError(). * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. * Note 2: dictBuffer and dictContent can overlap */ @@ -219,6 +228,7 @@ typedef struct { * `parameters` is optional and can be provided with values set to 0 to mean "default". * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. * In general, it's recommended to provide a few thousands samples, though this can vary a lot. 
diff --git a/lib/dll/example/build_package.bat b/lib/dll/example/build_package.bat index cae0a15cb..8baabc7b2 100644 --- a/lib/dll/example/build_package.bat +++ b/lib/dll/example/build_package.bat @@ -6,14 +6,15 @@ COPY programs\datagen.h bin\example\ COPY programs\util.h bin\example\ COPY programs\platform.h bin\example\ COPY lib\common\mem.h bin\example\ -COPY lib\common\zstd_errors.h bin\example\ COPY lib\common\zstd_internal.h bin\example\ COPY lib\common\error_private.h bin\example\ COPY lib\common\xxhash.h bin\example\ -COPY lib\zstd.h bin\include\ COPY lib\libzstd.a bin\static\libzstd_static.lib COPY lib\dll\libzstd.* bin\dll\ COPY lib\dll\example\Makefile bin\example\ COPY lib\dll\example\fullbench-dll.* bin\example\ COPY lib\dll\example\README.md bin\ +COPY lib\zstd.h bin\include\ +COPY lib\common\zstd_errors.h bin\include\ +COPY lib\dictBuilder\zdict.h bin\include\ COPY programs\zstd.exe bin\zstd.exe diff --git a/lib/dll/libzstd.def b/lib/dll/libzstd.def deleted file mode 100644 index 668c4b1c2..000000000 --- a/lib/dll/libzstd.def +++ /dev/null @@ -1,87 +0,0 @@ -LIBRARY libzstd.dll -EXPORTS - ZDICT_getDictID - ZDICT_getErrorName - ZDICT_isError - ZDICT_trainFromBuffer - ZSTD_CStreamInSize - ZSTD_CStreamOutSize - ZSTD_DStreamInSize - ZSTD_DStreamOutSize - ZSTD_adjustCParams - ZSTD_checkCParams - ZSTD_compress - ZSTD_compressBegin - ZSTD_compressBegin_advanced - ZSTD_compressBegin_usingDict - ZSTD_compressBlock - ZSTD_compressBound - ZSTD_compressCCtx - ZSTD_compressContinue - ZSTD_compressEnd - ZSTD_compressStream - ZSTD_compress_advanced - ZSTD_compress_usingCDict - ZSTD_compress_usingDict - ZSTD_copyCCtx - ZSTD_copyDCtx - ZSTD_createCCtx - ZSTD_createCCtx_advanced - ZSTD_createCDict - ZSTD_createCDict_advanced - ZSTD_createCStream - ZSTD_createCStream_advanced - ZSTD_createDCtx - ZSTD_createDCtx_advanced - ZSTD_createDDict - ZSTD_createDStream - ZSTD_createDStream_advanced - ZSTD_decompress - ZSTD_decompressBegin - ZSTD_decompressBegin_usingDict - 
ZSTD_decompressBlock - ZSTD_decompressContinue - ZSTD_decompressDCtx - ZSTD_decompressStream - ZSTD_decompress_usingDDict - ZSTD_decompress_usingDict - ZSTD_endStream - ZSTD_estimateCCtxSize - ZSTD_estimateDCtxSize - ZSTD_flushStream - ZSTD_freeCCtx - ZSTD_freeCDict - ZSTD_freeCStream - ZSTD_freeDCtx - ZSTD_freeDDict - ZSTD_freeDStream - ZSTD_getBlockSizeMax - ZSTD_getCParams - ZSTD_getDecompressedSize - ZSTD_findDecompressedSize - ZSTD_getFrameContentSize - ZSTD_getErrorName - ZSTD_getFrameParams - ZSTD_getParams - ZSTD_initCStream - ZSTD_initCStream_advanced - ZSTD_initCStream_usingCDict - ZSTD_initCStream_usingDict - ZSTD_initDStream - ZSTD_initDStream_usingDDict - ZSTD_initDStream_usingDict - ZSTD_insertBlock - ZSTD_isError - ZSTD_isFrame - ZSTD_maxCLevel - ZSTD_nextInputType - ZSTD_nextSrcSizeToDecompress - ZSTD_resetCStream - ZSTD_resetDStream - ZSTD_sizeof_CCtx - ZSTD_sizeof_CDict - ZSTD_sizeof_CStream - ZSTD_sizeof_DCtx - ZSTD_sizeof_DDict - ZSTD_sizeof_DStream - ZSTD_versionNumber diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h index 5893cb965..e5b383ee4 100644 --- a/lib/legacy/zstd_legacy.h +++ b/lib/legacy/zstd_legacy.h @@ -20,7 +20,7 @@ extern "C" { ***************************************/ #include "mem.h" /* MEM_STATIC */ #include "error_private.h" /* ERROR */ -#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */ +#include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */ #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) # undef ZSTD_LEGACY_SUPPORT @@ -178,43 +178,73 @@ MEM_STATIC size_t ZSTD_decompressLegacy( } } -MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, - size_t compressedSize) +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) { - U32 const version = ZSTD_isLegacy(src, compressedSize); + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); switch(version) { #if (ZSTD_LEGACY_SUPPORT <= 1) case 
1 : - return ZSTDv01_findFrameCompressedSize(src, compressedSize); + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif #if (ZSTD_LEGACY_SUPPORT <= 2) case 2 : - return ZSTDv02_findFrameCompressedSize(src, compressedSize); + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif #if (ZSTD_LEGACY_SUPPORT <= 3) case 3 : - return ZSTDv03_findFrameCompressedSize(src, compressedSize); + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif #if (ZSTD_LEGACY_SUPPORT <= 4) case 4 : - return ZSTDv04_findFrameCompressedSize(src, compressedSize); + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif #if (ZSTD_LEGACY_SUPPORT <= 5) case 5 : - return ZSTDv05_findFrameCompressedSize(src, compressedSize); + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif #if (ZSTD_LEGACY_SUPPORT <= 6) case 6 : - return ZSTDv06_findFrameCompressedSize(src, compressedSize); + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif #if (ZSTD_LEGACY_SUPPORT <= 7) case 7 : - return ZSTDv07_findFrameCompressedSize(src, compressedSize); + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; #endif default : - return ERROR(prefix_unknown); + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; } + return frameSizeInfo; +} + +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + 
return frameSizeInfo.compressedSize; } MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index c007e7ceb..bb0f4b593 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -1336,6 +1336,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header #define LITERAL_NOENTROPY 63 #define COMMAND_NOENTROPY 7 /* to remove */ +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + static const size_t ZSTD_blockHeaderSize = 3; static const size_t ZSTD_frameHeaderSize = 4; @@ -1999,36 +2001,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } -size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; U32 magicNumber; blockProperties_t blockProperties; /* Frame Header */ - if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } magicNumber = ZSTD_readBE32(src); - if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + if (magicNumber != ZSTD_magicNumber) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; /* Loop on each block */ while (1) { size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); - if 
(ZSTDv01_isError(blockSize)) return blockSize; + if (ZSTDv01_isError(blockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize); + return; + } ip += ZSTD_blockHeaderSize; remainingSize -= ZSTD_blockHeaderSize; - if (blockSize > remainingSize) return ERROR(srcSize_wrong); + if (blockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } if (blockSize == 0) break; /* bt_end */ ip += blockSize; remainingSize -= blockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * BLOCKSIZE; } /******************************* diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h index 42f0897c7..245f9dd31 100644 --- a/lib/legacy/zstd_v01.h +++ b/lib/legacy/zstd_v01.h @@ -35,13 +35,18 @@ ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, const void* src, size_t compressedSize); -/** -ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format - compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv01_isError()) -*/ -size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize); + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and 
`dBound` are _not_ NULL. + */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /** ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index c09ef8cff..594835943 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -2728,6 +2728,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_ #define LITERAL_NOENTROPY 63 #define COMMAND_NOENTROPY 7 /* to remove */ +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + static const size_t ZSTD_blockHeaderSize = 3; static const size_t ZSTD_frameHeaderSize = 4; @@ -3312,37 +3314,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } -static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) { + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) +{ const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; U32 magicNumber; blockProperties_t blockProperties; /* Frame Header */ - if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } magicNumber = MEM_readLE32(src); - if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + if (magicNumber != ZSTD_magicNumber) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; /* Loop on 
each block */ while (1) { size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; + if (ZSTD_isError(cBlockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize); + return; + } ip += ZSTD_blockHeaderSize; remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + if (cBlockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } if (cBlockSize == 0) break; /* bt_end */ ip += cBlockSize; remainingSize -= cBlockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * BLOCKSIZE; } /******************************* @@ -3458,11 +3482,6 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize); } -size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize) -{ - return ZSTD_findFrameCompressedSize(src, compressedSize); -} - ZSTDv02_Dctx* ZSTDv02_createDCtx(void) { return (ZSTDv02_Dctx*)ZSTD_createDCtx(); diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h index 0dde7a637..9d7d8d9b5 100644 --- a/lib/legacy/zstd_v02.h +++ b/lib/legacy/zstd_v02.h @@ -35,13 +35,18 @@ ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, const void* src, size_t compressedSize); -/** -ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.2.x format - compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv02_isError()) -*/ -size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize); + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and 
decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv02_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /** ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index 0c4cdf688..b6c60d296 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -2369,6 +2369,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_ #define LITERAL_NOENTROPY 63 #define COMMAND_NOENTROPY 7 /* to remove */ +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + static const size_t ZSTD_blockHeaderSize = 3; static const size_t ZSTD_frameHeaderSize = 4; @@ -2953,36 +2955,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); } -static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +MEM_STATIC void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; U32 magicNumber; blockProperties_t blockProperties; /* Frame Header */ - if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + if (srcSize <
ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } magicNumber = MEM_readLE32(src); - if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + if (magicNumber != ZSTD_magicNumber) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; /* Loop on each block */ while (1) { size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; + if (ZSTD_isError(cBlockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize); + return; + } ip += ZSTD_blockHeaderSize; remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + if (cBlockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } if (cBlockSize == 0) break; /* bt_end */ ip += cBlockSize; remainingSize -= cBlockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * BLOCKSIZE; } @@ -3099,11 +3124,6 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize); } -size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize) -{ - return ZSTD_findFrameCompressedSize(src, srcSize); -} - ZSTDv03_Dctx* ZSTDv03_createDCtx(void) { return (ZSTDv03_Dctx*)ZSTD_createDCtx(); diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h index b4449e299..efd8c2b92 100644 --- a/lib/legacy/zstd_v03.h +++ b/lib/legacy/zstd_v03.h @@ -35,13 +35,18 @@ ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, const void* src, size_t compressedSize); -/** -ZSTDv03_getFrameSrcSize() : get the 
source length of a ZSTD frame compliant with v0.3.x format - compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv03_isError()) -*/ -size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize); + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv03_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /** ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index d7522025e..65dc64dbb 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -373,6 +373,8 @@ static const size_t ZSTD_frameHeaderSize_min = 5; #define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/) #define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; @@ -3119,34 +3121,57 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, return op-ostart; } -static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t*
cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; blockProperties_t blockProperties; /* Frame Header */ - if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); - if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown); + if (srcSize < ZSTD_frameHeaderSize_min) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } + if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min; /* Loop on each block */ while (1) { size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; + if (ZSTD_isError(cBlockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize); + return; + } ip += ZSTD_blockHeaderSize; remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + if (cBlockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } if (cBlockSize == 0) break; /* bt_end */ ip += cBlockSize; remainingSize -= cBlockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * BLOCKSIZE; } /* ****************************** @@ -3578,11 +3603,6 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t #endif } -size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize) -{ - return ZSTD_findFrameCompressedSize(src, srcSize); -} - size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); } size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx) diff 
--git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h index 6391631fc..bb5f3b7d0 100644 --- a/lib/legacy/zstd_v04.h +++ b/lib/legacy/zstd_v04.h @@ -35,13 +35,18 @@ ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, const void* src, size_t compressedSize); -/** -ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.4.x format - compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv04_isError()) -*/ -size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize); + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv04_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL.
+ */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /** ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index 96bffc44a..1c39f2f26 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -491,6 +491,8 @@ static const size_t ZSTDv05_frameHeaderSize_min = 5; #define WILDCOPY_OVERLENGTH 8 +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; @@ -3508,34 +3510,57 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t #endif } -size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; blockProperties_t blockProperties; /* Frame Header */ - if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong); - if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown); + if (srcSize < ZSTDv05_frameHeaderSize_min) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } + if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min; /* Loop on each block */ while (1) { size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTDv05_isError(cBlockSize)) return cBlockSize; + if (ZSTDv05_isError(cBlockSize)) { + 
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize); + return; + } ip += ZSTDv05_blockHeaderSize; remainingSize -= ZSTDv05_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + if (cBlockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } if (cBlockSize == 0) break; /* bt_end */ ip += cBlockSize; remainingSize -= cBlockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * BLOCKSIZE; } /* ****************************** diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h index b68fd578e..4a979854b 100644 --- a/lib/legacy/zstd_v05.h +++ b/lib/legacy/zstd_v05.h @@ -33,13 +33,18 @@ extern "C" { size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); -/** -ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame - compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv05_isError()) -*/ -size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize); + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv05_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL.
+ */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /* ************************************* * Helper functions diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index 60d8d6fd9..65975ac29 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -506,6 +506,8 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define FSEv06_ENCODING_STATIC 2 #define FSEv06_ENCODING_DYNAMIC 3 +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12, 13,14,15,16 }; @@ -3654,36 +3656,62 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t #endif } -size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; blockProperties_t blockProperties = { bt_compressed, 0 }; /* Frame Header */ { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min); - if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize; - if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown); - if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong); + if (ZSTDv06_isError(frameHeaderSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize); + return; + } + if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } + if (srcSize < 
frameHeaderSize+ZSTDv06_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } ip += frameHeaderSize; remainingSize -= frameHeaderSize; } /* Loop on each block */ while (1) { size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTDv06_isError(cBlockSize)) return cBlockSize; + if (ZSTDv06_isError(cBlockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize); + return; + } ip += ZSTDv06_blockHeaderSize; remainingSize -= ZSTDv06_blockHeaderSize; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + if (cBlockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } if (cBlockSize == 0) break; /* bt_end */ ip += cBlockSize; remainingSize -= cBlockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * ZSTDv06_BLOCKSIZE_MAX; } /*_****************************** diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h index fb4eb37c8..07818571d 100644 --- a/lib/legacy/zstd_v06.h +++ b/lib/legacy/zstd_v06.h @@ -43,12 +43,17 @@ ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); /** -ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame - compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv06_isError()) +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv06_isError()) + dBound (output 
parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. */ -size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize); +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /* ************************************* * Helper functions diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index c7bb7a529..443524b3a 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -2740,6 +2740,8 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t; #define FSEv07_ENCODING_STATIC 2 #define FSEv07_ENCODING_DYNAMIC 3 +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12, 13,14,15,16 }; @@ -3895,19 +3897,40 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t #endif } -size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize) +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) { const BYTE* ip = (const BYTE*)src; size_t remainingSize = srcSize; + size_t nbBlocks = 0; /* check */ - if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong); + if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } /* Frame Header */ { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min); - if 
(ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize; - if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown); - if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong); + if (ZSTDv07_isError(frameHeaderSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize); + return; + } + if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } + if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } ip += frameHeaderSize; remainingSize -= frameHeaderSize; } @@ -3915,20 +3938,28 @@ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize) while (1) { blockProperties_t blockProperties; size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTDv07_isError(cBlockSize)) return cBlockSize; + if (ZSTDv07_isError(cBlockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize); + return; + } ip += ZSTDv07_blockHeaderSize; remainingSize -= ZSTDv07_blockHeaderSize; if (blockProperties.blockType == bt_end) break; - if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + if (cBlockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } ip += cBlockSize; remainingSize -= cBlockSize; + nbBlocks++; } - return ip - (const BYTE*)src; + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * ZSTDv07_BLOCKSIZE_ABSOLUTEMAX; } /*_****************************** diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h index 6591cd301..a566c1d10 100644 --- a/lib/legacy/zstd_v07.h +++ b/lib/legacy/zstd_v07.h @@ -50,12 +50,17 @@ ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); /** -ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame - compressedSize : The size of the 'src' 
buffer, at least as large as the frame pointed to by 'src' - return : the number of bytes that would be read to decompress this frame - or an errorCode if it fails (which can be tested using ZSTDv07_isError()) +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv07_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. */ -size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize); +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); /*====== Helper functions ======*/ ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ diff --git a/lib/zstd.h b/lib/zstd.h index 237635cba..0c9ebe5b6 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -70,8 +70,8 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 3 -#define ZSTD_VERSION_RELEASE 8 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 0 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ @@ -89,6 +89,21 @@ ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ # define ZSTD_CLEVEL_DEFAULT 3 #endif +/*************************************** +* Constants +***************************************/ + +/* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */ 
+#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + /*====== Helper functions ======*/ #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ @@ -195,279 +219,6 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, const void* src, size_t srcSize); -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : - * Compression at an explicit compression level using a Dictionary. - * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dictBuilder/zdict.h). - * Note : This function loads the dictionary, resulting in significant startup delay. 
- * It's intended for a dictionary used only once. - * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); - -/*! ZSTD_decompress_usingDict() : - * Decompression using a known Dictionary. - * Dictionary must be identical to the one used during compression. - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); - - -/*********************************** - * Bulk processing dictionary API - **********************************/ -typedef struct ZSTD_CDict_s ZSTD_CDict; - -/*! ZSTD_createCDict() : - * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. - * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. - * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. - * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. - * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, - int compressionLevel); - -/*! ZSTD_freeCDict() : - * Function frees memory allocated by ZSTD_createCDict(). */ -ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); - -/*! 
ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. - * Note : compression level is _decided at dictionary creation time_, - * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); - - -typedef struct ZSTD_DDict_s ZSTD_DDict; - -/*! ZSTD_createDDict() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); - -/*! ZSTD_freeDDict() : - * Function frees memory allocated with ZSTD_createDDict() */ -ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); - -/*! ZSTD_decompress_usingDDict() : - * Decompression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); - - -/**************************** -* Streaming -****************************/ - -typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_inBuffer; - -typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. 
Necessarily 0 <= pos <= size */ -} ZSTD_outBuffer; - - - -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -* -* For parallel execution, use one separate ZSTD_CStream per thread. -* -* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. -* -* Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. -* When in doubt, it's recommended to fully initialize the context before usage. -* Use ZSTD_initCStream() to set the parameter to a selected compression level. -* Use advanced API (ZSTD_CCtx_setParameter(), etc.) to set more specific parameters. -* -* Use ZSTD_compressStream() as many times as necessary to consume input stream. -* The function will automatically update both `pos` fields within `input` and `output`. -* Note that the function may not consume the entire input, -* for example, because the output buffer is already full, -* in which case `input.pos < input.size`. -* The caller must check if input has been entirely consumed. -* If not, the caller must make some room to receive more compressed data, -* and then present again remaining input data. -* @return : a size hint, preferred nb of bytes to use as input for next function call -* or an error code, which can be tested using ZSTD_isError(). -* Note 1 : it's just a hint, to help latency a little, any value will work fine. 
-* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() -* -* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, -* using ZSTD_flushStream(). `output->pos` will be updated. -* Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0). -* In which case, make some room to receive more compressed data, and call again ZSTD_flushStream(). -* @return : 0 if internal buffers are entirely flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* ZSTD_endStream() instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* flush() operation is the same, and follows same rules as ZSTD_flushStream(). -* @return : 0 if frame fully completed and fully flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). 
-* -* *******************************************************************/ - -typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ -/*===== ZSTD_CStream management functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); - -/*===== Streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); - -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ - - - -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation. -* @return : recommended first input size -* Alternatively, use advanced API to set specific properties. -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* The function tries to flush all data decoded immediately, respecting output buffer size. 
-* If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., -* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. -* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. -* @return : 0 when a frame is completely decoded and fully flushed, -* or an error code, which can be tested using ZSTD_isError(), -* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : -* the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. -* *******************************************************************************/ - -typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ -/*===== ZSTD_DStream management functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); - -/*===== Streaming decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. 
*/ - -#endif /* ZSTD_H_235446 */ - - - - -/**************************************************************************************** - * ADVANCED AND EXPERIMENTAL FUNCTIONS - **************************************************************************************** - * The definitions in the following section are considered experimental. - * They are provided for advanced scenarios. - * They should never be used with a dynamic library, as prototypes may change in the future. - * Use them only in association with static linking. - * ***************************************************************************************/ - -#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - - -/**************************************************************************************** - * Candidate API for promotion to stable status - **************************************************************************************** - * The following symbols and constants form the "staging area" : - * they are considered to join "stable API" by v1.4.0. - * The proposal is written so that it can be made stable "as is", - * though it's still possible to suggest improvements. - * Staging is in fact last chance for changes, - * the API is locked once reaching "stable" status. 
- * ***************************************************************************************/ - - -/* === Constants === */ - -/* all magic numbers are supposed read/written to/from files/memory using little-endian convention */ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ -#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 - -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); - - -/* === Memory management === */ - -/*! ZSTD_sizeof_*() : - * These functions give the _current_ memory usage of selected object. - * Note that object memory usage can evolve (increase or decrease) over time. */ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - - /*************************************** * Advanced compression API ***************************************/ @@ -503,7 +254,10 @@ typedef enum { ZSTD_fast=1, typedef enum { - /* compression parameters */ + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict() + * for more info (superseded-by-cdict). 
*/ ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table * Default level is ZSTD_CLEVEL_DEFAULT==3. * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. @@ -625,6 +379,7 @@ typedef enum { * ZSTD_c_format * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -632,10 +387,10 @@ typedef enum { ZSTD_c_experimentalParam1=500, ZSTD_c_experimentalParam2=10, ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001 + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, } ZSTD_cParameter; - typedef struct { size_t error; int lowerBound; @@ -681,59 +436,6 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param */ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); -/*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from `dict` buffer. - * Decompression will have to use same dictionary. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, - * compression parameters can no longer be changed after loading a dictionary. - * Note 3 :`dict` content will be copied internally. 
- * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - -/*! ZSTD_CCtx_refCDict() : - * Reference a prepared dictionary, to be used for all next compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supercede any compression parameter previously set within CCtx. - * The dictionary will remain valid for future compressed frames using same CCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Referencing a NULL CDict means "return to no-dictionary mode". - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ -ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); - -/*! ZSTD_CCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) for next compressed frame. - * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. - * Its content must remain unmodified during compression. 
- * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - * ensure that the window size is large enough to contain the entire source. - * See ZSTD_c_windowLog. - * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU consuming operation, with non-negligible impact on latency. - * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent). - * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); - - typedef enum { ZSTD_reset_session_only = 1, ZSTD_reset_parameters = 2, @@ -756,8 +458,6 @@ typedef enum { */ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); - - /*! ZSTD_compress2() : * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. * ZSTD_compress2() always starts a new frame. @@ -772,15 +472,163 @@ ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
+ */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_format + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one decompression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. 
+ * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. 
+* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. 
+* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +/*===== Streaming compression functions =====*/ typedef enum { ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ ZSTD_e_flush=1, /* flush any data provided so far, * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. 
*/ + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. * note that frame is only closed after compressed data is fully flushed (return value == 0). * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). */ + * note : each frame is independent (does not reference any content from previous frame). + * note : multithreaded compression will block to flush as much output as possible. */ } ZSTD_EndDirective; /*! ZSTD_compressStream2() : @@ -809,59 +657,248 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_inBuffer* input, ZSTD_EndDirective endOp); +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ +/******************************************************************************* + * This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and + * ZSTD_compressStream2(). It is redundant, but is still fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ -/* ============================== */ -/* Advanced decompression API */ -/* ============================== */ - -/* The advanced API pushes parameters one by one into an existing DCtx context. - * Parameters are sticky, and remain valid for all following frames - * using the same DCtx context. - * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
- * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). - * Therefore, no new decompression function is necessary. +/** + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); */ - - -typedef enum { - - ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) */ - - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_c_format - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000 - -} ZSTD_dParameter; - - -/*! ZSTD_dParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - both lower and upper bounds, inclusive +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/** + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the number of bytes left to flush (if non-zero and not an error). 
*/ -ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/*! ZSTD_DCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_dParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is only possible during frame initialization (before starting decompression). - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. 
+* But if `output.pos == output.size`, there might be some data left within internal buffers. +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +/*===== Streaming decompression functions =====*/ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h).
+ * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. + * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. + * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! 
ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. 
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could be for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset when the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode".
+ * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame.
+ * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); /*! ZSTD_DCtx_loadDictionary() : * Create an internal DDict from dict buffer, @@ -910,15 +947,32 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); -/*! ZSTD_DCtx_reset() : - * Return a DCtx to clean state. - * Session and parameters can be reset jointly or separately. - * Parameters can only be reset when no active frame is being decompressed. 
- * @return : 0, or an error code, which can be tested with ZSTD_isError() - */ -ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ +/**************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY /**************************************************************************************** * experimental API (static linking only) @@ -1064,15 +1118,24 @@ typedef enum { ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ } ZSTD_dictAttachPref_e; +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. 
*/ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + /*************************************** * Frame size functions ***************************************/ /*! ZSTD_findDecompressedSize() : - * `src` should point the start of a series of ZSTD encoded and/or skippable frames + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames * `srcSize` must be the _exact_ size of this series - * (i.e. there should be a frame boundary exactly at `srcSize` bytes after `src`) + * (i.e. there should be a frame boundary at `src + srcSize`) * @return : - decompressed size of all data in all successive frames * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN * - if an error occurred: ZSTD_CONTENTSIZE_ERROR @@ -1092,6 +1155,21 @@ typedef enum { * however it does mean that all frame data must be present and valid. */ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); +/** ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + /*! ZSTD_frameHeaderSize() : * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. * @return : size of the Frame Header, @@ -1110,7 +1188,7 @@ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); * It will also consider src size to be arbitrarily "large", which is worst case. * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. * Note : CCtx size estimation is only correct for single-threaded compression. */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); @@ -1122,7 +1200,7 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); * It will also consider src size to be arbitrarily "large", which is worst case. * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). 
Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. * Note : CStream size estimation is only correct for single-threaded compression. * ZSTD_DStream memory budget depends on window Size. * This information can be passed manually, using ZSTD_estimateDStreamSize, @@ -1318,6 +1396,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * See the comments on that enum for an explanation of the feature. */ #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -1329,10 +1413,10 @@ ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param /*! ZSTD_CCtx_params : * Quick howto : * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into - * an existing ZSTD_CCtx_params structure. - * This is similar to - * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to * an existing CCtx.
* These parameters will be applied to @@ -1363,20 +1447,20 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compre */ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -/*! ZSTD_CCtxParam_setParameter() : +/*! ZSTD_CCtxParams_setParameter() : * Similar to ZSTD_CCtx_setParameter. * Set one compression parameter, selected by enum ZSTD_cParameter. * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -/*! ZSTD_CCtxParam_getParameter() : +/*! ZSTD_CCtxParams_getParameter() : * Similar to ZSTD_CCtx_getParameter. * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); /*! ZSTD_CCtx_setParametersUsingCCtxParams() : * Apply a set of ZSTD_CCtx_params to the compression context. @@ -1419,31 +1503,6 @@ ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); * it must remain read accessible throughout the lifetime of DDict */ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. 
*/ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - /*! ZSTD_DCtx_loadDictionary_byReference() : * Same as ZSTD_DCtx_loadDictionary(), * but references `dict` content instead of copying it into `dctx`. @@ -1505,14 +1564,68 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( ********************************************************************/ /*===== Advanced Streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. 
Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */ -ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/ +/**! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + */ +ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); +/**! ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); +/**!
ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. + */ ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); +/**! ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); +/**! 
ZSTD_initCStream_usingCDict_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * * start a new frame, using same parameters from previous frame. * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. * Note that zcs must be init at least once before using ZSTD_resetCStream(). 
diff --git a/programs/Makefile b/programs/Makefile index d1910fbb4..692980e4f 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -51,7 +51,7 @@ endif CFLAGS ?= -O3 DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \ + -Wstrict-prototypes -Wundef -Wpointer-arith \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls -Wmissing-prototypes -Wc++-compat CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) diff --git a/programs/benchzstd.c b/programs/benchzstd.c index 09f39cef5..8f3511919 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -135,7 +135,8 @@ BMK_advancedParams_t BMK_initAdvancedParams(void) { 0, /* ldmMinMatch */ 0, /* ldmHashLog */ 0, /* ldmBuckSizeLog */ - 0 /* ldmHashRateLog */ + 0, /* ldmHashRateLog */ + ZSTD_lcm_auto /* literalCompressionMode */ }; return res; } @@ -184,6 +185,7 @@ BMK_initCCtx(ZSTD_CCtx* ctx, CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog)); CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch)); CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode)); CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, comprParams->strategy)); CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize)); } diff --git a/programs/benchzstd.h b/programs/benchzstd.h index fd7c54f5a..376a80a90 100644 --- a/programs/benchzstd.h +++ b/programs/benchzstd.h @@ -116,6 +116,7 @@ typedef struct { int ldmHashLog; int ldmBucketSizeLog; int ldmHashRateLog; + ZSTD_literalCompressionMode_e literalCompressionMode; } BMK_advancedParams_t; /* returns default parameters used by nonAdvanced functions */ diff --git a/programs/fileio.c b/programs/fileio.c index dd47a1e32..412ef4762 100644 --- 
a/programs/fileio.c +++ b/programs/fileio.c @@ -24,7 +24,7 @@ * Includes ***************************************/ #include "platform.h" /* Large Files support, SET_BINARY_MODE */ -#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile */ +#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */ #include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */ #include <stdlib.h> /* malloc, free */ #include <string.h> /* strcmp, strlen */ @@ -296,6 +296,7 @@ struct FIO_prefs_s { int ldmMinMatch; int ldmBucketSizeLog; int ldmHashRateLog; + ZSTD_literalCompressionMode_e literalCompressionMode; /* IO preferences */ U32 removeSrcFile; @@ -339,6 +340,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->ldmMinMatch = 0; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->literalCompressionMode = ZSTD_lcm_auto; return ret; } @@ -406,6 +408,12 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { prefs->rsyncable = rsyncable; } +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_literalCompressionMode_e mode) { + prefs->literalCompressionMode = mode; +} + void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) { #ifndef ZSTD_NOCOMPRESS @@ -507,28 +515,10 @@ static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName, return stdout; } - /* ensure dst is not the same file as src */ - if (srcFileName != NULL) { -#ifdef _MSC_VER - /* note : Visual does not support file identification by inode.
- * The following work-around is limited to detecting exact name repetition only, - * aka `filename` is considered different from `subdir/../filename` */ - if (!strcmp(srcFileName, dstFileName)) { - DISPLAYLEVEL(1, "zstd: Refusing to open a output file which will overwrite the input file \n"); - return NULL; - } -#else - stat_t srcStat; - stat_t dstStat; - if (UTIL_getFileStat(srcFileName, &srcStat) - && UTIL_getFileStat(dstFileName, &dstStat)) { - if (srcStat.st_dev == dstStat.st_dev - && srcStat.st_ino == dstStat.st_ino) { - DISPLAYLEVEL(1, "zstd: Refusing to open a output file which will overwrite the input file \n"); - return NULL; - } - } -#endif + /* ensure dst is not the same as src */ + if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) { + DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n"); + return NULL; } if (prefs->sparseFileSupport == 1) { @@ -620,6 +610,7 @@ typedef struct { size_t srcBufferSize; void* dstBuffer; size_t dstBufferSize; + const char* dictFileName; ZSTD_CStream* cctx; } cRess_t; @@ -647,6 +638,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */ if (dictFileName && (dictBuffer==NULL)) EXM_THROW(32, "allocation error : can't create dictBuffer"); + ress.dictFileName = dictFileName; if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog) comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT; @@ -674,6 +666,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); 
/* multi-threading */ #ifdef ZSTD_MULTITHREAD DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); @@ -1299,12 +1292,18 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, { int result; - /* File check */ + /* ensure src is not a directory */ if (UTIL_isDirectory(srcFileName)) { DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); return 1; } + /* ensure src is not the same as dict (if present) */ + if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) { + DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); + return 1; + } + ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ diff --git a/programs/fileio.h b/programs/fileio.h index b20570bcb..e46633752 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -71,6 +71,9 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_literalCompressionMode_e mode); void FIO_setNoProgress(unsigned noProgress); void FIO_setNotificationLevel(int level); diff --git a/programs/util.c b/programs/util.c index 49eea148e..622e5025f 100644 --- a/programs/util.c +++ b/programs/util.c @@ -87,6 +87,23 @@ U32 UTIL_isDirectory(const char* infilename) return 0; } +int UTIL_isSameFile(const char* file1, const char* file2) +{ +#if defined(_MSC_VER) + /* note : Visual does not support file identification by inode. 
+ * The following work-around is limited to detecting exact name repetition only, + * aka `filename` is considered different from `subdir/../filename` */ + return !strcmp(file1, file2); +#else + stat_t file1Stat; + stat_t file2Stat; + return UTIL_getFileStat(file1, &file1Stat) + && UTIL_getFileStat(file2, &file2Stat) + && (file1Stat.st_dev == file2Stat.st_dev) + && (file1Stat.st_ino == file2Stat.st_ino); +#endif +} + U32 UTIL_isLink(const char* infilename) { /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ @@ -95,7 +112,9 @@ U32 UTIL_isLink(const char* infilename) || (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \ || (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \ || (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \ - || (defined(__APPLE__) && defined(__MACH__)) + || (defined(__APPLE__) && defined(__MACH__)) \ + || defined(__OpenBSD__) \ + || defined(__FreeBSD__) int r; stat_t statbuf; r = lstat(infilename, &statbuf); diff --git a/programs/util.h b/programs/util.h index f78bcbe1b..eee7ebfc3 100644 --- a/programs/util.h +++ b/programs/util.h @@ -174,6 +174,7 @@ int UTIL_isRegularFile(const char* infilename); int UTIL_setFileStat(const char* filename, stat_t* statbuf); U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); +int UTIL_isSameFile(const char* file1, const char* file2); U32 UTIL_isLink(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) diff --git a/programs/zstd.1 b/programs/zstd.1 index c93755f87..cb4e1271a 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -164,7 +164,7 @@ keep source file(s) after successful compression or decompression\. This is the . .TP \fB\-r\fR -operate recursively on dictionaries +operate recursively on directories . 
.TP \fB\-\-format=FORMAT\fR diff --git a/programs/zstd.1.md b/programs/zstd.1.md index a029af5ff..93c6fa400 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -178,7 +178,7 @@ the last one takes effect. keep source file(s) after successful compression or decompression. This is the default behavior. * `-r`: - operate recursively on dictionaries + operate recursively on directories * `--format=FORMAT`: compress and decompress in other formats. If compiled with support, zstd can compress to or decompress from other compression algorithm diff --git a/programs/zstdcli.c b/programs/zstdcli.c index f4452636b..904bcdf80 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -148,6 +148,7 @@ static int usage_advanced(const char* programName) #endif DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n"); DISPLAY( "--[no-]check : integrity check (default: enabled) \n"); + DISPLAY( "--[no-]compress-literals : force (un)compressed literals \n"); #endif #ifdef UTIL_HAS_CREATEFILELIST DISPLAY( " -r : operate recursively on directories \n"); @@ -569,6 +570,7 @@ int main(int argCount, const char* argv[]) #ifndef ZSTD_NOBENCH BMK_advancedParams_t benchParams = BMK_initAdvancedParams(); #endif + ZSTD_literalCompressionMode_e literalCompressionMode = ZSTD_lcm_auto; /* init */ @@ -661,6 +663,8 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; } #endif if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; } + if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; } + if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } /* long commands with arguments */ @@ -953,6 +957,8 @@ int main(int argCount, const char* argv[]) 
filenameTable[fileNamesNb++] = filenameTable[u]; } } + if (fileNamesNb == 0 && filenameIdx > 0) + CLEAN_RETURN(1); filenameIdx = fileNamesNb; } if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */ @@ -995,6 +1001,7 @@ int main(int argCount, const char* argv[]) if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) { benchParams.ldmHashRateLog = g_ldmHashRateLog; } + benchParams.literalCompressionMode = literalCompressionMode; if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); @@ -1108,6 +1115,7 @@ int main(int argCount, const char* argv[]) FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); + FIO_setLiteralCompressionMode(prefs, literalCompressionMode); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; @@ -1116,7 +1124,7 @@ int main(int argCount, const char* argv[]) else operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */ + (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); #endif } else { /* decompression or test */ diff --git a/tests/Makefile b/tests/Makefile index 2daf0970f..2a9cd3d8f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -33,7 +33,7 @@ endif CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef -Wformat-security \ + -Wstrict-prototypes -Wundef \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls -Wmissing-prototypes CFLAGS += 
$(DEBUGFLAGS) $(MOREFLAGS) diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 17c2c1899..b03dc55ea 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -938,7 +938,9 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, FSE_CState_t stateOffsetBits; FSE_CState_t stateLitLength; - CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */ + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, op, oend-op)), + dstSize_tooSmall, "not enough space remaining"); /* first symbols */ FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 4130f18e3..12ec9524b 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -27,16 +27,16 @@ PRGDIR = ../../programs FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \ - $(CPPFLAGS) + -DZSTD_MULTITHREAD $(CPPFLAGS) FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef -Wformat-security \ + -Wstrict-prototypes -Wundef \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ -Wredundant-decls \ -g -fno-omit-frame-pointer FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS) FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS) -FUZZ_LDFLAGS := $(LDFLAGS) +FUZZ_LDFLAGS := -pthread $(LDFLAGS) FUZZ_ARFLAGS := $(ARFLAGS) FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) @@ -46,11 +46,13 @@ FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c +ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c FUZZ_SRC := \ $(FUZZ_SRC) \ $(ZSTDDECOMP_SRC) \ $(ZSTDCOMMON_SRC) \ - $(ZSTDCOMP_SRC) + $(ZSTDCOMP_SRC) \ + $(ZSTDDICT_SRC) FUZZ_OBJ := 
$(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC))) @@ -65,7 +67,9 @@ FUZZ_TARGETS := \ block_round_trip \ simple_decompress \ stream_decompress \ - block_decompress + block_decompress \ + dictionary_round_trip \ + dictionary_decompress all: $(FUZZ_TARGETS) @@ -90,6 +94,12 @@ stream_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) stream_decompress.o block_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) block_decompress.o $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) block_decompress.o $(LIB_FUZZING_ENGINE) -o $@ +dictionary_round_trip: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_round_trip.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_round_trip.o $(LIB_FUZZING_ENGINE) -o $@ + +dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o + $(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@ + libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c new file mode 100644 index 000000000..7d3a7678a --- /dev/null +++ b/tests/fuzz/dictionary_decompress.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target attempts to decompress the fuzzed data with the dictionary + * decompression function to ensure the decompressor never crashes. It does not + * fuzz the dictionary. 
+ */ + +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +static ZSTD_DCtx *dctx = NULL; +static void* rBuf = NULL; +static size_t bufSize = 0; + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + FUZZ_dict_t dict; + size_t neededBufSize; + + uint32_t seed = FUZZ_seed(&src, &size); + neededBufSize = MAX(20 * size, (size_t)256 << 10); + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBufSize > bufSize) { + free(rBuf); + rBuf = malloc(neededBufSize); + bufSize = neededBufSize; + FUZZ_ASSERT(rBuf); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + dict = FUZZ_train(src, size, &seed); + if (FUZZ_rand32(&seed, 0, 1) == 0) { + ZSTD_decompress_usingDict(dctx, + rBuf, neededBufSize, + src, size, + dict.buff, dict.size); + } else { + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2))); + ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size); + } + + free(dict.buff); +#ifndef STATEFUL_FUZZING + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c new file mode 100644 index 000000000..e28c65c98 --- /dev/null +++ b/tests/fuzz/dictionary_round_trip.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * This fuzz target performs a zstd round-trip test (compress & decompress) with + * a dictionary, compares the result with the original, and calls abort() on + * corruption. 
+ */ + +#include +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" + +static const int kMaxClevel = 19; + +static ZSTD_CCtx *cctx = NULL; +static ZSTD_DCtx *dctx = NULL; +static uint32_t seed; + +static size_t roundTripTest(void *result, size_t resultCapacity, + void *compressed, size_t compressedCapacity, + const void *src, size_t srcSize) +{ + ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; + FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed); + size_t cSize; + if ((FUZZ_rand(&seed) & 15) == 0) { + int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + + cSize = ZSTD_compress_usingDict(cctx, + compressed, compressedCapacity, + src, srcSize, + dict.buff, dict.size, + cLevel); + } else { + dictContentType = FUZZ_rand32(&seed, 0, 2); + FUZZ_setRandomParameters(cctx, srcSize, &seed); + /* Disable checksum so we can use sizes smaller than compress bound. */ + FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + dictContentType)); + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); + } + FUZZ_ZASSERT(cSize); + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict.buff, dict.size, + (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + dictContentType)); + { + size_t const ret = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, cSize); + free(dict.buff); + return ret; + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + size_t const rBufSize = size; + void* rBuf = malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void* cBuf; + + seed = FUZZ_seed(&src, &size); + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we force the checksum to be disabled, + * giving us 4 bytes of overhead. 
+ */ + cBufSize -= FUZZ_rand32(&seed, 0, 1); + cBuf = malloc(cBufSize); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + { + size_t const result = + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + } + free(rBuf); + free(cBuf); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; +#endif + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 8ce293a3a..ee27015a5 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -34,6 +34,8 @@ TARGETS = [ 'simple_decompress', 'stream_decompress', 'block_decompress', + 'dictionary_round_trip', + 'dictionary_decompress', ] ALL_TARGETS = TARGETS + ['all'] FUZZ_RNG_SEED_SIZE = 4 @@ -192,11 +194,21 @@ def build_parser(args): default=LIB_FUZZING_ENGINE, help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a ' "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE))) - parser.add_argument( + + fuzz_group = parser.add_mutually_exclusive_group() + fuzz_group.add_argument( '--enable-coverage', dest='coverage', action='store_true', help='Enable coverage instrumentation (-fsanitize-coverage)') + fuzz_group.add_argument( + '--enable-fuzzer', + dest='fuzzer', + action='store_true', + help=('Enable clang fuzzer (-fsanitize=fuzzer). 
When enabled ' + 'LIB_FUZZING_ENGINE is ignored') + ) + parser.add_argument( '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN') parser.add_argument( @@ -364,13 +376,17 @@ def build(args): '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size), ] - mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)] - # Set flags for options + assert not (args.fuzzer and args.coverage) if args.coverage: common_flags += [ '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp' ] + if args.fuzzer: + common_flags += ['-fsanitize=fuzzer'] + args.lib_fuzzing_engine = '' + + mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)] if args.sanitize_recover: recover_flags = ['-fsanitize-recover=all'] diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index 1553d436c..658c685f4 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -40,8 +40,13 @@ int main(int argc, char const **argv) { size_t readSize; FILE *file; - /* Check that it is a regular file, and that the fileSize is valid */ - FUZZ_ASSERT_MSG(UTIL_isRegularFile(fileName), fileName); + /* Check that it is a regular file, and that the fileSize is valid. + * If it is not a regular file, then it may have been deleted since we + * constructed the list, so just skip it. 
+ */ + if (!UTIL_isRegularFile(fileName)) { + continue; + } FUZZ_ASSERT_MSG(fileSize <= kMaxFileSize, fileName); /* Ensure we have a large enough buffer allocated */ if (fileSize > bufferSize) { diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 83608b6e7..7e3b66098 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -25,9 +25,6 @@ static const int kMaxClevel = 19; static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; -static void* cBuf = NULL; -static void* rBuf = NULL; -static size_t bufSize = 0; static uint32_t seed; static size_t roundTripTest(void *result, size_t resultCapacity, @@ -36,16 +33,8 @@ static size_t roundTripTest(void *result, size_t resultCapacity, { size_t cSize; if (FUZZ_rand(&seed) & 1) { - ZSTD_inBuffer in = {src, srcSize, 0}; - ZSTD_outBuffer out = {compressed, compressedCapacity, 0}; - size_t err; - - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); FUZZ_setRandomParameters(cctx, srcSize, &seed); - err = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); - FUZZ_ZASSERT(err); - FUZZ_ASSERT(err == 0); - cSize = out.pos; + cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); } else { int const cLevel = FUZZ_rand(&seed) % kMaxClevel; cSize = ZSTD_compressCCtx( @@ -57,20 +46,21 @@ static size_t roundTripTest(void *result, size_t resultCapacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t neededBufSize; + size_t const rBufSize = size; + void* rBuf = malloc(rBufSize); + size_t cBufSize = ZSTD_compressBound(size); + void* cBuf; seed = FUZZ_seed(&src, &size); - neededBufSize = ZSTD_compressBound(size); + /* Half of the time fuzz with a 1 byte smaller output size. + * This will still succeed because we don't use a dictionary, so the dictID + * field is empty, giving us 4 bytes of overhead. 
+ */ + cBufSize -= FUZZ_rand32(&seed, 0, 1); + cBuf = malloc(cBufSize); + + FUZZ_ASSERT(cBuf && rBuf); - /* Allocate all buffers and contexts if not already allocated */ - if (neededBufSize > bufSize) { - free(cBuf); - free(rBuf); - cBuf = malloc(neededBufSize); - rBuf = malloc(neededBufSize); - bufSize = neededBufSize; - FUZZ_ASSERT(cBuf && rBuf); - } if (!cctx) { cctx = ZSTD_createCCtx(); FUZZ_ASSERT(cctx); @@ -82,11 +72,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const result = - roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } + free(rBuf); + free(cBuf); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index 7ad571221..68e120d7e 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -62,9 +62,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) if (!dstream) { dstream = ZSTD_createDStream(); FUZZ_ASSERT(dstream); - FUZZ_ASSERT(!ZSTD_isError(ZSTD_initDStream(dstream))); } else { - FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream))); + FUZZ_ZASSERT(ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only)); } while (size > 0) { @@ -73,7 +72,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) ZSTD_outBuffer out = makeOutBuffer(); size_t const rc = ZSTD_decompressStream(dstream, &out, &in); if (ZSTD_isError(rc)) goto error; - if (rc == 0) FUZZ_ASSERT(!ZSTD_isError(ZSTD_resetDStream(dstream))); } } diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index d903bcb29..d13c2dbe7 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -63,7 +63,7 @@ static size_t compress(uint8_t *dst, size_t 
capacity, ZSTD_inBuffer in = makeInBuffer(&src, &srcSize); /* Mode controls the action. If mode == -1 we pick a new mode */ int mode = -1; - while (in.pos < in.size) { + while (in.pos < in.size || mode != -1) { ZSTD_outBuffer out = makeOutBuffer(dst, capacity); /* Previous action finished, pick a new mode. */ if (mode == -1) mode = FUZZ_rand(&seed) % 10; diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 10163e151..0e64400e6 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -8,10 +8,14 @@ */ #define ZSTD_STATIC_LINKING_ONLY +#define ZDICT_STATIC_LINKING_ONLY + +#include <string.h> #include "zstd_helpers.h" #include "fuzz_helpers.h" #include "zstd.h" +#include "zdict.h" static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) { @@ -71,7 +75,6 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state); setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state); setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state); - setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state); /* Select long distance matchig parameters */ setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state); setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state); @@ -81,4 +84,54 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) state); setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX, state); + /* Set misc parameters */ + setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state); + setRand(cctx, ZSTD_c_rsyncable, 0, 1, state); + setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state); + setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state); + setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state); +} + +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state) +{ + size_t const dictSize = MAX(srcSize / 8, 1024); + size_t const totalSampleSize = dictSize * 11; + FUZZ_dict_t dict = { malloc(dictSize), dictSize }; + char* const
samples = (char*)malloc(totalSampleSize); + unsigned nbSamples = 100; + size_t* const samplesSizes = (size_t*)malloc(sizeof(size_t) * nbSamples); + size_t pos = 0; + size_t sample = 0; + ZDICT_fastCover_params_t params; + FUZZ_ASSERT(dict.buff && samples && samplesSizes); + + for (sample = 0; sample < nbSamples; ++sample) { + size_t const remaining = totalSampleSize - pos; + size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1); + size_t const limit = MIN(srcSize - offset, remaining); + size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); + memcpy(samples + pos, src + offset, toCopy); + pos += toCopy; + samplesSizes[sample] = toCopy; + + } + memset(samples + pos, 0, totalSampleSize - pos); + + memset(&params, 0, sizeof(params)); + params.accel = 5; + params.k = 40; + params.d = 8; + params.f = 14; + params.zParams.compressionLevel = 1; + dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, + samples, samplesSizes, nbSamples, params); + if (ZSTD_isError(dict.size)) { + free(dict.buff); + memset(&dict, 0, sizeof(dict)); + } + + free(samplesSizes); + free(samples); + + return dict; } diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h index 3856bebec..457e6e995 100644 --- a/tests/fuzz/zstd_helpers.h +++ b/tests/fuzz/zstd_helpers.h @@ -14,6 +14,8 @@ #ifndef ZSTD_HELPERS_H #define ZSTD_HELPERS_H +#define ZSTD_STATIC_LINKING_ONLY + #include "zstd.h" #include <stdint.h> @@ -27,6 +29,17 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state); ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state); ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state); +typedef struct { + void* buff; + size_t size; +} FUZZ_dict_t; + +/* Quickly train a dictionary from a source for fuzzing. + * NOTE: Don't use this to train production dictionaries, it is only optimized + * for speed, and doesn't care about dictionary quality.
+ */ +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state); + #ifdef __cplusplus } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 946844e16..c38aef610 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -124,12 +124,14 @@ static U32 FUZ_highbit32(U32 v32) #define CHECK(fn) { CHECK_V(err, fn); } #define CHECKPLUS(var, fn, more) { CHECK_V(var, fn); more; } -#define CHECK_EQ(lhs, rhs) { \ - if ((lhs) != (rhs)) { \ - DISPLAY("Error L%u => %s != %s ", __LINE__, #lhs, #rhs); \ +#define CHECK_OP(op, lhs, rhs) { \ + if (!((lhs) op (rhs))) { \ + DISPLAY("Error L%u => FAILED %s %s %s ", __LINE__, #lhs, #op, #rhs); \ goto _output_error; \ } \ } +#define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs) +#define CHECK_LT(lhs, rhs) CHECK_OP(<, lhs, rhs) /*============================================= @@ -374,6 +376,20 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : tight ZSTD_decompressBound test : ", testNb++); + { + unsigned long long bound = ZSTD_decompressBound(compressedBuffer, cSize); + if (bound != CNBuffSize) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with invalid srcSize : ", testNb++); + { + unsigned long long bound = ZSTD_decompressBound(compressedBuffer, cSize - 1); + if (bound != ZSTD_CONTENTSIZE_ERROR) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, (unsigned)CNBuffSize); { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); if (r != CNBuffSize) goto _output_error; } @@ -429,6 +445,27 @@ static int basicUnitTests(U32 seed, double compressibility) if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with content size missing : ", testNb++); + { /* create compressed buffer with content size missing */ + 
ZSTD_CCtx* cctx = ZSTD_createCCtx(); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) ); + CHECKPLUS(r, ZSTD_compress2(cctx, + compressedBuffer, compressedBufferSize, + CNBuffer, CNBuffSize), + cSize=r ); + ZSTD_freeCCtx(cctx); + } + { /* ensure frame content size is missing */ + ZSTD_frameHeader zfh; + size_t const ret = ZSTD_getFrameHeader(&zfh, compressedBuffer, compressedBufferSize); + if (ret != 0 || zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) goto _output_error; + } + { /* ensure CNBuffSize <= decompressBound */ + unsigned long long const bound = ZSTD_decompressBound(compressedBuffer, compressedBufferSize); + if (CNBuffSize > bound) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++); { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19); @@ -828,6 +865,59 @@ static int basicUnitTests(U32 seed, double compressibility) ZSTDMT_freeCCtx(mtctx); } + DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++) + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + size_t cSize1, cSize2; + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) ); + cSize1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK(cSize1); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_lcm_uncompressed) ); + cSize2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK(cSize2); + CHECK_LT(cSize1, cSize2); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Multithreaded ZSTD_compress2() with rsyncable : ", testNb++) + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + /* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so + * ZSTDMT is forced to not take the 
shortcut. + */ + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) ); + CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) ); + ZSTD_freeCCtx(cctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++) + { ZSTD_CCtx_params* params = ZSTD_createCCtxParams(); + int value; + /* Check that the overlap log and job size are unset. */ + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_EQ(value, 0); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_EQ(value, 0); + /* Set and check the overlap log and job size. */ + CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) ); + CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, 2 MB) ); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_EQ(value, 5); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_EQ(value, 2 MB); + /* Set the number of workers and check the overlap log and job size.
*/ + CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) ); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) ); + CHECK_EQ(value, 5); + CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) ); + CHECK_EQ(value, 2 MB); + ZSTD_freeCCtxParams(params); + + } + DISPLAYLEVEL(3, "OK \n"); /* Simple API multiframe test */ DISPLAYLEVEL(3, "test%3i : compress multiple frames : ", testNb++); @@ -859,6 +949,11 @@ static int basicUnitTests(U32 seed, double compressibility) if (r != CNBuffSize / 2) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : get tight decompressed bound of multiple frames : ", testNb++); + { unsigned long long const bound = ZSTD_decompressBound(compressedBuffer, cSize); + if (bound != CNBuffSize / 2) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : decompress multiple frames : ", testNb++); { CHECK_V(r, ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize)); if (r != CNBuffSize / 2) goto _output_error; } @@ -1203,9 +1298,13 @@ static int basicUnitTests(U32 seed, double compressibility) { size_t ret; MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY); + /* Either operation is allowed to fail, but one must fail. 
*/ ret = ZSTD_CCtx_loadDictionary_advanced( cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto); - if (!ZSTD_isError(ret)) goto _output_error; + if (!ZSTD_isError(ret)) { + ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); + if (!ZSTD_isError(ret)) goto _output_error; + } } DISPLAYLEVEL(3, "OK \n"); @@ -1216,6 +1315,152 @@ static int basicUnitTests(U32 seed, double compressibility) ret = ZSTD_CCtx_loadDictionary_advanced( cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent); if (ZSTD_isError(ret)) goto _output_error; + ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)); + if (ZSTD_isError(ret)) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++); + { ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 )); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) ); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 )); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading dictionary before setting parameters is the same as loading after : ", testNb++); + { + size_t size1, size2; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7) ); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size1)) goto _output_error; + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + CHECK_Z( 
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7) ); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size2)) goto _output_error; + + if (size1 != size2) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a dictionary clears the prefix : ", testNb++); + { + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a dictionary clears the cdict : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a cdict clears the prefix : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a cdict clears the dictionary : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + 
DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a prefix clears the dictionary : ", testNb++); + { + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loading a prefix clears the cdict : ", testNb++); + { + ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 1); + CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) ); + CHECK_Z( ZSTD_CCtx_refPrefix(cctx, (const char*)dictBuffer, dictSize) ); + CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100)) ); + ZSTD_freeCDict(cdict); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loaded dictionary persists across reset session : ", testNb++); + { + size_t size1, size2; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size1)) goto _output_error; + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size2)) goto _output_error; + + if (size1 != size2) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : Loaded dictionary is cleared after resetting parameters : ", testNb++); + { + size_t size1, size2; + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, CNBuffer, MIN(CNBuffSize, 10 KB)) ); + size1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size1)) goto _output_error; + + ZSTD_CCtx_reset(cctx, 
ZSTD_reset_session_and_parameters); + size2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + if (ZSTD_isError(size2)) goto _output_error; + + if (size1 == size2) goto _output_error; + } + DISPLAYLEVEL(3, "OK \n"); + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_CCtx_loadDictionary(cctx, dictBuffer, dictSize) ); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100 KB)); + CHECK_Z(cSize); + DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with dictionary : ", testNb++); + { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + size_t ret; + /* We should fail to decompress without a dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + /* We should succeed to decompress with the dictionary. */ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + CHECK_Z( ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictSize) ); + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* The dictionary should persist across calls. */ + CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) ); + /* When we reset the context the dictionary is cleared.
*/ + ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); + ret = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (!ZSTD_isError(ret)) goto _output_error; + ZSTD_freeDCtx(dctx); } DISPLAYLEVEL(3, "OK \n"); diff --git a/tests/legacy.c b/tests/legacy.c index b749567f4..eb3292038 100644 --- a/tests/legacy.c +++ b/tests/legacy.c @@ -16,10 +16,11 @@ /*=========================================== * Dependencies *==========================================*/ -#include <stddef.h> /* size_t */ -#include <stdlib.h> /* malloc, free */ -#include <stdio.h> /* fprintf */ -#include <string.h> /* strlen */ +#include <stddef.h> /* size_t */ +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* fprintf */ +#include <string.h> /* strlen */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_decompressBound */ #include "zstd.h" #include "zstd_errors.h" @@ -130,12 +131,41 @@ static int testStreamingAPI(void) return error_code; } +static int testFrameDecoding(void) +{ + if (strlen(EXPECTED) > ZSTD_decompressBound(COMPRESSED, COMPRESSED_SIZE)) { + DISPLAY("ERROR: ZSTD_decompressBound: decompressed bound too small\n"); + return 1; + } + { const char* ip = COMPRESSED; + size_t remainingSize = COMPRESSED_SIZE; + while (1) { + size_t frameSize = ZSTD_findFrameCompressedSize(ip, remainingSize); + if (ZSTD_isError(frameSize)) { + DISPLAY("ERROR: ZSTD_findFrameCompressedSize: %s\n", ZSTD_getErrorName(frameSize)); + return 1; + } + if (frameSize > remainingSize) { + DISPLAY("ERROR: ZSTD_findFrameCompressedSize: expected frameSize to align with src buffer"); + return 1; + } + ip += frameSize; + remainingSize -= frameSize; + if (remainingSize == 0) break; + } + } + DISPLAY("Frame Decoding OK\n"); + return 0; +} + int main(void) { { int const ret = testSimpleAPI(); if (ret) return ret; } { int const ret = testStreamingAPI(); if (ret) return ret; } + { int const ret = testFrameDecoding(); + if (ret) return ret; } DISPLAY("OK\n"); return 0; diff --git a/tests/playTests.sh b/tests/playTests.sh index 8342455a1..d22f617e4 100755 ---
a/tests/playTests.sh +++ b/tests/playTests.sh @@ -200,6 +200,15 @@ $ZSTD tmp -fo tmp && die "zstd compression overwrote the input file" $ZSTD tmp.zst -dfo tmp.zst && die "zstd decompression overwrote the input file" $ECHO "test: detect that input file does not exist" $ZSTD nothere && die "zstd hasn't detected that input file does not exist" +$ECHO "test: --[no-]compress-literals" +$ZSTD tmp -c --no-compress-literals -1 | $ZSTD -t +$ZSTD tmp -c --no-compress-literals --fast=1 | $ZSTD -t +$ZSTD tmp -c --no-compress-literals -19 | $ZSTD -t +$ZSTD tmp -c --compress-literals -1 | $ZSTD -t +$ZSTD tmp -c --compress-literals --fast=1 | $ZSTD -t +$ZSTD tmp -c --compress-literals -19 | $ZSTD -t +$ZSTD -b --fast=1 -i1e1 tmp --compress-literals +$ZSTD -b --fast=1 -i1e1 tmp --no-compress-literals $ECHO "test : file removal" $ZSTD -f --rm tmp @@ -314,18 +323,28 @@ $ECHO foo | $ZSTD > /dev/full && die "write error not detected!" $ECHO "$ECHO foo | $ZSTD | $ZSTD -d > /dev/full" $ECHO foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!" +fi + + +if [ "$isWindows" = false ] && [ "$UNAME" != 'SunOS' ] ; then $ECHO "\n===> symbolic link test " -rm -f hello.tmp world.tmp hello.tmp.zst world.tmp.zst +rm -f hello.tmp world.tmp world2.tmp hello.tmp.zst world.tmp.zst $ECHO "hello world" > hello.tmp ln -s hello.tmp world.tmp -$ZSTD world.tmp hello.tmp +ln -s hello.tmp world2.tmp +$ZSTD world.tmp hello.tmp || true test -f hello.tmp.zst # regular file should have been compressed! test ! -f world.tmp.zst # symbolic link should not have been compressed! +$ZSTD world.tmp || true +test ! -f world.tmp.zst # symbolic link should not have been compressed! +$ZSTD world.tmp world2.tmp || true +test ! -f world.tmp.zst # symbolic link should not have been compressed! +test ! -f world2.tmp.zst # symbolic link should not have been compressed! 
$ZSTD world.tmp hello.tmp -f test -f world.tmp.zst # symbolic link should have been compressed with --force -rm -f hello.tmp world.tmp hello.tmp.zst world.tmp.zst +rm -f hello.tmp world.tmp world2.tmp hello.tmp.zst world.tmp.zst fi @@ -391,6 +410,8 @@ $ECHO "- Create first dictionary " TESTFILE=../programs/zstdcli.c $ZSTD --train *.c ../programs/*.c -o tmpDict cp $TESTFILE tmp +$ECHO "- Test dictionary compression with tmpDict as an input file and dictionary" +$ZSTD -f tmpDict -D tmpDict && die "compression error not detected!" $ECHO "- Dictionary compression roundtrip" $ZSTD -f tmp -D tmpDict $ZSTD -d tmp.zst -D tmpDict -fo result @@ -815,7 +836,7 @@ FULL_COMPRESSED_FILE=${TEST_DATA_FILE}.zst TRUNCATED_COMPRESSED_FILE=truncated-input.txt.zst ./datagen -g50000 > $TEST_DATA_FILE $ZSTD -f $TEST_DATA_FILE -o $FULL_COMPRESSED_FILE -head -c 100 $FULL_COMPRESSED_FILE > $TRUNCATED_COMPRESSED_FILE +dd bs=1 count=100 if=$FULL_COMPRESSED_FILE of=$TRUNCATED_COMPRESSED_FILE status=none $ZSTD --list $TRUNCATED_COMPRESSED_FILE && die "-l must fail on truncated file" rm $TEST_DATA_FILE diff --git a/tests/regression/config.c b/tests/regression/config.c index 8a6103b29..b82482f46 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -90,6 +90,17 @@ static config_t mt_ldm = { .param_values = PARAM_VALUES(mt_ldm_param_values), }; +static param_value_t mt_advanced_param_values[] = { + {.param = ZSTD_c_nbWorkers, .value = 2}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t mt_advanced = { + .name = "multithreaded with advanced params", + .cli_args = "-T2 --no-compress-literals", + .param_values = PARAM_VALUES(mt_advanced_param_values), +}; + static param_value_t const small_wlog_param_values[] = { {.param = ZSTD_c_windowLog, .value = 10}, }; @@ -122,6 +133,39 @@ static config_t small_clog = { .param_values = PARAM_VALUES(small_clog_param_values), }; +static param_value_t const uncompressed_literals_param_values[] = 
{ + {.param = ZSTD_c_compressionLevel, .value = 3}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals = { + .name = "uncompressed literals", + .cli_args = "-3 --no-compress-literals", + .param_values = PARAM_VALUES(uncompressed_literals_param_values), +}; + +static param_value_t const uncompressed_literals_opt_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = 19}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals_opt = { + .name = "uncompressed literals optimal", + .cli_args = "-19 --no-compress-literals", + .param_values = PARAM_VALUES(uncompressed_literals_opt_param_values), +}; + +static param_value_t const huffman_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = -1}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_huffman}, +}; + +static config_t huffman_literals = { + .name = "huffman literals", + .cli_args = "--fast=1 --compress-literals", + .param_values = PARAM_VALUES(huffman_literals_param_values), +}; + static param_value_t const explicit_params_param_values[] = { {.param = ZSTD_c_checksumFlag, .value = 1}, {.param = ZSTD_c_contentSizeFlag, .value = 0}, @@ -155,6 +199,10 @@ static config_t const* g_configs[] = { &small_hlog, &small_clog, &explicit_params, + &uncompressed_literals, + &uncompressed_literals_opt, + &huffman_literals, + &mt_advanced, NULL, }; diff --git a/tests/regression/method.c b/tests/regression/method.c index 51a0fb9d1..1e84021c3 100644 --- a/tests/regression/method.c +++ b/tests/regression/method.c @@ -175,8 +175,8 @@ static result_t compress_cctx_compress( state->compressed.capacity, input.data, input.size, - state->dictionary.data, - state->dictionary.size, + config->use_dictionary ? state->dictionary.data : NULL, + config->use_dictionary ? 
state->dictionary.size : 0, params); else if (config->use_dictionary) state->compressed.size = ZSTD_compress_usingDict( @@ -432,77 +432,158 @@ out: return result; } -static result_t old_streaming_compress( - method_state_t* base, - config_t const* config) { - buffer_state_t* state = container_of(base, buffer_state_t, base); - - if (buffer_state_bad(state, config)) - return result_error(result_error_system_error); - - int const level = config_get_level(config); - if (level == CONFIG_NO_LEVEL) - return result_error(result_error_skip); - - ZSTD_CStream* zcs = ZSTD_createCStream(); - result_t result; - if (zcs == NULL) { - result = result_error(result_error_compression_error); - goto out; - } +static int init_cstream( + buffer_state_t* state, + ZSTD_CStream* zcs, + config_t const* config, + int const advanced, + ZSTD_CDict** cdict) +{ size_t zret; - if (config->use_dictionary) { - zret = ZSTD_initCStream_usingDict( - zcs, state->dictionary.data, state->dictionary.size, level); + if (advanced) { + ZSTD_parameters const params = config_get_zstd_params(config, 0, 0); + ZSTD_CDict* dict = NULL; + if (cdict) { + *cdict = ZSTD_createCDict_advanced( + state->dictionary.data, + state->dictionary.size, + ZSTD_dlm_byRef, + ZSTD_dct_auto, + params.cParams, + ZSTD_defaultCMem); + if (!*cdict) { + return 1; + } + zret = ZSTD_initCStream_usingCDict_advanced( + zcs, *cdict, params.fParams, ZSTD_CONTENTSIZE_UNKNOWN); + } else { + zret = ZSTD_initCStream_advanced( + zcs, + state->dictionary.data, + state->dictionary.size, + params, + ZSTD_CONTENTSIZE_UNKNOWN); + } } else { - zret = ZSTD_initCStream(zcs, level); + int const level = config_get_level(config); + if (cdict) { + *cdict = ZSTD_createCDict( + state->dictionary.data, + state->dictionary.size, + level); + if (!*cdict) { + return 1; + } + zret = ZSTD_initCStream_usingCDict(zcs, *cdict); + } else if (config->use_dictionary) { + zret = ZSTD_initCStream_usingDict( + zcs, state->dictionary.data, state->dictionary.size, level); + } 
else { + zret = ZSTD_initCStream(zcs, level); + } } if (ZSTD_isError(zret)) { - result = result_error(result_error_compression_error); - goto out; + return 1; + } + return 0; +} + +static result_t old_streaming_compress_internal( + method_state_t* base, + config_t const* config, + int const advanced, + int const cdict) { + buffer_state_t* state = container_of(base, buffer_state_t, base); + + if (buffer_state_bad(state, config)) + return result_error(result_error_system_error); + + + ZSTD_CStream* zcs = ZSTD_createCStream(); + ZSTD_CDict* cd = NULL; + result_t result; + if (zcs == NULL) { + result = result_error(result_error_compression_error); + goto out; + } + if (init_cstream(state, zcs, config, advanced, cdict ? &cd : NULL)) { + result = result_error(result_error_compression_error); + goto out; + } + + result_data_t data = {.total_size = 0}; + for (size_t i = 0; i < state->inputs.size; ++i) { + data_buffer_t input = state->inputs.buffers[i]; + size_t zret = ZSTD_resetCStream( + zcs, + config->no_pledged_src_size ? ZSTD_CONTENTSIZE_UNKNOWN : input.size); + if (ZSTD_isError(zret)) { + result = result_error(result_error_compression_error); + goto out; } - result_data_t data = {.total_size = 0}; - for (size_t i = 0; i < state->inputs.size; ++i) { - data_buffer_t input = state->inputs.buffers[i]; - zret = ZSTD_resetCStream( - zcs, - config->no_pledged_src_size ? ZSTD_CONTENTSIZE_UNKNOWN - : input.size); + while (input.size > 0) { + ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)}; + input.data += in.size; + input.size -= in.size; + ZSTD_EndDirective const op = + input.size > 0 ? 
ZSTD_e_continue : ZSTD_e_end; + zret = 0; + while (in.pos < in.size || (op == ZSTD_e_end && zret != 0)) { + ZSTD_outBuffer out = {state->compressed.data, + MIN(state->compressed.capacity, 1024)}; + if (op == ZSTD_e_continue || in.pos < in.size) + zret = ZSTD_compressStream(zcs, &out, &in); + else + zret = ZSTD_endStream(zcs, &out); if (ZSTD_isError(zret)) { - result = result_error(result_error_compression_error); - goto out; - } - - while (input.size > 0) { - ZSTD_inBuffer in = {input.data, MIN(input.size, 4096)}; - input.data += in.size; - input.size -= in.size; - ZSTD_EndDirective const op = - input.size > 0 ? ZSTD_e_continue : ZSTD_e_end; - zret = 0; - while (in.pos < in.size || (op == ZSTD_e_end && zret != 0)) { - ZSTD_outBuffer out = {state->compressed.data, - MIN(state->compressed.capacity, 1024)}; - if (op == ZSTD_e_continue || in.pos < in.size) - zret = ZSTD_compressStream(zcs, &out, &in); - else - zret = ZSTD_endStream(zcs, &out); - if (ZSTD_isError(zret)) { - result = result_error(result_error_compression_error); - goto out; - } - data.total_size += out.pos; - } + result = result_error(result_error_compression_error); + goto out; } + data.total_size += out.pos; + } } + } - result = result_data(data); + result = result_data(data); out: ZSTD_freeCStream(zcs); + ZSTD_freeCDict(cd); return result; } +static result_t old_streaming_compress( + method_state_t* base, + config_t const* config) +{ + return old_streaming_compress_internal( + base, config, /* advanced */ 0, /* cdict */ 0); +} + +static result_t old_streaming_compress_advanced( + method_state_t* base, + config_t const* config) +{ + return old_streaming_compress_internal( + base, config, /* advanced */ 1, /* cdict */ 0); +} + +static result_t old_streaming_compress_cdict( + method_state_t* base, + config_t const* config) +{ + return old_streaming_compress_internal( + base, config, /* advanced */ 0, /* cdict */ 1); +} + +static result_t old_streaming_compress_cdict_advanced( + method_state_t* base, + 
config_t const* config) +{ + return old_streaming_compress_internal( + base, config, /* advanced */ 1, /* cdict */ 1); +} + method_t const simple = { .name = "compress simple", .create = buffer_state_create, @@ -545,6 +626,27 @@ method_t const old_streaming = { .destroy = buffer_state_destroy, }; +method_t const old_streaming_advanced = { + .name = "old streaming advanced", + .create = buffer_state_create, + .compress = old_streaming_compress, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming_cdict = { + .name = "old streaming cdcit", + .create = buffer_state_create, + .compress = old_streaming_compress, + .destroy = buffer_state_destroy, +}; + +method_t const old_streaming_advanced_cdict = { + .name = "old streaming advanced cdict", + .create = buffer_state_create, + .compress = old_streaming_compress, + .destroy = buffer_state_destroy, +}; + method_t const cli = { .name = "zstdcli", .create = method_state_create, @@ -560,6 +662,9 @@ static method_t const* g_methods[] = { &advanced_one_pass_small_out, &advanced_streaming, &old_streaming, + &old_streaming_advanced, + &old_streaming_cdict, + &old_streaming_advanced_cdict, NULL, }; diff --git a/tests/regression/results.csv b/tests/regression/results.csv index a62178d6e..7ac94f207 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -1,448 +1,816 @@ -Data, Config, Method, Total compressed size -silesia.tar, level -5, compress simple, 7160438 -silesia.tar, level -3, compress simple, 6789024 -silesia.tar, level -1, compress simple, 6195462 -silesia.tar, level 0, compress simple, 4875008 -silesia.tar, level 1, compress simple, 5339697 -silesia.tar, level 3, compress simple, 4875008 -silesia.tar, level 4, compress simple, 4813507 -silesia.tar, level 5, compress simple, 4722235 -silesia.tar, level 6, compress simple, 4672194 -silesia.tar, level 7, compress simple, 4606658 -silesia.tar, level 9, compress simple, 4554098 -silesia.tar, level 13, compress simple, 4491702 
-silesia.tar, level 16, compress simple, 4381277 -silesia.tar, level 19, compress simple, 4281514 -silesia, level -5, compress cctx, 7152294 -silesia, level -3, compress cctx, 6789969 -silesia, level -1, compress cctx, 6191548 -silesia, level 0, compress cctx, 4862377 -silesia, level 1, compress cctx, 5318036 -silesia, level 3, compress cctx, 4862377 -silesia, level 4, compress cctx, 4800629 -silesia, level 5, compress cctx, 4710178 -silesia, level 6, compress cctx, 4659996 -silesia, level 7, compress cctx, 4596234 -silesia, level 9, compress cctx, 4543862 -silesia, level 13, compress cctx, 4482073 -silesia, level 16, compress cctx, 4377391 -silesia, level 19, compress cctx, 4293262 -silesia, long distance mode, compress cctx, 4862377 -silesia, multithreaded, compress cctx, 4862377 -silesia, multithreaded long distance mode, compress cctx, 4862377 -silesia, small window log, compress cctx, 7115734 -silesia, small hash log, compress cctx, 6554898 -silesia, small chain log, compress cctx, 4931093 -silesia, explicit params, compress cctx, 4813352 -github, level -5, compress cctx, 232744 -github, level -5 with dict, compress cctx, 47294 -github, level -3, compress cctx, 220611 -github, level -3 with dict, compress cctx, 48047 -github, level -1, compress cctx, 176575 -github, level -1 with dict, compress cctx, 43527 -github, level 0, compress cctx, 136397 -github, level 0 with dict, compress cctx, 41536 -github, level 1, compress cctx, 143457 -github, level 1 with dict, compress cctx, 42157 -github, level 3, compress cctx, 136397 -github, level 3 with dict, compress cctx, 41536 -github, level 4, compress cctx, 136144 -github, level 4 with dict, compress cctx, 41721 -github, level 5, compress cctx, 135106 -github, level 5 with dict, compress cctx, 38934 -github, level 6, compress cctx, 135108 -github, level 6 with dict, compress cctx, 38628 -github, level 7, compress cctx, 135108 -github, level 7 with dict, compress cctx, 38741 -github, level 9, compress cctx, 135108 
-github, level 9 with dict, compress cctx, 39335 -github, level 13, compress cctx, 133717 -github, level 13 with dict, compress cctx, 39923 -github, level 16, compress cctx, 133717 -github, level 16 with dict, compress cctx, 37568 -github, level 19, compress cctx, 133717 -github, level 19 with dict, compress cctx, 37567 -github, long distance mode, compress cctx, decompression error -github, multithreaded, compress cctx, decompression error -github, multithreaded long distance mode, compress cctx, decompression error -github, small window log, compress cctx, decompression error -github, small hash log, compress cctx, decompression error -github, small chain log, compress cctx, decompression error -github, explicit params, compress cctx, decompression error -silesia, level -5, zstdcli, 7152342 -silesia, level -3, zstdcli, 6790021 -silesia, level -1, zstdcli, 6191597 -silesia, level 0, zstdcli, 4862425 -silesia, level 1, zstdcli, 5318084 -silesia, level 3, zstdcli, 4862425 -silesia, level 4, zstdcli, 4800677 -silesia, level 5, zstdcli, 4710226 -silesia, level 6, zstdcli, 4660044 -silesia, level 7, zstdcli, 4596282 -silesia, level 9, zstdcli, 4543910 -silesia, level 13, zstdcli, 4482121 -silesia, level 16, zstdcli, 4377439 -silesia, level 19, zstdcli, 4293310 -silesia, long distance mode, zstdcli, 4853437 -silesia, multithreaded, zstdcli, 4862425 -silesia, multithreaded long distance mode, zstdcli, 4853437 -silesia, small window log, zstdcli, 7126434 -silesia, small hash log, zstdcli, 6554946 -silesia, small chain log, zstdcli, 4931141 -silesia, explicit params, zstdcli, 4815380 -silesia.tar, level -5, zstdcli, 7161160 -silesia.tar, level -3, zstdcli, 6789865 -silesia.tar, level -1, zstdcli, 6196433 -silesia.tar, level 0, zstdcli, 4875136 -silesia.tar, level 1, zstdcli, 5340573 -silesia.tar, level 3, zstdcli, 4875136 -silesia.tar, level 4, zstdcli, 4814531 -silesia.tar, level 5, zstdcli, 4723284 -silesia.tar, level 6, zstdcli, 4673591 -silesia.tar, level 7, zstdcli, 
4608342 -silesia.tar, level 9, zstdcli, 4554700 -silesia.tar, level 13, zstdcli, 4491706 -silesia.tar, level 16, zstdcli, 4381281 -silesia.tar, level 19, zstdcli, 4281518 -silesia.tar, no source size, zstdcli, 4875132 -silesia.tar, long distance mode, zstdcli, 4866975 -silesia.tar, multithreaded, zstdcli, 4875136 -silesia.tar, multithreaded long distance mode, zstdcli, 4866975 -silesia.tar, small window log, zstdcli, 7130434 -silesia.tar, small hash log, zstdcli, 6587841 -silesia.tar, small chain log, zstdcli, 4943259 -silesia.tar, explicit params, zstdcli, 4839202 -github, level -5, zstdcli, 234744 -github, level -5 with dict, zstdcli, 48718 -github, level -3, zstdcli, 222611 -github, level -3 with dict, zstdcli, 47395 -github, level -1, zstdcli, 178575 -github, level -1 with dict, zstdcli, 45170 -github, level 0, zstdcli, 138397 -github, level 0 with dict, zstdcli, 43170 -github, level 1, zstdcli, 145457 -github, level 1 with dict, zstdcli, 43682 -github, level 3, zstdcli, 138397 -github, level 3 with dict, zstdcli, 43170 -github, level 4, zstdcli, 138144 -github, level 4 with dict, zstdcli, 43306 -github, level 5, zstdcli, 137106 -github, level 5 with dict, zstdcli, 40938 -github, level 6, zstdcli, 137108 -github, level 6 with dict, zstdcli, 40632 -github, level 7, zstdcli, 137108 -github, level 7 with dict, zstdcli, 40766 -github, level 9, zstdcli, 137108 -github, level 9 with dict, zstdcli, 41326 -github, level 13, zstdcli, 135717 -github, level 13 with dict, zstdcli, 41716 -github, level 16, zstdcli, 135717 -github, level 16 with dict, zstdcli, 39577 -github, level 19, zstdcli, 135717 -github, level 19 with dict, zstdcli, 39576 -github, long distance mode, zstdcli, 138397 -github, multithreaded, zstdcli, 138397 -github, multithreaded long distance mode, zstdcli, 138397 -github, small window log, zstdcli, 138397 -github, small hash log, zstdcli, 137467 -github, small chain log, zstdcli, 138314 -github, explicit params, zstdcli, 136140 -silesia, level -5, 
advanced one pass, 7152294 -silesia, level -3, advanced one pass, 6789969 -silesia, level -1, advanced one pass, 6191548 -silesia, level 0, advanced one pass, 4862377 -silesia, level 1, advanced one pass, 5318036 -silesia, level 3, advanced one pass, 4862377 -silesia, level 4, advanced one pass, 4800629 -silesia, level 5, advanced one pass, 4710178 -silesia, level 6, advanced one pass, 4659996 -silesia, level 7, advanced one pass, 4596234 -silesia, level 9, advanced one pass, 4543862 -silesia, level 13, advanced one pass, 4482073 -silesia, level 16, advanced one pass, 4377391 -silesia, level 19, advanced one pass, 4293262 -silesia, no source size, advanced one pass, 4862377 -silesia, long distance mode, advanced one pass, 4853389 -silesia, multithreaded, advanced one pass, 4862377 -silesia, multithreaded long distance mode, advanced one pass, 4853389 -silesia, small window log, advanced one pass, 7126386 -silesia, small hash log, advanced one pass, 6554898 -silesia, small chain log, advanced one pass, 4931093 -silesia, explicit params, advanced one pass, 4815369 -silesia.tar, level -5, advanced one pass, 7160438 -silesia.tar, level -3, advanced one pass, 6789024 -silesia.tar, level -1, advanced one pass, 6195462 -silesia.tar, level 0, advanced one pass, 4875008 -silesia.tar, level 1, advanced one pass, 5339697 -silesia.tar, level 3, advanced one pass, 4875008 -silesia.tar, level 4, advanced one pass, 4813507 -silesia.tar, level 5, advanced one pass, 4722235 -silesia.tar, level 6, advanced one pass, 4672194 -silesia.tar, level 7, advanced one pass, 4606658 -silesia.tar, level 9, advanced one pass, 4554098 -silesia.tar, level 13, advanced one pass, 4491702 -silesia.tar, level 16, advanced one pass, 4381277 -silesia.tar, level 19, advanced one pass, 4281514 -silesia.tar, no source size, advanced one pass, 4875008 -silesia.tar, long distance mode, advanced one pass, 4861218 -silesia.tar, multithreaded, advanced one pass, 4874631 -silesia.tar, multithreaded long 
distance mode, advanced one pass, 4860683 -silesia.tar, small window log, advanced one pass, 7130394 -silesia.tar, small hash log, advanced one pass, 6587833 -silesia.tar, small chain log, advanced one pass, 4943255 -silesia.tar, explicit params, advanced one pass, 4829974 -github, level -5, advanced one pass, 232744 -github, level -5 with dict, advanced one pass, 46718 -github, level -3, advanced one pass, 220611 -github, level -3 with dict, advanced one pass, 45395 -github, level -1, advanced one pass, 176575 -github, level -1 with dict, advanced one pass, 43170 -github, level 0, advanced one pass, 136397 -github, level 0 with dict, advanced one pass, 41170 -github, level 1, advanced one pass, 143457 -github, level 1 with dict, advanced one pass, 41682 -github, level 3, advanced one pass, 136397 -github, level 3 with dict, advanced one pass, 41170 -github, level 4, advanced one pass, 136144 -github, level 4 with dict, advanced one pass, 41306 -github, level 5, advanced one pass, 135106 -github, level 5 with dict, advanced one pass, 38938 -github, level 6, advanced one pass, 135108 -github, level 6 with dict, advanced one pass, 38632 -github, level 7, advanced one pass, 135108 -github, level 7 with dict, advanced one pass, 38766 -github, level 9, advanced one pass, 135108 -github, level 9 with dict, advanced one pass, 39326 -github, level 13, advanced one pass, 133717 -github, level 13 with dict, advanced one pass, 39716 -github, level 16, advanced one pass, 133717 -github, level 16 with dict, advanced one pass, 37577 -github, level 19, advanced one pass, 133717 -github, level 19 with dict, advanced one pass, 37576 -github, no source size, advanced one pass, 136397 -github, long distance mode, advanced one pass, 136397 -github, multithreaded, advanced one pass, 136397 -github, multithreaded long distance mode, advanced one pass, 136397 -github, small window log, advanced one pass, 136397 -github, small hash log, advanced one pass, 135467 -github, small chain log, 
advanced one pass, 136314 -github, explicit params, advanced one pass, 137670 -silesia, level -5, advanced one pass small out, 7152294 -silesia, level -3, advanced one pass small out, 6789969 -silesia, level -1, advanced one pass small out, 6191548 -silesia, level 0, advanced one pass small out, 4862377 -silesia, level 1, advanced one pass small out, 5318036 -silesia, level 3, advanced one pass small out, 4862377 -silesia, level 4, advanced one pass small out, 4800629 -silesia, level 5, advanced one pass small out, 4710178 -silesia, level 6, advanced one pass small out, 4659996 -silesia, level 7, advanced one pass small out, 4596234 -silesia, level 9, advanced one pass small out, 4543862 -silesia, level 13, advanced one pass small out, 4482073 -silesia, level 16, advanced one pass small out, 4377391 -silesia, level 19, advanced one pass small out, 4293262 -silesia, no source size, advanced one pass small out, 4862377 -silesia, long distance mode, advanced one pass small out, 4853389 -silesia, multithreaded, advanced one pass small out, 4862377 -silesia, multithreaded long distance mode, advanced one pass small out, 4853389 -silesia, small window log, advanced one pass small out, 7126386 -silesia, small hash log, advanced one pass small out, 6554898 -silesia, small chain log, advanced one pass small out, 4931093 -silesia, explicit params, advanced one pass small out, 4815369 -silesia.tar, level -5, advanced one pass small out, 7160438 -silesia.tar, level -3, advanced one pass small out, 6789024 -silesia.tar, level -1, advanced one pass small out, 6195462 -silesia.tar, level 0, advanced one pass small out, 4875008 -silesia.tar, level 1, advanced one pass small out, 5339697 -silesia.tar, level 3, advanced one pass small out, 4875008 -silesia.tar, level 4, advanced one pass small out, 4813507 -silesia.tar, level 5, advanced one pass small out, 4722235 -silesia.tar, level 6, advanced one pass small out, 4672194 -silesia.tar, level 7, advanced one pass small out, 4606658 
-silesia.tar, level 9, advanced one pass small out, 4554098 -silesia.tar, level 13, advanced one pass small out, 4491702 -silesia.tar, level 16, advanced one pass small out, 4381277 -silesia.tar, level 19, advanced one pass small out, 4281514 -silesia.tar, no source size, advanced one pass small out, 4875008 -silesia.tar, long distance mode, advanced one pass small out, 4861218 -silesia.tar, multithreaded, advanced one pass small out, 4874631 -silesia.tar, multithreaded long distance mode, advanced one pass small out, 4860683 -silesia.tar, small window log, advanced one pass small out, 7130394 -silesia.tar, small hash log, advanced one pass small out, 6587833 -silesia.tar, small chain log, advanced one pass small out, 4943255 -silesia.tar, explicit params, advanced one pass small out, 4829974 -github, level -5, advanced one pass small out, 232744 -github, level -5 with dict, advanced one pass small out, 46718 -github, level -3, advanced one pass small out, 220611 -github, level -3 with dict, advanced one pass small out, 45395 -github, level -1, advanced one pass small out, 176575 -github, level -1 with dict, advanced one pass small out, 43170 -github, level 0, advanced one pass small out, 136397 -github, level 0 with dict, advanced one pass small out, 41170 -github, level 1, advanced one pass small out, 143457 -github, level 1 with dict, advanced one pass small out, 41682 -github, level 3, advanced one pass small out, 136397 -github, level 3 with dict, advanced one pass small out, 41170 -github, level 4, advanced one pass small out, 136144 -github, level 4 with dict, advanced one pass small out, 41306 -github, level 5, advanced one pass small out, 135106 -github, level 5 with dict, advanced one pass small out, 38938 -github, level 6, advanced one pass small out, 135108 -github, level 6 with dict, advanced one pass small out, 38632 -github, level 7, advanced one pass small out, 135108 -github, level 7 with dict, advanced one pass small out, 38766 -github, level 9, 
advanced one pass small out, 135108 -github, level 9 with dict, advanced one pass small out, 39326 -github, level 13, advanced one pass small out, 133717 -github, level 13 with dict, advanced one pass small out, 39716 -github, level 16, advanced one pass small out, 133717 -github, level 16 with dict, advanced one pass small out, 37577 -github, level 19, advanced one pass small out, 133717 -github, level 19 with dict, advanced one pass small out, 37576 -github, no source size, advanced one pass small out, 136397 -github, long distance mode, advanced one pass small out, 136397 -github, multithreaded, advanced one pass small out, 136397 -github, multithreaded long distance mode, advanced one pass small out, 136397 -github, small window log, advanced one pass small out, 136397 -github, small hash log, advanced one pass small out, 135467 -github, small chain log, advanced one pass small out, 136314 -github, explicit params, advanced one pass small out, 137670 -silesia, level -5, advanced streaming, 7152294 -silesia, level -3, advanced streaming, 6789973 -silesia, level -1, advanced streaming, 6191549 -silesia, level 0, advanced streaming, 4862377 -silesia, level 1, advanced streaming, 5318036 -silesia, level 3, advanced streaming, 4862377 -silesia, level 4, advanced streaming, 4800629 -silesia, level 5, advanced streaming, 4710178 -silesia, level 6, advanced streaming, 4659996 -silesia, level 7, advanced streaming, 4596234 -silesia, level 9, advanced streaming, 4543862 -silesia, level 13, advanced streaming, 4482073 -silesia, level 16, advanced streaming, 4377391 -silesia, level 19, advanced streaming, 4293262 -silesia, no source size, advanced streaming, 4862341 -silesia, long distance mode, advanced streaming, 4853389 -silesia, multithreaded, advanced streaming, 4862377 -silesia, multithreaded long distance mode, advanced streaming, 4853389 -silesia, small window log, advanced streaming, 7126389 -silesia, small hash log, advanced streaming, 6554898 -silesia, small 
chain log, advanced streaming, 4931093 -silesia, explicit params, advanced streaming, 4815380 -silesia.tar, level -5, advanced streaming, 7160440 -silesia.tar, level -3, advanced streaming, 6789026 -silesia.tar, level -1, advanced streaming, 6195465 -silesia.tar, level 0, advanced streaming, 4875010 -silesia.tar, level 1, advanced streaming, 5339701 -silesia.tar, level 3, advanced streaming, 4875010 -silesia.tar, level 4, advanced streaming, 4813507 -silesia.tar, level 5, advanced streaming, 4722240 -silesia.tar, level 6, advanced streaming, 4672203 -silesia.tar, level 7, advanced streaming, 4606658 -silesia.tar, level 9, advanced streaming, 4554105 -silesia.tar, level 13, advanced streaming, 4491703 -silesia.tar, level 16, advanced streaming, 4381277 -silesia.tar, level 19, advanced streaming, 4281514 -silesia.tar, no source size, advanced streaming, 4875006 -silesia.tar, long distance mode, advanced streaming, 4861218 -silesia.tar, multithreaded, advanced streaming, 4875132 -silesia.tar, multithreaded long distance mode, advanced streaming, 4866971 -silesia.tar, small window log, advanced streaming, 7130394 -silesia.tar, small hash log, advanced streaming, 6587834 -silesia.tar, small chain log, advanced streaming, 4943260 -silesia.tar, explicit params, advanced streaming, 4830002 -github, level -5, advanced streaming, 232744 -github, level -5 with dict, advanced streaming, 46718 -github, level -3, advanced streaming, 220611 -github, level -3 with dict, advanced streaming, 45395 -github, level -1, advanced streaming, 176575 -github, level -1 with dict, advanced streaming, 43170 -github, level 0, advanced streaming, 136397 -github, level 0 with dict, advanced streaming, 41170 -github, level 1, advanced streaming, 143457 -github, level 1 with dict, advanced streaming, 41682 -github, level 3, advanced streaming, 136397 -github, level 3 with dict, advanced streaming, 41170 -github, level 4, advanced streaming, 136144 -github, level 4 with dict, advanced streaming, 
41306 -github, level 5, advanced streaming, 135106 -github, level 5 with dict, advanced streaming, 38938 -github, level 6, advanced streaming, 135108 -github, level 6 with dict, advanced streaming, 38632 -github, level 7, advanced streaming, 135108 -github, level 7 with dict, advanced streaming, 38766 -github, level 9, advanced streaming, 135108 -github, level 9 with dict, advanced streaming, 39326 -github, level 13, advanced streaming, 133717 -github, level 13 with dict, advanced streaming, 39716 -github, level 16, advanced streaming, 133717 -github, level 16 with dict, advanced streaming, 37577 -github, level 19, advanced streaming, 133717 -github, level 19 with dict, advanced streaming, 37576 -github, no source size, advanced streaming, 136397 -github, long distance mode, advanced streaming, 136397 -github, multithreaded, advanced streaming, 136397 -github, multithreaded long distance mode, advanced streaming, 136397 -github, small window log, advanced streaming, 136397 -github, small hash log, advanced streaming, 135467 -github, small chain log, advanced streaming, 136314 -github, explicit params, advanced streaming, 137670 -silesia, level -5, old streaming, 7152294 -silesia, level -3, old streaming, 6789973 -silesia, level -1, old streaming, 6191549 -silesia, level 0, old streaming, 4862377 -silesia, level 1, old streaming, 5318036 -silesia, level 3, old streaming, 4862377 -silesia, level 4, old streaming, 4800629 -silesia, level 5, old streaming, 4710178 -silesia, level 6, old streaming, 4659996 -silesia, level 7, old streaming, 4596234 -silesia, level 9, old streaming, 4543862 -silesia, level 13, old streaming, 4482073 -silesia, level 16, old streaming, 4377391 -silesia, level 19, old streaming, 4293262 -silesia, no source size, old streaming, 4862341 -silesia.tar, level -5, old streaming, 7160440 -silesia.tar, level -3, old streaming, 6789026 -silesia.tar, level -1, old streaming, 6195465 -silesia.tar, level 0, old streaming, 4875010 -silesia.tar, level 1, 
old streaming, 5339701 -silesia.tar, level 3, old streaming, 4875010 -silesia.tar, level 4, old streaming, 4813507 -silesia.tar, level 5, old streaming, 4722240 -silesia.tar, level 6, old streaming, 4672203 -silesia.tar, level 7, old streaming, 4606658 -silesia.tar, level 9, old streaming, 4554105 -silesia.tar, level 13, old streaming, 4491703 -silesia.tar, level 16, old streaming, 4381277 -silesia.tar, level 19, old streaming, 4281514 -silesia.tar, no source size, old streaming, 4875006 -github, level -5, old streaming, 232744 -github, level -5 with dict, old streaming, 46718 -github, level -3, old streaming, 220611 -github, level -3 with dict, old streaming, 45395 -github, level -1, old streaming, 176575 -github, level -1 with dict, old streaming, 43170 -github, level 0, old streaming, 136397 -github, level 0 with dict, old streaming, 41170 -github, level 1, old streaming, 143457 -github, level 1 with dict, old streaming, 41682 -github, level 3, old streaming, 136397 -github, level 3 with dict, old streaming, 41170 -github, level 4, old streaming, 136144 -github, level 4 with dict, old streaming, 41306 -github, level 5, old streaming, 135106 -github, level 5 with dict, old streaming, 38938 -github, level 6, old streaming, 135108 -github, level 6 with dict, old streaming, 38632 -github, level 7, old streaming, 135108 -github, level 7 with dict, old streaming, 38766 -github, level 9, old streaming, 135108 -github, level 9 with dict, old streaming, 39326 -github, level 13, old streaming, 133717 -github, level 13 with dict, old streaming, 39716 -github, level 16, old streaming, 133717 -github, level 16 with dict, old streaming, 37577 -github, level 19, old streaming, 133717 -github, level 19 with dict, old streaming, 37576 -github, no source size, old streaming, 141003 +Data, Config, Method, Total compressed size +silesia.tar, level -5, compress simple, 6738558 +silesia.tar, level -3, compress simple, 6446362 +silesia.tar, level -1, compress simple, 6186038 
+silesia.tar, level 0, compress simple, 4875008 +silesia.tar, level 1, compress simple, 5334825 +silesia.tar, level 3, compress simple, 4875008 +silesia.tar, level 4, compress simple, 4813507 +silesia.tar, level 5, compress simple, 4722235 +silesia.tar, level 6, compress simple, 4672194 +silesia.tar, level 7, compress simple, 4606658 +silesia.tar, level 9, compress simple, 4554098 +silesia.tar, level 13, compress simple, 4491702 +silesia.tar, level 16, compress simple, 4381277 +silesia.tar, level 19, compress simple, 4281514 +silesia.tar, uncompressed literals, compress simple, 4875008 +silesia.tar, uncompressed literals optimal, compress simple, 4281514 +silesia.tar, huffman literals, compress simple, 6186038 +silesia, level -5, compress cctx, 6737567 +silesia, level -3, compress cctx, 6444663 +silesia, level -1, compress cctx, 6178442 +silesia, level 0, compress cctx, 4862377 +silesia, level 1, compress cctx, 5313144 +silesia, level 3, compress cctx, 4862377 +silesia, level 4, compress cctx, 4800629 +silesia, level 5, compress cctx, 4710178 +silesia, level 6, compress cctx, 4659996 +silesia, level 7, compress cctx, 4596234 +silesia, level 9, compress cctx, 4543862 +silesia, level 13, compress cctx, 4482073 +silesia, level 16, compress cctx, 4377391 +silesia, level 19, compress cctx, 4293262 +silesia, long distance mode, compress cctx, 4862377 +silesia, multithreaded, compress cctx, 4862377 +silesia, multithreaded long distance mode, compress cctx, 4862377 +silesia, small window log, compress cctx, 7115734 +silesia, small hash log, compress cctx, 6554898 +silesia, small chain log, compress cctx, 4931093 +silesia, explicit params, compress cctx, 4813352 +silesia, uncompressed literals, compress cctx, 4862377 +silesia, uncompressed literals optimal, compress cctx, 4293262 +silesia, huffman literals, compress cctx, 6178442 +silesia, multithreaded with advanced params, compress cctx, 4862377 +github, level -5, compress cctx, 205285 +github, level -5 with dict, 
compress cctx, 47294 +github, level -3, compress cctx, 190643 +github, level -3 with dict, compress cctx, 48047 +github, level -1, compress cctx, 175568 +github, level -1 with dict, compress cctx, 43527 +github, level 0, compress cctx, 136397 +github, level 0 with dict, compress cctx, 41536 +github, level 1, compress cctx, 142450 +github, level 1 with dict, compress cctx, 42157 +github, level 3, compress cctx, 136397 +github, level 3 with dict, compress cctx, 41536 +github, level 4, compress cctx, 136144 +github, level 4 with dict, compress cctx, 41721 +github, level 5, compress cctx, 135106 +github, level 5 with dict, compress cctx, 38934 +github, level 6, compress cctx, 135108 +github, level 6 with dict, compress cctx, 38628 +github, level 7, compress cctx, 135108 +github, level 7 with dict, compress cctx, 38741 +github, level 9, compress cctx, 135108 +github, level 9 with dict, compress cctx, 39335 +github, level 13, compress cctx, 133717 +github, level 13 with dict, compress cctx, 39923 +github, level 16, compress cctx, 133717 +github, level 16 with dict, compress cctx, 37568 +github, level 19, compress cctx, 133717 +github, level 19 with dict, compress cctx, 37567 +github, long distance mode, compress cctx, 141473 +github, multithreaded, compress cctx, 141473 +github, multithreaded long distance mode, compress cctx, 141473 +github, small window log, compress cctx, 141473 +github, small hash log, compress cctx, 138943 +github, small chain log, compress cctx, 139239 +github, explicit params, compress cctx, 140924 +github, uncompressed literals, compress cctx, 136397 +github, uncompressed literals optimal, compress cctx, 133717 +github, huffman literals, compress cctx, 175568 +github, multithreaded with advanced params, compress cctx, 141473 +silesia, level -5, zstdcli, 6882514 +silesia, level -3, zstdcli, 6568406 +silesia, level -1, zstdcli, 6183433 +silesia, level 0, zstdcli, 4862425 +silesia, level 1, zstdcli, 5314157 +silesia, level 3, zstdcli, 4862425 
+silesia, level 4, zstdcli, 4800677 +silesia, level 5, zstdcli, 4710226 +silesia, level 6, zstdcli, 4660044 +silesia, level 7, zstdcli, 4596282 +silesia, level 9, zstdcli, 4543910 +silesia, level 13, zstdcli, 4482121 +silesia, level 16, zstdcli, 4377439 +silesia, level 19, zstdcli, 4293310 +silesia, long distance mode, zstdcli, 4853437 +silesia, multithreaded, zstdcli, 4862425 +silesia, multithreaded long distance mode, zstdcli, 4853437 +silesia, small window log, zstdcli, 7126434 +silesia, small hash log, zstdcli, 6554946 +silesia, small chain log, zstdcli, 4931141 +silesia, explicit params, zstdcli, 4815380 +silesia, uncompressed literals, zstdcli, 5155472 +silesia, uncompressed literals optimal, zstdcli, 4325475 +silesia, huffman literals, zstdcli, 5331158 +silesia, multithreaded with advanced params, zstdcli, 5155472 +silesia.tar, level -5, zstdcli, 6738906 +silesia.tar, level -3, zstdcli, 6448409 +silesia.tar, level -1, zstdcli, 6186908 +silesia.tar, level 0, zstdcli, 4875136 +silesia.tar, level 1, zstdcli, 5336255 +silesia.tar, level 3, zstdcli, 4875136 +silesia.tar, level 4, zstdcli, 4814531 +silesia.tar, level 5, zstdcli, 4723284 +silesia.tar, level 6, zstdcli, 4673591 +silesia.tar, level 7, zstdcli, 4608342 +silesia.tar, level 9, zstdcli, 4554700 +silesia.tar, level 13, zstdcli, 4491706 +silesia.tar, level 16, zstdcli, 4381281 +silesia.tar, level 19, zstdcli, 4281518 +silesia.tar, no source size, zstdcli, 4875132 +silesia.tar, long distance mode, zstdcli, 4866975 +silesia.tar, multithreaded, zstdcli, 4875136 +silesia.tar, multithreaded long distance mode, zstdcli, 4866975 +silesia.tar, small window log, zstdcli, 7130434 +silesia.tar, small hash log, zstdcli, 6587841 +silesia.tar, small chain log, zstdcli, 4943259 +silesia.tar, explicit params, zstdcli, 4839202 +silesia.tar, uncompressed literals, zstdcli, 5158134 +silesia.tar, uncompressed literals optimal, zstdcli, 4321098 +silesia.tar, huffman literals, zstdcli, 5347560 +silesia.tar, multithreaded with 
advanced params, zstdcli, 5158134 +github, level -5, zstdcli, 207285 +github, level -5 with dict, zstdcli, 48718 +github, level -3, zstdcli, 192643 +github, level -3 with dict, zstdcli, 47395 +github, level -1, zstdcli, 177568 +github, level -1 with dict, zstdcli, 45170 +github, level 0, zstdcli, 138397 +github, level 0 with dict, zstdcli, 43170 +github, level 1, zstdcli, 144450 +github, level 1 with dict, zstdcli, 43682 +github, level 3, zstdcli, 138397 +github, level 3 with dict, zstdcli, 43170 +github, level 4, zstdcli, 138144 +github, level 4 with dict, zstdcli, 43306 +github, level 5, zstdcli, 137106 +github, level 5 with dict, zstdcli, 40938 +github, level 6, zstdcli, 137108 +github, level 6 with dict, zstdcli, 40632 +github, level 7, zstdcli, 137108 +github, level 7 with dict, zstdcli, 40766 +github, level 9, zstdcli, 137108 +github, level 9 with dict, zstdcli, 41326 +github, level 13, zstdcli, 135717 +github, level 13 with dict, zstdcli, 41716 +github, level 16, zstdcli, 135717 +github, level 16 with dict, zstdcli, 39577 +github, level 19, zstdcli, 135717 +github, level 19 with dict, zstdcli, 39576 +github, long distance mode, zstdcli, 138397 +github, multithreaded, zstdcli, 138397 +github, multithreaded long distance mode, zstdcli, 138397 +github, small window log, zstdcli, 138397 +github, small hash log, zstdcli, 137467 +github, small chain log, zstdcli, 138314 +github, explicit params, zstdcli, 136140 +github, uncompressed literals, zstdcli, 169004 +github, uncompressed literals optimal, zstdcli, 158824 +github, huffman literals, zstdcli, 144450 +github, multithreaded with advanced params, zstdcli, 169004 +silesia, level -5, advanced one pass, 6737567 +silesia, level -3, advanced one pass, 6444663 +silesia, level -1, advanced one pass, 6178442 +silesia, level 0, advanced one pass, 4862377 +silesia, level 1, advanced one pass, 5313144 +silesia, level 3, advanced one pass, 4862377 +silesia, level 4, advanced one pass, 4800629 +silesia, level 5, advanced 
one pass, 4710178 +silesia, level 6, advanced one pass, 4659996 +silesia, level 7, advanced one pass, 4596234 +silesia, level 9, advanced one pass, 4543862 +silesia, level 13, advanced one pass, 4482073 +silesia, level 16, advanced one pass, 4377391 +silesia, level 19, advanced one pass, 4293262 +silesia, no source size, advanced one pass, 4862377 +silesia, long distance mode, advanced one pass, 4853389 +silesia, multithreaded, advanced one pass, 4862377 +silesia, multithreaded long distance mode, advanced one pass, 4853389 +silesia, small window log, advanced one pass, 7126386 +silesia, small hash log, advanced one pass, 6554898 +silesia, small chain log, advanced one pass, 4931093 +silesia, explicit params, advanced one pass, 4815369 +silesia, uncompressed literals, advanced one pass, 5155424 +silesia, uncompressed literals optimal, advanced one pass, 4325427 +silesia, huffman literals, advanced one pass, 5326210 +silesia, multithreaded with advanced params, advanced one pass, 5155424 +silesia.tar, level -5, advanced one pass, 6738558 +silesia.tar, level -3, advanced one pass, 6446362 +silesia.tar, level -1, advanced one pass, 6186038 +silesia.tar, level 0, advanced one pass, 4875008 +silesia.tar, level 1, advanced one pass, 5334825 +silesia.tar, level 3, advanced one pass, 4875008 +silesia.tar, level 4, advanced one pass, 4813507 +silesia.tar, level 5, advanced one pass, 4722235 +silesia.tar, level 6, advanced one pass, 4672194 +silesia.tar, level 7, advanced one pass, 4606658 +silesia.tar, level 9, advanced one pass, 4554098 +silesia.tar, level 13, advanced one pass, 4491702 +silesia.tar, level 16, advanced one pass, 4381277 +silesia.tar, level 19, advanced one pass, 4281514 +silesia.tar, no source size, advanced one pass, 4875008 +silesia.tar, long distance mode, advanced one pass, 4861218 +silesia.tar, multithreaded, advanced one pass, 4874631 +silesia.tar, multithreaded long distance mode, advanced one pass, 4860683 +silesia.tar, small window log, advanced 
one pass, 7130394 +silesia.tar, small hash log, advanced one pass, 6587833 +silesia.tar, small chain log, advanced one pass, 4943255 +silesia.tar, explicit params, advanced one pass, 4829974 +silesia.tar, uncompressed literals, advanced one pass, 5157992 +silesia.tar, uncompressed literals optimal, advanced one pass, 4321094 +silesia.tar, huffman literals, advanced one pass, 5347283 +silesia.tar, multithreaded with advanced params, advanced one pass, 5158545 +github, level -5, advanced one pass, 205285 +github, level -5 with dict, advanced one pass, 46718 +github, level -3, advanced one pass, 190643 +github, level -3 with dict, advanced one pass, 45395 +github, level -1, advanced one pass, 175568 +github, level -1 with dict, advanced one pass, 43170 +github, level 0, advanced one pass, 136397 +github, level 0 with dict, advanced one pass, 41170 +github, level 1, advanced one pass, 142450 +github, level 1 with dict, advanced one pass, 41682 +github, level 3, advanced one pass, 136397 +github, level 3 with dict, advanced one pass, 41170 +github, level 4, advanced one pass, 136144 +github, level 4 with dict, advanced one pass, 41306 +github, level 5, advanced one pass, 135106 +github, level 5 with dict, advanced one pass, 38938 +github, level 6, advanced one pass, 135108 +github, level 6 with dict, advanced one pass, 38632 +github, level 7, advanced one pass, 135108 +github, level 7 with dict, advanced one pass, 38766 +github, level 9, advanced one pass, 135108 +github, level 9 with dict, advanced one pass, 39326 +github, level 13, advanced one pass, 133717 +github, level 13 with dict, advanced one pass, 39716 +github, level 16, advanced one pass, 133717 +github, level 16 with dict, advanced one pass, 37577 +github, level 19, advanced one pass, 133717 +github, level 19 with dict, advanced one pass, 37576 +github, no source size, advanced one pass, 136397 +github, long distance mode, advanced one pass, 136397 +github, multithreaded, advanced one pass, 136397 +github, 
multithreaded long distance mode, advanced one pass, 136397 +github, small window log, advanced one pass, 136397 +github, small hash log, advanced one pass, 135467 +github, small chain log, advanced one pass, 136314 +github, explicit params, advanced one pass, 137670 +github, uncompressed literals, advanced one pass, 167004 +github, uncompressed literals optimal, advanced one pass, 156824 +github, huffman literals, advanced one pass, 142450 +github, multithreaded with advanced params, advanced one pass, 167004 +silesia, level -5, advanced one pass small out, 6737567 +silesia, level -3, advanced one pass small out, 6444663 +silesia, level -1, advanced one pass small out, 6178442 +silesia, level 0, advanced one pass small out, 4862377 +silesia, level 1, advanced one pass small out, 5313144 +silesia, level 3, advanced one pass small out, 4862377 +silesia, level 4, advanced one pass small out, 4800629 +silesia, level 5, advanced one pass small out, 4710178 +silesia, level 6, advanced one pass small out, 4659996 +silesia, level 7, advanced one pass small out, 4596234 +silesia, level 9, advanced one pass small out, 4543862 +silesia, level 13, advanced one pass small out, 4482073 +silesia, level 16, advanced one pass small out, 4377391 +silesia, level 19, advanced one pass small out, 4293262 +silesia, no source size, advanced one pass small out, 4862377 +silesia, long distance mode, advanced one pass small out, 4853389 +silesia, multithreaded, advanced one pass small out, 4862377 +silesia, multithreaded long distance mode, advanced one pass small out, 4853389 +silesia, small window log, advanced one pass small out, 7126386 +silesia, small hash log, advanced one pass small out, 6554898 +silesia, small chain log, advanced one pass small out, 4931093 +silesia, explicit params, advanced one pass small out, 4815369 +silesia, uncompressed literals, advanced one pass small out, 5155424 +silesia, uncompressed literals optimal, advanced one pass small out, 4325427 +silesia, 
huffman literals, advanced one pass small out, 5326210 +silesia, multithreaded with advanced params, advanced one pass small out, 5155424 +silesia.tar, level -5, advanced one pass small out, 6738558 +silesia.tar, level -3, advanced one pass small out, 6446362 +silesia.tar, level -1, advanced one pass small out, 6186038 +silesia.tar, level 0, advanced one pass small out, 4875008 +silesia.tar, level 1, advanced one pass small out, 5334825 +silesia.tar, level 3, advanced one pass small out, 4875008 +silesia.tar, level 4, advanced one pass small out, 4813507 +silesia.tar, level 5, advanced one pass small out, 4722235 +silesia.tar, level 6, advanced one pass small out, 4672194 +silesia.tar, level 7, advanced one pass small out, 4606658 +silesia.tar, level 9, advanced one pass small out, 4554098 +silesia.tar, level 13, advanced one pass small out, 4491702 +silesia.tar, level 16, advanced one pass small out, 4381277 +silesia.tar, level 19, advanced one pass small out, 4281514 +silesia.tar, no source size, advanced one pass small out, 4875008 +silesia.tar, long distance mode, advanced one pass small out, 4861218 +silesia.tar, multithreaded, advanced one pass small out, 4874631 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4860683 +silesia.tar, small window log, advanced one pass small out, 7130394 +silesia.tar, small hash log, advanced one pass small out, 6587833 +silesia.tar, small chain log, advanced one pass small out, 4943255 +silesia.tar, explicit params, advanced one pass small out, 4829974 +silesia.tar, uncompressed literals, advanced one pass small out, 5157992 +silesia.tar, uncompressed literals optimal, advanced one pass small out, 4321094 +silesia.tar, huffman literals, advanced one pass small out, 5347283 +silesia.tar, multithreaded with advanced params, advanced one pass small out, 5158545 +github, level -5, advanced one pass small out, 205285 +github, level -5 with dict, advanced one pass small out, 46718 +github, level -3, 
advanced one pass small out, 190643 +github, level -3 with dict, advanced one pass small out, 45395 +github, level -1, advanced one pass small out, 175568 +github, level -1 with dict, advanced one pass small out, 43170 +github, level 0, advanced one pass small out, 136397 +github, level 0 with dict, advanced one pass small out, 41170 +github, level 1, advanced one pass small out, 142450 +github, level 1 with dict, advanced one pass small out, 41682 +github, level 3, advanced one pass small out, 136397 +github, level 3 with dict, advanced one pass small out, 41170 +github, level 4, advanced one pass small out, 136144 +github, level 4 with dict, advanced one pass small out, 41306 +github, level 5, advanced one pass small out, 135106 +github, level 5 with dict, advanced one pass small out, 38938 +github, level 6, advanced one pass small out, 135108 +github, level 6 with dict, advanced one pass small out, 38632 +github, level 7, advanced one pass small out, 135108 +github, level 7 with dict, advanced one pass small out, 38766 +github, level 9, advanced one pass small out, 135108 +github, level 9 with dict, advanced one pass small out, 39326 +github, level 13, advanced one pass small out, 133717 +github, level 13 with dict, advanced one pass small out, 39716 +github, level 16, advanced one pass small out, 133717 +github, level 16 with dict, advanced one pass small out, 37577 +github, level 19, advanced one pass small out, 133717 +github, level 19 with dict, advanced one pass small out, 37576 +github, no source size, advanced one pass small out, 136397 +github, long distance mode, advanced one pass small out, 136397 +github, multithreaded, advanced one pass small out, 136397 +github, multithreaded long distance mode, advanced one pass small out, 136397 +github, small window log, advanced one pass small out, 136397 +github, small hash log, advanced one pass small out, 135467 +github, small chain log, advanced one pass small out, 136314 +github, explicit params, advanced 
one pass small out, 137670 +github, uncompressed literals, advanced one pass small out, 167004 +github, uncompressed literals optimal, advanced one pass small out, 156824 +github, huffman literals, advanced one pass small out, 142450 +github, multithreaded with advanced params, advanced one pass small out, 167004 +silesia, level -5, advanced streaming, 6882466 +silesia, level -3, advanced streaming, 6568358 +silesia, level -1, advanced streaming, 6183385 +silesia, level 0, advanced streaming, 4862377 +silesia, level 1, advanced streaming, 5314109 +silesia, level 3, advanced streaming, 4862377 +silesia, level 4, advanced streaming, 4800629 +silesia, level 5, advanced streaming, 4710178 +silesia, level 6, advanced streaming, 4659996 +silesia, level 7, advanced streaming, 4596234 +silesia, level 9, advanced streaming, 4543862 +silesia, level 13, advanced streaming, 4482073 +silesia, level 16, advanced streaming, 4377391 +silesia, level 19, advanced streaming, 4293262 +silesia, no source size, advanced streaming, 4862341 +silesia, long distance mode, advanced streaming, 4853389 +silesia, multithreaded, advanced streaming, 4862377 +silesia, multithreaded long distance mode, advanced streaming, 4853389 +silesia, small window log, advanced streaming, 7126389 +silesia, small hash log, advanced streaming, 6554898 +silesia, small chain log, advanced streaming, 4931093 +silesia, explicit params, advanced streaming, 4815380 +silesia, uncompressed literals, advanced streaming, 5155424 +silesia, uncompressed literals optimal, advanced streaming, 4325427 +silesia, huffman literals, advanced streaming, 5331110 +silesia, multithreaded with advanced params, advanced streaming, 5155424 +silesia.tar, level -5, advanced streaming, 6982738 +silesia.tar, level -3, advanced streaming, 6641264 +silesia.tar, level -1, advanced streaming, 6190789 +silesia.tar, level 0, advanced streaming, 4875010 +silesia.tar, level 1, advanced streaming, 5336879 +silesia.tar, level 3, advanced streaming, 
4875010 +silesia.tar, level 4, advanced streaming, 4813507 +silesia.tar, level 5, advanced streaming, 4722240 +silesia.tar, level 6, advanced streaming, 4672203 +silesia.tar, level 7, advanced streaming, 4606658 +silesia.tar, level 9, advanced streaming, 4554105 +silesia.tar, level 13, advanced streaming, 4491703 +silesia.tar, level 16, advanced streaming, 4381277 +silesia.tar, level 19, advanced streaming, 4281514 +silesia.tar, no source size, advanced streaming, 4875006 +silesia.tar, long distance mode, advanced streaming, 4861218 +silesia.tar, multithreaded, advanced streaming, 4875132 +silesia.tar, multithreaded long distance mode, advanced streaming, 4866971 +silesia.tar, small window log, advanced streaming, 7130394 +silesia.tar, small hash log, advanced streaming, 6587834 +silesia.tar, small chain log, advanced streaming, 4943260 +silesia.tar, explicit params, advanced streaming, 4830002 +silesia.tar, uncompressed literals, advanced streaming, 5157995 +silesia.tar, uncompressed literals optimal, advanced streaming, 4321094 +silesia.tar, huffman literals, advanced streaming, 5352306 +silesia.tar, multithreaded with advanced params, advanced streaming, 5158130 +github, level -5, advanced streaming, 205285 +github, level -5 with dict, advanced streaming, 46718 +github, level -3, advanced streaming, 190643 +github, level -3 with dict, advanced streaming, 45395 +github, level -1, advanced streaming, 175568 +github, level -1 with dict, advanced streaming, 43170 +github, level 0, advanced streaming, 136397 +github, level 0 with dict, advanced streaming, 41170 +github, level 1, advanced streaming, 142450 +github, level 1 with dict, advanced streaming, 41682 +github, level 3, advanced streaming, 136397 +github, level 3 with dict, advanced streaming, 41170 +github, level 4, advanced streaming, 136144 +github, level 4 with dict, advanced streaming, 41306 +github, level 5, advanced streaming, 135106 +github, level 5 with dict, advanced streaming, 38938 +github, level 6, 
advanced streaming, 135108 +github, level 6 with dict, advanced streaming, 38632 +github, level 7, advanced streaming, 135108 +github, level 7 with dict, advanced streaming, 38766 +github, level 9, advanced streaming, 135108 +github, level 9 with dict, advanced streaming, 39326 +github, level 13, advanced streaming, 133717 +github, level 13 with dict, advanced streaming, 39716 +github, level 16, advanced streaming, 133717 +github, level 16 with dict, advanced streaming, 37577 +github, level 19, advanced streaming, 133717 +github, level 19 with dict, advanced streaming, 37576 +github, no source size, advanced streaming, 136397 +github, long distance mode, advanced streaming, 136397 +github, multithreaded, advanced streaming, 136397 +github, multithreaded long distance mode, advanced streaming, 136397 +github, small window log, advanced streaming, 136397 +github, small hash log, advanced streaming, 135467 +github, small chain log, advanced streaming, 136314 +github, explicit params, advanced streaming, 137670 +github, uncompressed literals, advanced streaming, 167004 +github, uncompressed literals optimal, advanced streaming, 156824 +github, huffman literals, advanced streaming, 142450 +github, multithreaded with advanced params, advanced streaming, 167004 +silesia, level -5, old streaming, 6882466 +silesia, level -3, old streaming, 6568358 +silesia, level -1, old streaming, 6183385 +silesia, level 0, old streaming, 4862377 +silesia, level 1, old streaming, 5314109 +silesia, level 3, old streaming, 4862377 +silesia, level 4, old streaming, 4800629 +silesia, level 5, old streaming, 4710178 +silesia, level 6, old streaming, 4659996 +silesia, level 7, old streaming, 4596234 +silesia, level 9, old streaming, 4543862 +silesia, level 13, old streaming, 4482073 +silesia, level 16, old streaming, 4377391 +silesia, level 19, old streaming, 4293262 +silesia, no source size, old streaming, 4862341 +silesia, long distance mode, old streaming, 12000408 +silesia, multithreaded, 
old streaming, 12000408 +silesia, multithreaded long distance mode, old streaming, 12000408 +silesia, small window log, old streaming, 12000408 +silesia, small hash log, old streaming, 12000408 +silesia, small chain log, old streaming, 12000408 +silesia, explicit params, old streaming, 12000408 +silesia, uncompressed literals, old streaming, 4862377 +silesia, uncompressed literals optimal, old streaming, 4293262 +silesia, huffman literals, old streaming, 6183385 +silesia, multithreaded with advanced params, old streaming, 12000408 +silesia.tar, level -5, old streaming, 6982738 +silesia.tar, level -3, old streaming, 6641264 +silesia.tar, level -1, old streaming, 6190789 +silesia.tar, level 0, old streaming, 4875010 +silesia.tar, level 1, old streaming, 5336879 +silesia.tar, level 3, old streaming, 4875010 +silesia.tar, level 4, old streaming, 4813507 +silesia.tar, level 5, old streaming, 4722240 +silesia.tar, level 6, old streaming, 4672203 +silesia.tar, level 7, old streaming, 4606658 +silesia.tar, level 9, old streaming, 4554105 +silesia.tar, level 13, old streaming, 4491703 +silesia.tar, level 16, old streaming, 4381277 +silesia.tar, level 19, old streaming, 4281514 +silesia.tar, no source size, old streaming, 4875006 +silesia.tar, long distance mode, old streaming, 12022046 +silesia.tar, multithreaded, old streaming, 12022046 +silesia.tar, multithreaded long distance mode, old streaming, 12022046 +silesia.tar, small window log, old streaming, 12022046 +silesia.tar, small hash log, old streaming, 12022046 +silesia.tar, small chain log, old streaming, 12022046 +silesia.tar, explicit params, old streaming, 12022046 +silesia.tar, uncompressed literals, old streaming, 4875010 +silesia.tar, uncompressed literals optimal, old streaming, 4281514 +silesia.tar, huffman literals, old streaming, 6190789 +silesia.tar, multithreaded with advanced params, old streaming, 12022046 +github, level -5, old streaming, 205285 +github, level -5 with dict, old streaming, 46718 +github, 
level -3, old streaming, 190643 +github, level -3 with dict, old streaming, 45395 +github, level -1, old streaming, 175568 +github, level -1 with dict, old streaming, 43170 +github, level 0, old streaming, 136397 +github, level 0 with dict, old streaming, 41170 +github, level 1, old streaming, 142450 +github, level 1 with dict, old streaming, 41682 +github, level 3, old streaming, 136397 +github, level 3 with dict, old streaming, 41170 +github, level 4, old streaming, 136144 +github, level 4 with dict, old streaming, 41306 +github, level 5, old streaming, 135106 +github, level 5 with dict, old streaming, 38938 +github, level 6, old streaming, 135108 +github, level 6 with dict, old streaming, 38632 +github, level 7, old streaming, 135108 +github, level 7 with dict, old streaming, 38766 +github, level 9, old streaming, 135108 +github, level 9 with dict, old streaming, 39326 +github, level 13, old streaming, 133717 +github, level 13 with dict, old streaming, 39716 +github, level 16, old streaming, 133717 +github, level 16 with dict, old streaming, 37577 +github, level 19, old streaming, 133717 +github, level 19 with dict, old streaming, 37576 +github, no source size, old streaming, 141003 +github, long distance mode, old streaming, 412933 +github, multithreaded, old streaming, 412933 +github, multithreaded long distance mode, old streaming, 412933 +github, small window log, old streaming, 412933 +github, small hash log, old streaming, 412933 +github, small chain log, old streaming, 412933 +github, explicit params, old streaming, 412933 +github, uncompressed literals, old streaming, 136397 +github, uncompressed literals optimal, old streaming, 133717 +github, huffman literals, old streaming, 175568 +github, multithreaded with advanced params, old streaming, 412933 +silesia, level -5, old streaming advanced, 6882466 +silesia, level -3, old streaming advanced, 6568358 +silesia, level -1, old streaming advanced, 6183385 +silesia, level 0, old streaming advanced, 4862377 
+silesia, level 1, old streaming advanced, 5314109 +silesia, level 3, old streaming advanced, 4862377 +silesia, level 4, old streaming advanced, 4800629 +silesia, level 5, old streaming advanced, 4710178 +silesia, level 6, old streaming advanced, 4659996 +silesia, level 7, old streaming advanced, 4596234 +silesia, level 9, old streaming advanced, 4543862 +silesia, level 13, old streaming advanced, 4482073 +silesia, level 16, old streaming advanced, 4377391 +silesia, level 19, old streaming advanced, 4293262 +silesia, no source size, old streaming advanced, 4862341 +silesia, long distance mode, old streaming advanced, 12000408 +silesia, multithreaded, old streaming advanced, 12000408 +silesia, multithreaded long distance mode, old streaming advanced, 12000408 +silesia, small window log, old streaming advanced, 12000408 +silesia, small hash log, old streaming advanced, 12000408 +silesia, small chain log, old streaming advanced, 12000408 +silesia, explicit params, old streaming advanced, 12000408 +silesia, uncompressed literals, old streaming advanced, 4862377 +silesia, uncompressed literals optimal, old streaming advanced, 4293262 +silesia, huffman literals, old streaming advanced, 6183385 +silesia, multithreaded with advanced params, old streaming advanced, 12000408 +silesia.tar, level -5, old streaming advanced, 6982738 +silesia.tar, level -3, old streaming advanced, 6641264 +silesia.tar, level -1, old streaming advanced, 6190789 +silesia.tar, level 0, old streaming advanced, 4875010 +silesia.tar, level 1, old streaming advanced, 5336879 +silesia.tar, level 3, old streaming advanced, 4875010 +silesia.tar, level 4, old streaming advanced, 4813507 +silesia.tar, level 5, old streaming advanced, 4722240 +silesia.tar, level 6, old streaming advanced, 4672203 +silesia.tar, level 7, old streaming advanced, 4606658 +silesia.tar, level 9, old streaming advanced, 4554105 +silesia.tar, level 13, old streaming advanced, 4491703 +silesia.tar, level 16, old streaming advanced, 
4381277 +silesia.tar, level 19, old streaming advanced, 4281514 +silesia.tar, no source size, old streaming advanced, 4875006 +silesia.tar, long distance mode, old streaming advanced, 12022046 +silesia.tar, multithreaded, old streaming advanced, 12022046 +silesia.tar, multithreaded long distance mode, old streaming advanced, 12022046 +silesia.tar, small window log, old streaming advanced, 12022046 +silesia.tar, small hash log, old streaming advanced, 12022046 +silesia.tar, small chain log, old streaming advanced, 12022046 +silesia.tar, explicit params, old streaming advanced, 12022046 +silesia.tar, uncompressed literals, old streaming advanced, 4875010 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4281514 +silesia.tar, huffman literals, old streaming advanced, 6190789 +silesia.tar, multithreaded with advanced params, old streaming advanced, 12022046 +github, level -5, old streaming advanced, 205285 +github, level -5 with dict, old streaming advanced, 46718 +github, level -3, old streaming advanced, 190643 +github, level -3 with dict, old streaming advanced, 45395 +github, level -1, old streaming advanced, 175568 +github, level -1 with dict, old streaming advanced, 43170 +github, level 0, old streaming advanced, 136397 +github, level 0 with dict, old streaming advanced, 41170 +github, level 1, old streaming advanced, 142450 +github, level 1 with dict, old streaming advanced, 41682 +github, level 3, old streaming advanced, 136397 +github, level 3 with dict, old streaming advanced, 41170 +github, level 4, old streaming advanced, 136144 +github, level 4 with dict, old streaming advanced, 41306 +github, level 5, old streaming advanced, 135106 +github, level 5 with dict, old streaming advanced, 38938 +github, level 6, old streaming advanced, 135108 +github, level 6 with dict, old streaming advanced, 38632 +github, level 7, old streaming advanced, 135108 +github, level 7 with dict, old streaming advanced, 38766 +github, level 9, old streaming 
advanced, 135108 +github, level 9 with dict, old streaming advanced, 39326 +github, level 13, old streaming advanced, 133717 +github, level 13 with dict, old streaming advanced, 39716 +github, level 16, old streaming advanced, 133717 +github, level 16 with dict, old streaming advanced, 37577 +github, level 19, old streaming advanced, 133717 +github, level 19 with dict, old streaming advanced, 37576 +github, no source size, old streaming advanced, 141003 +github, long distance mode, old streaming advanced, 412933 +github, multithreaded, old streaming advanced, 412933 +github, multithreaded long distance mode, old streaming advanced, 412933 +github, small window log, old streaming advanced, 412933 +github, small hash log, old streaming advanced, 412933 +github, small chain log, old streaming advanced, 412933 +github, explicit params, old streaming advanced, 412933 +github, uncompressed literals, old streaming advanced, 136397 +github, uncompressed literals optimal, old streaming advanced, 133717 +github, huffman literals, old streaming advanced, 175568 +github, multithreaded with advanced params, old streaming advanced, 412933 +silesia, level -5, old streaming cdcit, 6882466 +silesia, level -3, old streaming cdcit, 6568358 +silesia, level -1, old streaming cdcit, 6183385 +silesia, level 0, old streaming cdcit, 4862377 +silesia, level 1, old streaming cdcit, 5314109 +silesia, level 3, old streaming cdcit, 4862377 +silesia, level 4, old streaming cdcit, 4800629 +silesia, level 5, old streaming cdcit, 4710178 +silesia, level 6, old streaming cdcit, 4659996 +silesia, level 7, old streaming cdcit, 4596234 +silesia, level 9, old streaming cdcit, 4543862 +silesia, level 13, old streaming cdcit, 4482073 +silesia, level 16, old streaming cdcit, 4377391 +silesia, level 19, old streaming cdcit, 4293262 +silesia, no source size, old streaming cdcit, 4862341 +silesia, long distance mode, old streaming cdcit, 12000408 +silesia, multithreaded, old streaming cdcit, 12000408 
+silesia, multithreaded long distance mode, old streaming cdcit, 12000408 +silesia, small window log, old streaming cdcit, 12000408 +silesia, small hash log, old streaming cdcit, 12000408 +silesia, small chain log, old streaming cdcit, 12000408 +silesia, explicit params, old streaming cdcit, 12000408 +silesia, uncompressed literals, old streaming cdcit, 4862377 +silesia, uncompressed literals optimal, old streaming cdcit, 4293262 +silesia, huffman literals, old streaming cdcit, 6183385 +silesia, multithreaded with advanced params, old streaming cdcit, 12000408 +silesia.tar, level -5, old streaming cdcit, 6982738 +silesia.tar, level -3, old streaming cdcit, 6641264 +silesia.tar, level -1, old streaming cdcit, 6190789 +silesia.tar, level 0, old streaming cdcit, 4875010 +silesia.tar, level 1, old streaming cdcit, 5336879 +silesia.tar, level 3, old streaming cdcit, 4875010 +silesia.tar, level 4, old streaming cdcit, 4813507 +silesia.tar, level 5, old streaming cdcit, 4722240 +silesia.tar, level 6, old streaming cdcit, 4672203 +silesia.tar, level 7, old streaming cdcit, 4606658 +silesia.tar, level 9, old streaming cdcit, 4554105 +silesia.tar, level 13, old streaming cdcit, 4491703 +silesia.tar, level 16, old streaming cdcit, 4381277 +silesia.tar, level 19, old streaming cdcit, 4281514 +silesia.tar, no source size, old streaming cdcit, 4875006 +silesia.tar, long distance mode, old streaming cdcit, 12022046 +silesia.tar, multithreaded, old streaming cdcit, 12022046 +silesia.tar, multithreaded long distance mode, old streaming cdcit, 12022046 +silesia.tar, small window log, old streaming cdcit, 12022046 +silesia.tar, small hash log, old streaming cdcit, 12022046 +silesia.tar, small chain log, old streaming cdcit, 12022046 +silesia.tar, explicit params, old streaming cdcit, 12022046 +silesia.tar, uncompressed literals, old streaming cdcit, 4875010 +silesia.tar, uncompressed literals optimal, old streaming cdcit, 4281514 +silesia.tar, huffman literals, old streaming cdcit, 
6190789 +silesia.tar, multithreaded with advanced params, old streaming cdcit, 12022046 +github, level -5, old streaming cdcit, 205285 +github, level -5 with dict, old streaming cdcit, 46718 +github, level -3, old streaming cdcit, 190643 +github, level -3 with dict, old streaming cdcit, 45395 +github, level -1, old streaming cdcit, 175568 +github, level -1 with dict, old streaming cdcit, 43170 +github, level 0, old streaming cdcit, 136397 +github, level 0 with dict, old streaming cdcit, 41170 +github, level 1, old streaming cdcit, 142450 +github, level 1 with dict, old streaming cdcit, 41682 +github, level 3, old streaming cdcit, 136397 +github, level 3 with dict, old streaming cdcit, 41170 +github, level 4, old streaming cdcit, 136144 +github, level 4 with dict, old streaming cdcit, 41306 +github, level 5, old streaming cdcit, 135106 +github, level 5 with dict, old streaming cdcit, 38938 +github, level 6, old streaming cdcit, 135108 +github, level 6 with dict, old streaming cdcit, 38632 +github, level 7, old streaming cdcit, 135108 +github, level 7 with dict, old streaming cdcit, 38766 +github, level 9, old streaming cdcit, 135108 +github, level 9 with dict, old streaming cdcit, 39326 +github, level 13, old streaming cdcit, 133717 +github, level 13 with dict, old streaming cdcit, 39716 +github, level 16, old streaming cdcit, 133717 +github, level 16 with dict, old streaming cdcit, 37577 +github, level 19, old streaming cdcit, 133717 +github, level 19 with dict, old streaming cdcit, 37576 +github, no source size, old streaming cdcit, 141003 +github, long distance mode, old streaming cdcit, 412933 +github, multithreaded, old streaming cdcit, 412933 +github, multithreaded long distance mode, old streaming cdcit, 412933 +github, small window log, old streaming cdcit, 412933 +github, small hash log, old streaming cdcit, 412933 +github, small chain log, old streaming cdcit, 412933 +github, explicit params, old streaming cdcit, 412933 +github, uncompressed literals, old 
streaming cdcit, 136397 +github, uncompressed literals optimal, old streaming cdcit, 133717 +github, huffman literals, old streaming cdcit, 175568 +github, multithreaded with advanced params, old streaming cdcit, 412933 +silesia, level -5, old streaming advanced cdict, 6882466 +silesia, level -3, old streaming advanced cdict, 6568358 +silesia, level -1, old streaming advanced cdict, 6183385 +silesia, level 0, old streaming advanced cdict, 4862377 +silesia, level 1, old streaming advanced cdict, 5314109 +silesia, level 3, old streaming advanced cdict, 4862377 +silesia, level 4, old streaming advanced cdict, 4800629 +silesia, level 5, old streaming advanced cdict, 4710178 +silesia, level 6, old streaming advanced cdict, 4659996 +silesia, level 7, old streaming advanced cdict, 4596234 +silesia, level 9, old streaming advanced cdict, 4543862 +silesia, level 13, old streaming advanced cdict, 4482073 +silesia, level 16, old streaming advanced cdict, 4377391 +silesia, level 19, old streaming advanced cdict, 4293262 +silesia, no source size, old streaming advanced cdict, 4862341 +silesia, long distance mode, old streaming advanced cdict, 12000408 +silesia, multithreaded, old streaming advanced cdict, 12000408 +silesia, multithreaded long distance mode, old streaming advanced cdict, 12000408 +silesia, small window log, old streaming advanced cdict, 12000408 +silesia, small hash log, old streaming advanced cdict, 12000408 +silesia, small chain log, old streaming advanced cdict, 12000408 +silesia, explicit params, old streaming advanced cdict, 12000408 +silesia, uncompressed literals, old streaming advanced cdict, 4862377 +silesia, uncompressed literals optimal, old streaming advanced cdict, 4293262 +silesia, huffman literals, old streaming advanced cdict, 6183385 +silesia, multithreaded with advanced params, old streaming advanced cdict, 12000408 +silesia.tar, level -5, old streaming advanced cdict, 6982738 +silesia.tar, level -3, old streaming advanced cdict, 6641264 
+silesia.tar, level -1, old streaming advanced cdict, 6190789 +silesia.tar, level 0, old streaming advanced cdict, 4875010 +silesia.tar, level 1, old streaming advanced cdict, 5336879 +silesia.tar, level 3, old streaming advanced cdict, 4875010 +silesia.tar, level 4, old streaming advanced cdict, 4813507 +silesia.tar, level 5, old streaming advanced cdict, 4722240 +silesia.tar, level 6, old streaming advanced cdict, 4672203 +silesia.tar, level 7, old streaming advanced cdict, 4606658 +silesia.tar, level 9, old streaming advanced cdict, 4554105 +silesia.tar, level 13, old streaming advanced cdict, 4491703 +silesia.tar, level 16, old streaming advanced cdict, 4381277 +silesia.tar, level 19, old streaming advanced cdict, 4281514 +silesia.tar, no source size, old streaming advanced cdict, 4875006 +silesia.tar, long distance mode, old streaming advanced cdict, 12022046 +silesia.tar, multithreaded, old streaming advanced cdict, 12022046 +silesia.tar, multithreaded long distance mode, old streaming advanced cdict, 12022046 +silesia.tar, small window log, old streaming advanced cdict, 12022046 +silesia.tar, small hash log, old streaming advanced cdict, 12022046 +silesia.tar, small chain log, old streaming advanced cdict, 12022046 +silesia.tar, explicit params, old streaming advanced cdict, 12022046 +silesia.tar, uncompressed literals, old streaming advanced cdict, 4875010 +silesia.tar, uncompressed literals optimal, old streaming advanced cdict, 4281514 +silesia.tar, huffman literals, old streaming advanced cdict, 6190789 +silesia.tar, multithreaded with advanced params, old streaming advanced cdict, 12022046 +github, level -5, old streaming advanced cdict, 205285 +github, level -5 with dict, old streaming advanced cdict, 46718 +github, level -3, old streaming advanced cdict, 190643 +github, level -3 with dict, old streaming advanced cdict, 45395 +github, level -1, old streaming advanced cdict, 175568 +github, level -1 with dict, old streaming advanced cdict, 43170 
+github, level 0, old streaming advanced cdict, 136397 +github, level 0 with dict, old streaming advanced cdict, 41170 +github, level 1, old streaming advanced cdict, 142450 +github, level 1 with dict, old streaming advanced cdict, 41682 +github, level 3, old streaming advanced cdict, 136397 +github, level 3 with dict, old streaming advanced cdict, 41170 +github, level 4, old streaming advanced cdict, 136144 +github, level 4 with dict, old streaming advanced cdict, 41306 +github, level 5, old streaming advanced cdict, 135106 +github, level 5 with dict, old streaming advanced cdict, 38938 +github, level 6, old streaming advanced cdict, 135108 +github, level 6 with dict, old streaming advanced cdict, 38632 +github, level 7, old streaming advanced cdict, 135108 +github, level 7 with dict, old streaming advanced cdict, 38766 +github, level 9, old streaming advanced cdict, 135108 +github, level 9 with dict, old streaming advanced cdict, 39326 +github, level 13, old streaming advanced cdict, 133717 +github, level 13 with dict, old streaming advanced cdict, 39716 +github, level 16, old streaming advanced cdict, 133717 +github, level 16 with dict, old streaming advanced cdict, 37577 +github, level 19, old streaming advanced cdict, 133717 +github, level 19 with dict, old streaming advanced cdict, 37576 +github, no source size, old streaming advanced cdict, 141003 +github, long distance mode, old streaming advanced cdict, 412933 +github, multithreaded, old streaming advanced cdict, 412933 +github, multithreaded long distance mode, old streaming advanced cdict, 412933 +github, small window log, old streaming advanced cdict, 412933 +github, small hash log, old streaming advanced cdict, 412933 +github, small chain log, old streaming advanced cdict, 412933 +github, explicit params, old streaming advanced cdict, 412933 +github, uncompressed literals, old streaming advanced cdict, 136397 +github, uncompressed literals optimal, old streaming advanced cdict, 133717 +github, huffman 
literals, old streaming advanced cdict, 175568 +github, multithreaded with advanced params, old streaming advanced cdict, 412933 diff --git a/tests/roundTripCrash.c b/tests/roundTripCrash.c index 3f4ace8c9..3de593318 100644 --- a/tests/roundTripCrash.c +++ b/tests/roundTripCrash.c @@ -93,9 +93,9 @@ static size_t cctxParamRoundTripTest(void* resultBuff, size_t resultBuffCapacity int const cLevel = h32 % maxClevel; /* Set parameters */ - CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_compressionLevel, cLevel) ); - CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_nbWorkers, 2) ); - CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_overlapLog, 5) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, cLevel) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 2) ); + CHECK_Z( ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_overlapLog, 5) ); /* Apply parameters */ diff --git a/tests/symbols.c b/tests/symbols.c index 600d81670..4d9c6fc0c 100644 --- a/tests/symbols.c +++ b/tests/symbols.c @@ -31,6 +31,7 @@ static const void *symbols[] = { &ZSTD_getFrameContentSize, &ZSTD_maxCLevel, &ZSTD_compressBound, + &ZSTD_decompressBound, &ZSTD_isError, &ZSTD_getErrorName, &ZSTD_createCCtx, diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index ac3008904..51cb27b54 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -344,6 +344,20 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "OK (%u bytes) \n", (unsigned)(cstreamSize + cdictSize)); } + /* context size functions */ + DISPLAYLEVEL(3, "test%3i : estimate CStream size using CCtxParams : ", testNb++); + { ZSTD_CCtx_params* const params = ZSTD_createCCtxParams(); + size_t cstreamSize, cctxSize; + CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 19) ); + cstreamSize = ZSTD_estimateCStreamSize_usingCCtxParams(params); + CHECK_Z(cstreamSize); + cctxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); + 
CHECK_Z(cctxSize); + if (cstreamSize <= cctxSize + 2 * ZSTD_BLOCKSIZE_MAX) goto _output_error; + ZSTD_freeCCtxParams(params); + DISPLAYLEVEL(3, "OK \n"); + } + DISPLAYLEVEL(3, "test%3i : check actual CStream size : ", testNb++); { size_t const s = ZSTD_sizeof_CStream(zc); if (ZSTD_isError(s)) goto _output_error; @@ -495,7 +509,7 @@ static int basicUnitTests(U32 seed, double compressibility) /* _srcSize compression test */ DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); - ZSTD_initCStream_srcSize(zc, 1, CNBufferSize); + CHECK_Z( ZSTD_initCStream_srcSize(zc, 1, CNBufferSize) ); outBuff.dst = (char*)(compressedBuffer); outBuff.size = compressedBufferSize; outBuff.pos = 0; @@ -503,11 +517,14 @@ static int basicUnitTests(U32 seed, double compressibility) inBuff.size = CNBufferSize; inBuff.pos = 0; CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); - if (inBuff.pos != inBuff.size) goto _output_error; /* entire input should be consumed */ - { size_t const r = ZSTD_endStream(zc, &outBuff); - if (r != 0) goto _output_error; } /* error, or some data not flushed */ - { unsigned long long origSize = ZSTD_findDecompressedSize(outBuff.dst, outBuff.pos); - if ((size_t)origSize != CNBufferSize) goto _output_error; } /* exact original size must be present */ + CHECK(inBuff.pos != inBuff.size, "Entire input should be consumed"); + { size_t const r = ZSTD_endStream(zc, &outBuff); + CHECK(r != 0, "Error or some data not flushed (ret=%zu)", r); + } + { unsigned long long origSize = ZSTD_findDecompressedSize(outBuff.dst, outBuff.pos); + CHECK(origSize == ZSTD_CONTENTSIZE_UNKNOWN, "Unknown!"); + CHECK((size_t)origSize != CNBufferSize, "Exact original size must be present (got %llu)", origSize); + } DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); /* wrong _srcSize compression test */ @@ -1703,7 +1720,7 @@ static size_t setCCtxParameter(ZSTD_CCtx* zc, ZSTD_CCtx_params* cctxParams, 
int useOpaqueAPI) { if (useOpaqueAPI) { - return ZSTD_CCtxParam_setParameter(cctxParams, param, value); + return ZSTD_CCtxParams_setParameter(cctxParams, param, value); } else { return ZSTD_CCtx_setParameter(zc, param, value); } @@ -1930,16 +1947,6 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest, } else { CHECK_Z( ZSTD_CCtx_loadDictionary_byReference(zc, dict, dictSize) ); } - if (dict && dictSize) { - /* test that compression parameters are rejected (correctly) after loading a non-NULL dictionary */ - if (opaqueAPI) { - size_t const setError = ZSTD_CCtx_setParametersUsingCCtxParams(zc, cctxParams); - CHECK(!ZSTD_isError(setError), "ZSTD_CCtx_setParametersUsingCCtxParams should have failed"); - } else { - size_t const setError = ZSTD_CCtx_setParameter(zc, ZSTD_c_windowLog, cParams.windowLog-1); - CHECK(!ZSTD_isError(setError), "ZSTD_CCtx_setParameter should have failed"); - } - } } else { CHECK_Z( ZSTD_CCtx_refPrefix(zc, dict, dictSize) ); }