From a4ca246ecae023469a6c93c0c274f2bc97d7103f Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Sat, 14 Sep 2019 20:15:35 +0300 Subject: [PATCH 01/50] build/cmake/README.md: improve --- build/cmake/README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/build/cmake/README.md b/build/cmake/README.md index 854389ad8..73b30dc77 100644 --- a/build/cmake/README.md +++ b/build/cmake/README.md @@ -5,9 +5,9 @@ use case sensitivity that matches modern (ie. cmake version 2.6 and above) conventions of using lower-case for commands, and upper-case for variables. -# How to build +## How to build -As cmake doesn't support command like `cmake clean`, it's recommanded to perform a "out of source build". +As cmake doesn't support command like `cmake clean`, it's recommended to perform a "out of source build". To do this, you can create a new directory and build in it: ```sh cd build/cmake @@ -16,7 +16,7 @@ cd builddir cmake .. make ``` -Then you can clean all cmake caches by simpily delete the new directory: +Then you can clean all cmake caches by simply delete the new directory: ```sh rm -rf build/cmake/builddir ``` @@ -34,19 +34,19 @@ cd build/cmake/builddir cmake -LH .. ``` -Bool options can be set to ON/OFF with -D\[option\]=\[ON/OFF\]. You can configure cmake options like this: +Bool options can be set to `ON/OFF` with `-D[option]=[ON/OFF]`. You can configure cmake options like this: ```sh cd build/cmake/builddir cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=ON .. make ``` -## referring +### referring [Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output) -# CMake Style Recommendations +## CMake Style Recommendations -## Indent all code correctly, i.e. the body of +### Indent all code correctly, i.e. 
the body of * if/else/endif * foreach/endforeach @@ -57,7 +57,7 @@ make Use spaces for indenting, 2, 3 or 4 spaces preferably. Use the same amount of spaces for indenting as is used in the rest of the file. Do not use tabs. -## Upper/lower casing +### Upper/lower casing Most important: use consistent upper- or lowercasing within one file ! @@ -77,7 +77,7 @@ Add_Executable(hello hello.c) aDd_ExEcUtAbLe(blub blub.c) ``` -## End commands +### End commands To make the code easier to read, use empty commands for endforeach(), endif(), endfunction(), endmacro() and endwhile(). Also, use empty else() commands. @@ -99,6 +99,6 @@ if(BARVAR) endif(BARVAR) ``` -## Other resources for best practices +### Other resources for best practices -`https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules` +https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules From b804dd3e5bd3397fa5bbb3a5313496e9156d4fb5 Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Sat, 14 Sep 2019 21:14:43 +0300 Subject: [PATCH 02/50] #754 move sufixlist upper and improve error message on missing suffix --- programs/fileio.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 8a45563d4..e5fb1aad1 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2169,12 +2169,24 @@ FIO_determineDstName(const char* srcFileName) static size_t dfnbCapacity = 0; static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + const char* suffixlist = ZSTD_EXTENSION + #ifdef ZSTD_GZDECOMPRESS + "/" GZ_EXTENSION + #endif + #ifdef ZSTD_LZMADECOMPRESS + "/" XZ_EXTENSION "/" LZMA_EXTENSION + #endif + #ifdef ZSTD_LZ4DECOMPRESS + "/" LZ4_EXTENSION + #endif + ; + size_t const sfnSize = strlen(srcFileName); size_t suffixSize; const char* const suffixPtr = strrchr(srcFileName, '.'); if (suffixPtr == NULL) { - DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored 
\n", - srcFileName); + DISPLAYLEVEL(1, "zstd: %s: missing suffix (%s expected) -- ignored \n", + srcFileName, suffixlist); return NULL; } suffixSize = strlen(suffixPtr); @@ -2193,17 +2205,6 @@ FIO_determineDstName(const char* srcFileName) && strcmp(suffixPtr, LZ4_EXTENSION) #endif ) ) { - const char* suffixlist = ZSTD_EXTENSION - #ifdef ZSTD_GZDECOMPRESS - "/" GZ_EXTENSION - #endif - #ifdef ZSTD_LZMADECOMPRESS - "/" XZ_EXTENSION "/" LZMA_EXTENSION - #endif - #ifdef ZSTD_LZ4DECOMPRESS - "/" LZ4_EXTENSION - #endif - ; DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n", srcFileName, suffixlist); return NULL; From 8cc815a941d2b96a867a4c3d89af3b4cd574d0e8 Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Sat, 14 Sep 2019 21:15:24 +0300 Subject: [PATCH 03/50] #754 sufixlist->SUFFIX_LIST --- programs/fileio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index e5fb1aad1..7ad540558 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2169,7 +2169,7 @@ FIO_determineDstName(const char* srcFileName) static size_t dfnbCapacity = 0; static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ - const char* suffixlist = ZSTD_EXTENSION + const char* SUFFIX_LIST = ZSTD_EXTENSION #ifdef ZSTD_GZDECOMPRESS "/" GZ_EXTENSION #endif @@ -2186,7 +2186,7 @@ FIO_determineDstName(const char* srcFileName) const char* const suffixPtr = strrchr(srcFileName, '.'); if (suffixPtr == NULL) { DISPLAYLEVEL(1, "zstd: %s: missing suffix (%s expected) -- ignored \n", - srcFileName, suffixlist); + srcFileName, SUFFIX_LIST); return NULL; } suffixSize = strlen(suffixPtr); @@ -2206,7 +2206,7 @@ FIO_determineDstName(const char* srcFileName) #endif ) ) { DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n", - srcFileName, suffixlist); + srcFileName, SUFFIX_LIST); return NULL; } From 7d9cd22e2145f68cb04b87d76907366726ab0bc3 Mon Sep 17 00:00:00 2001 
From: Sergey Ponomarev Date: Sat, 14 Sep 2019 21:23:47 +0300 Subject: [PATCH 04/50] #754 Add a hint about -o option --- programs/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 7ad540558..edf58fee6 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2185,7 +2185,7 @@ FIO_determineDstName(const char* srcFileName) size_t suffixSize; const char* const suffixPtr = strrchr(srcFileName, '.'); if (suffixPtr == NULL) { - DISPLAYLEVEL(1, "zstd: %s: missing suffix (%s expected) -- ignored \n", + DISPLAYLEVEL(1, "zstd: %s: missing suffix (%s expected). Can't derive the output file name so specify it with -o dstFileName. -- ignored \n", srcFileName, SUFFIX_LIST); return NULL; } @@ -2205,7 +2205,7 @@ FIO_determineDstName(const char* srcFileName) && strcmp(suffixPtr, LZ4_EXTENSION) #endif ) ) { - DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n", + DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected). Can't derive the output file name so specify it with -o dstFileName. 
-- ignored \n", srcFileName, SUFFIX_LIST); return NULL; } From a101721f4e2f3cfa0296b5b61147f272e77c8b68 Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Sat, 14 Sep 2019 21:26:27 +0300 Subject: [PATCH 05/50] Use one strstr() call instead of chain of strcmp() --- programs/fileio.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index edf58fee6..8816115b5 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2193,18 +2193,7 @@ FIO_determineDstName(const char* srcFileName) /* check suffix is authorized */ if (sfnSize <= suffixSize - || ( strcmp(suffixPtr, ZSTD_EXTENSION) - #ifdef ZSTD_GZDECOMPRESS - && strcmp(suffixPtr, GZ_EXTENSION) - #endif - #ifdef ZSTD_LZMADECOMPRESS - && strcmp(suffixPtr, XZ_EXTENSION) - && strcmp(suffixPtr, LZMA_EXTENSION) - #endif - #ifdef ZSTD_LZ4DECOMPRESS - && strcmp(suffixPtr, LZ4_EXTENSION) - #endif - ) ) { + || (strstr(SUFFIX_LIST, suffixPtr) == NULL)) { DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected). Can't derive the output file name so specify it with -o dstFileName. 
-- ignored \n", srcFileName, SUFFIX_LIST); return NULL; From 59f369a6da86e83948084270a06ad72368f079cc Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Sat, 14 Sep 2019 21:30:15 +0300 Subject: [PATCH 06/50] Add short tar's extensions .tgz (.tar.gz), .txz (.tar.xz), .tzst (.tar.zst) --- programs/fileio.c | 8 ++++---- programs/fileio.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 8816115b5..f7e3b2349 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2169,15 +2169,15 @@ FIO_determineDstName(const char* srcFileName) static size_t dfnbCapacity = 0; static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ - const char* SUFFIX_LIST = ZSTD_EXTENSION + const char* SUFFIX_LIST = ZSTD_EXTENSION "/" TZSTD_EXTENSION #ifdef ZSTD_GZDECOMPRESS - "/" GZ_EXTENSION + "/" GZ_EXTENSION "/" TGZ_EXTENSION #endif #ifdef ZSTD_LZMADECOMPRESS - "/" XZ_EXTENSION "/" LZMA_EXTENSION + "/" XZ_EXTENSION "/" LZMA_EXTENSION "/" TXZ_EXTENSION #endif #ifdef ZSTD_LZ4DECOMPRESS - "/" LZ4_EXTENSION + "/" LZ4_EXTENSION "/" TLZ4_EXTENSION #endif ; diff --git a/programs/fileio.h b/programs/fileio.h index 096d90b5c..ebd2ffbee 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -32,9 +32,13 @@ extern "C" { #endif #define LZMA_EXTENSION ".lzma" #define XZ_EXTENSION ".xz" +#define TXZ_EXTENSION ".txz" #define GZ_EXTENSION ".gz" +#define TGZ_EXTENSION ".tgz" #define ZSTD_EXTENSION ".zst" +#define TZSTD_EXTENSION ".tzst" #define LZ4_EXTENSION ".lz4" +#define TLZ4_EXTENSION ".tlz4" /*-************************************* From bfb4d830b299feb14da189b775700b5fa41950d5 Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Wed, 18 Sep 2019 09:21:00 +0300 Subject: [PATCH 07/50] FIO_determineDstName: extract dstFileNameEndPos variable --- programs/fileio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 
f7e3b2349..96170b14a 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2210,8 +2210,9 @@ FIO_determineDstName(const char* srcFileName) /* return dst name == src name truncated from suffix */ assert(dstFileNameBuffer != NULL); - memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize); - dstFileNameBuffer[sfnSize-suffixSize] = '\0'; + size_t dstFileNameEndPos = sfnSize - suffixSize; + memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); + dstFileNameBuffer[dstFileNameEndPos] = '\0'; return dstFileNameBuffer; /* note : dstFileNameBuffer memory is not going to be free */ From dafe796e39492a180dacec35b4a4c963dcd88a37 Mon Sep 17 00:00:00 2001 From: Sergey Ponomarev Date: Wed, 18 Sep 2019 09:23:10 +0300 Subject: [PATCH 08/50] #1790 short tar's extensions tgz, txz, tlz4m .tzst should be decompressed with .tar suffix --- programs/fileio.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/programs/fileio.c b/programs/fileio.c index 96170b14a..5aaad0e96 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2212,6 +2212,15 @@ FIO_determineDstName(const char* srcFileName) assert(dstFileNameBuffer != NULL); size_t dstFileNameEndPos = sfnSize - suffixSize; memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); + /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" extension on decompression + * To check that the file is one of them we can check that it starts with "t" + */ + if (suffixPtr[1] == 't') { + dstFileNameBuffer[dstFileNameEndPos++] = '.'; + dstFileNameBuffer[dstFileNameEndPos++] = 't'; + dstFileNameBuffer[dstFileNameEndPos++] = 'a'; + dstFileNameBuffer[dstFileNameEndPos++] = 'r'; + } dstFileNameBuffer[dstFileNameEndPos] = '\0'; return dstFileNameBuffer; From a65eb39f9d79a895ae246ac4b053828711b0eeed Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 10:22:06 -0400 Subject: [PATCH 09/50] Add compressionlevel to cdict --- lib/compress/zstd_compress.c | 1 + 1 file changed, 1 insertion(+) diff --git 
a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index df4e828cd..acbd3a288 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -51,6 +51,7 @@ struct ZSTD_CDict_s { ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; + BYTE compressionLevel; /* 0 indicates that advanced API was used to select params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) From 0c8df5c928e3d26e601a39423cc47e9ce4a9ddf5 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 12:28:23 -0400 Subject: [PATCH 10/50] Fix error --- lib/compress/zstd_compress.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index acbd3a288..a4e73fcda 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -51,7 +51,7 @@ struct ZSTD_CDict_s { ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; - BYTE compressionLevel; /* 0 indicates that advanced API was used to select params */ + BYTE compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -2826,6 +2826,8 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace); } +#define ZSTD_USE_CDICT_PARAMS_CUTOFF (1 MB) + /*! 
ZSTD_compressBegin_internal() : * @return : 0, or an error code */ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, @@ -2840,16 +2842,22 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - if (cdict && cdict->dictContentSize>0) { + if ( (cdict) + && (cdict->dictContentSize > 0) + && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_continue, zbuff) ); - { size_t const dictID = ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); + { size_t const dictID = cdict ? + ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &params, cdict->dictContent, cdict->dictContentSize, + dictContentType, dtlm, cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; @@ -3152,6 +3160,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, cdict->customMem = customMem; cdict->workspace = workspace; cdict->workspaceSize = workspaceSize; + cdict->compressionLevel = 0; /* signals advanced API usage */ + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, @@ -3167,9 +3177,11 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { 
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; } ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) @@ -3255,7 +3267,11 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ZSTD_getCParamsFromCDict(cdict); + params.cParams = (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); /* Increase window log to fit the entire dictionary and source if the * source size is known. Limit the increase to 19, which is the * window log for compression level 1 with the largest source size. 
From 23dac23a49a859e275b37d32e00cf23a63917303 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 12:44:48 -0400 Subject: [PATCH 11/50] formatting --- lib/compress/zstd_compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a4e73fcda..55e9c8a65 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3178,8 +3178,8 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL { ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; } @@ -3270,8 +3270,8 @@ size_t ZSTD_compressBegin_usingCDict_advanced( params.cParams = (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, - pledgedSrcSize, - cdict->dictContentSize); + pledgedSrcSize, + cdict->dictContentSize); /* Increase window log to fit the entire dictionary and source if the * source size is known. Limit the increase to 19, which is the * window log for compression level 1 with the largest source size. 
From 5c010c9d2d9c78622fff02c539159c94d185347b Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 13:10:05 -0400 Subject: [PATCH 12/50] merge conflicts round 2 --- lib/compress/zstd_compress.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c6ab40ecd..adf0ccab7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2917,7 +2917,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &params, cdict->dictContent, cdict->dictContentSize, + &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, dictContentType, dtlm, cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, @@ -3237,13 +3237,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, assert(cdict != NULL); ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; -<<<<<<< HEAD - cdict->workspace = workspace; - cdict->workspaceSize = workspaceSize; cdict->compressionLevel = 0; /* signals advanced API usage */ -======= ->>>>>>> 8b6d96827c24dd09109830272f413254833317d9 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, From 5e901b6f32932aa00fda4dc118b0cef1c2029461 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 13:58:44 -0400 Subject: [PATCH 13/50] Cast to BYTE to appease appveyor --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index adf0ccab7..2737b05f7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3257,7 +3257,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* cdict = 
ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); - cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; return cdict; } From 8cb217444617f0432a33a3cd307487cb026e62d3 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 10:29:31 -0400 Subject: [PATCH 14/50] Fix test --- lib/compress/zstd_compress.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2737b05f7..0207effe9 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3257,7 +3257,9 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); - cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; + if (cdict) + cdict->compressionLevel = compressionLevel == 0 ? 
(BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; + return cdict; } From cf00ea367a1c9056bad1c86038a4cb35a44e08f1 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 10:31:27 -0400 Subject: [PATCH 15/50] Trailing whitespace --- lib/compress/zstd_compress.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0207effe9..4de67777f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2911,7 +2911,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, ZSTDcrp_makeClean, zbuff) ); { size_t const dictID = cdict ? @@ -3259,7 +3259,6 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL cParams, ZSTD_defaultCMem); if (cdict) cdict->compressionLevel = compressionLevel == 0 ? 
(BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; - return cdict; } From 4f7d26b0eec5eff6fc30a3927f80bd317f8e763a Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 15:05:29 -0400 Subject: [PATCH 16/50] Changed to int from BYTE --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4de67777f..a8d2b3a47 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -50,7 +50,7 @@ struct ZSTD_CDict_s { ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; - BYTE compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) From 4455f00cb8b476c152e8681835a94ce8674439fc Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 15:06:02 -0400 Subject: [PATCH 17/50] Changed to int from BYTE --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a8d2b3a47..672f76080 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3258,7 +3258,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); if (cdict) - cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; + cdict->compressionLevel = compressionLevel == 0 ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; } From 1daa898c93545d659ae4987a428bac4883961dc6 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 20 Oct 2019 14:03:09 -0400 Subject: [PATCH 18/50] Added support for forcing new CDict behavior and updated enum --- lib/compress/zstd_compress.c | 3 ++- lib/zstd.h | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 672f76080..f0e45f518 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2908,7 +2908,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) - && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) ) { + && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceInputParams) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } diff --git a/lib/zstd.h b/lib/zstd.h index 667845627..4859733bb 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1149,7 +1149,7 @@ typedef enum { * to evolve and should be considered only in the context of extremely * advanced performance tuning. * - * Zstd currently supports the use of a CDict in two ways: + * Zstd currently supports the use of a CDict in three ways: * * - The contents of the CDict can be copied into the working context. This * means that the compression can search both the dictionary and input @@ -1164,6 +1164,10 @@ typedef enum { * tables. However, this model incurs no start-up cost (as long as the * working context's tables can be reused). For small inputs, this can be * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to determine how our tables are initialized. 
This method + * should be used when using a small dictionary to compress a large input. * * Zstd has a simple internal heuristic that selects which strategy to use * at the beginning of a compression. However, if experimentation shows that @@ -1173,6 +1177,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceInputParams = 3, /* Always use input to determine tables */ } ZSTD_dictAttachPref_e; typedef enum { From 6d297265f910dc08e99b4bb81a8da22524240305 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 20 Oct 2019 19:02:47 -0400 Subject: [PATCH 19/50] Add enum to decision process --- lib/compress/zstd_compress.c | 5 +++-- lib/zstd.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f0e45f518..51e8f59c0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2909,7 +2909,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, if ( (cdict) && (cdict->dictContentSize > 0) && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceInputParams) ) { + && (params->attachDictPref != ZSTD_dictForceSource) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } @@ -3348,7 +3348,8 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ? + params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) + && (params->attachDictPref != ZSTD_dictForceSource) ? 
ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, diff --git a/lib/zstd.h b/lib/zstd.h index 4859733bb..c0521c1b8 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1177,7 +1177,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceInputParams = 3, /* Always use input to determine tables */ + ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ } ZSTD_dictAttachPref_e; typedef enum { From e8aa3e486d7ad23bafb19cc39be9db5bce4ee3c7 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 20 Oct 2019 22:01:08 -0400 Subject: [PATCH 20/50] Updated forceAttachDict param bounds --- lib/compress/zstd_compress.c | 4 ++-- lib/zstd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 51e8f59c0..98d064397 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -387,7 +387,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) case ZSTD_c_forceAttachDict: ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ + bounds.upperBound = ZSTD_dictForceSource; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; case ZSTD_c_literalCompressionMode: @@ -3349,7 +3349,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) - && (params->attachDictPref != ZSTD_dictForceSource) ? + && (params.attachDictPref != ZSTD_dictForceSource) ? 
ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, diff --git a/lib/zstd.h b/lib/zstd.h index c0521c1b8..5468c34cc 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1177,7 +1177,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ + ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ } ZSTD_dictAttachPref_e; typedef enum { From 3328348c63b8b1bd5a6eaf53683b8ed68bf01f6e Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 10:22:06 -0400 Subject: [PATCH 21/50] Add compressionlevel to cdict --- lib/compress/zstd_compress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7facbeff0..f16da6a85 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -50,6 +50,7 @@ struct ZSTD_CDict_s { ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; + BYTE compressionLevel; /* 0 indicates that advanced API was used to select params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) From 3fa4daaa55a95efdb985b3919fa67dddc4211c49 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 12:28:23 -0400 Subject: [PATCH 22/50] Fix error --- lib/compress/zstd_compress.c | 44 ++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f16da6a85..5d4648790 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -50,7 +50,7 @@ struct ZSTD_CDict_s { ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; - BYTE compressionLevel; /* 0 indicates that advanced API was used to select params */ + BYTE 
compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -2890,6 +2890,8 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, bs, ms, ws, params, dict, dictSize, dtlm, workspace); } +#define ZSTD_USE_CDICT_PARAMS_CUTOFF (1 MB) + /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, @@ -2904,17 +2906,22 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - if (cdict && cdict->dictContentSize>0) { + if ( (cdict) + && (cdict->dictContentSize > 0) + && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) ); - { size_t const dictID = ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->workspace, params, dict, dictSize, dictContentType, dtlm, - cctx->entropyWorkspace); + ZSTDcrp_continue, zbuff) ); + { size_t const dictID = cdict ? 
+ ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + params, cdict->dictContent, cdict->dictContentSize, + dictContentType, dtlm, cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; @@ -3229,6 +3236,13 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, assert(cdict != NULL); ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; +<<<<<<< HEAD +======= + cdict->workspace = workspace; + cdict->workspaceSize = workspaceSize; + cdict->compressionLevel = 0; /* signals advanced API usage */ + +>>>>>>> Fix error if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, @@ -3244,9 +3258,11 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + cdict->compressionLevel = compressionLevel == 0 ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; } ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) @@ -3334,7 +3350,11 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ZSTD_getCParamsFromCDict(cdict); + params.cParams = (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); /* Increase window log to fit the entire dictionary and source if the * source size is known. Limit the increase to 19, which is the * window log for compression level 1 with the largest source size. From 053a35fd6428cef16fcbf4c39d8746e82f932e3c Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 12:44:48 -0400 Subject: [PATCH 23/50] formatting --- lib/compress/zstd_compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5d4648790..9002616b6 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3259,8 +3259,8 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL { ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTD_defaultCMem); + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; } @@ -3353,8 +3353,8 @@ size_t ZSTD_compressBegin_usingCDict_advanced( params.cParams = (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ? 
ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, - pledgedSrcSize, - cdict->dictContentSize); + pledgedSrcSize, + cdict->dictContentSize); /* Increase window log to fit the entire dictionary and source if the * source size is known. Limit the increase to 19, which is the * window log for compression level 1 with the largest source size. From a727a85a7eb60f72d375b90c6003b64ae54f2930 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 13:10:05 -0400 Subject: [PATCH 24/50] merge conflicts round 2 --- lib/compress/zstd_compress.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9002616b6..9dbc7f8fb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2917,7 +2917,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - params, cdict->dictContent, cdict->dictContentSize, + &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, dictContentType, dtlm, cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, @@ -3237,12 +3237,17 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; <<<<<<< HEAD +<<<<<<< HEAD ======= cdict->workspace = workspace; cdict->workspaceSize = workspaceSize; cdict->compressionLevel = 0; /* signals advanced API usage */ >>>>>>> Fix error +======= + cdict->compressionLevel = 0; /* signals advanced API usage */ + +>>>>>>> merge conflicts round 2 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, From ea3cb6988f49d96e158f8180bf9aa2f4a8351058 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Tue, 15 Oct 2019 13:58:44 -0400 Subject: [PATCH 25/50] Cast 
to BYTE to appease appveyor --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9dbc7f8fb..7c4e299a7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3266,7 +3266,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); - cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; return cdict; } From cf51501d2f6b83b5dbf958775827592febbf9071 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 10:29:31 -0400 Subject: [PATCH 26/50] Fix test --- lib/compress/zstd_compress.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7c4e299a7..3e1e370ef 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3266,7 +3266,9 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); - cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; + if (cdict) + cdict->compressionLevel = compressionLevel == 0 ? 
(BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; + return cdict; } From bb2df8c4999f8e9679678d3c0c5188d3c239f692 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 10:31:27 -0400 Subject: [PATCH 27/50] Trailing whitespace --- lib/compress/zstd_compress.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3e1e370ef..d0b01c073 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3268,7 +3268,6 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL cParams, ZSTD_defaultCMem); if (cdict) cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; - return cdict; } From f0fccc8847dcd2e279a215a4d8024e46e32f8de6 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 15:05:29 -0400 Subject: [PATCH 28/50] Changed to int from BYTE --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d0b01c073..2daa588f1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -50,7 +50,7 @@ struct ZSTD_CDict_s { ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; U32 dictID; - BYTE compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) From 9294f4826b3bbcd1581574af859cb0678d60c7a1 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Wed, 16 Oct 2019 15:06:02 -0400 Subject: [PATCH 29/50] Changed to int from BYTE --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2daa588f1..634a0f857 100644 --- a/lib/compress/zstd_compress.c +++ 
b/lib/compress/zstd_compress.c @@ -3267,7 +3267,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); if (cdict) - cdict->compressionLevel = compressionLevel == 0 ? (BYTE)ZSTD_CLEVEL_DEFAULT : (BYTE)compressionLevel; + cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; return cdict; } From e4de8b098af8e4e375d6af5ce17bdffb986209a1 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 20 Oct 2019 14:03:09 -0400 Subject: [PATCH 30/50] Added support for forcing new CDict behavior and updated enum --- lib/compress/zstd_compress.c | 3 ++- lib/zstd.h | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 634a0f857..ab5b13d01 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2908,7 +2908,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) - && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) ) { + && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceInputParams) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } diff --git a/lib/zstd.h b/lib/zstd.h index 667845627..4859733bb 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1149,7 +1149,7 @@ typedef enum { * to evolve and should be considered only in the context of extremely * advanced performance tuning. * - * Zstd currently supports the use of a CDict in two ways: + * Zstd currently supports the use of a CDict in three ways: * * - The contents of the CDict can be copied into the working context. This * means that the compression can search both the dictionary and input @@ -1164,6 +1164,10 @@ typedef enum { * tables. 
However, this model incurs no start-up cost (as long as the * working context's tables can be reused). For small inputs, this can be * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to determine how our tables are initialized. This method + * should be used when using a small dictionary to compress a large input. * * Zstd has a simple internal heuristic that selects which strategy to use * at the beginning of a compression. However, if experimentation shows that @@ -1173,6 +1177,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceInputParams = 3, /* Always use input to determine tables */ } ZSTD_dictAttachPref_e; typedef enum { From 8f69c476439b6492161ea798832a708a3b877164 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 20 Oct 2019 19:02:47 -0400 Subject: [PATCH 31/50] Add enum to decision process --- lib/compress/zstd_compress.c | 5 +++-- lib/zstd.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ab5b13d01..84af6feea 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2909,7 +2909,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, if ( (cdict) && (cdict->dictContentSize > 0) && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceInputParams) ) { + && (params->attachDictPref != ZSTD_dictForceSource) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } @@ -3357,7 +3357,8 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - 
params.cParams = (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ? + params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) + && (params->attachDictPref != ZSTD_dictForceSource) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, diff --git a/lib/zstd.h b/lib/zstd.h index 4859733bb..c0521c1b8 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1177,7 +1177,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceInputParams = 3, /* Always use input to determine tables */ + ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ } ZSTD_dictAttachPref_e; typedef enum { From 1f3a51fb52034af5bd2a662b27977dfe0d9bf2e4 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Sun, 20 Oct 2019 22:01:08 -0400 Subject: [PATCH 32/50] Updated forceAttachDict param bounds --- lib/compress/zstd_compress.c | 4 ++-- lib/zstd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 84af6feea..3aa111ddb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -387,7 +387,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) case ZSTD_c_forceAttachDict: ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ + bounds.upperBound = ZSTD_dictForceSource; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ return bounds; case ZSTD_c_literalCompressionMode: @@ -3358,7 +3358,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) - && (params->attachDictPref != ZSTD_dictForceSource) ? + && (params.attachDictPref != ZSTD_dictForceSource) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, diff --git a/lib/zstd.h b/lib/zstd.h index c0521c1b8..5468c34cc 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1177,7 +1177,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ + ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ } ZSTD_dictAttachPref_e; typedef enum { From 676f89902acbb2fb8294e601e31ae396e125c297 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 21 Oct 2019 15:29:55 -0400 Subject: [PATCH 33/50] Added multiplier, renamed new enum to something more useful --- lib/compress/zstd_compress.c | 17 +++++++++++------ lib/zstd.h | 8 +++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3aa111ddb..73e6618c1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -387,7 +387,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) case ZSTD_c_forceAttachDict: ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceSource; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; case ZSTD_c_literalCompressionMode: @@ -2890,7 +2890,8 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, bs, ms, ws, params, dict, dictSize, dtlm, workspace); } -#define ZSTD_USE_CDICT_PARAMS_CUTOFF (1 MB) +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ @@ -2908,8 +2909,10 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) - && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceSource) ) { + && ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER) + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } @@ -3357,8 +3360,10 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) - && (params.attachDictPref != ZSTD_dictForceSource) ? + params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER) + || (cdict->compressionLevel == 0) ) + && (params.attachDictPref != ZSTD_dictForceLoad) ? 
ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, diff --git a/lib/zstd.h b/lib/zstd.h index 5468c34cc..3ba476e88 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1166,8 +1166,10 @@ typedef enum { * faster than copying the CDict's tables. * * - The CDict's tables are not used at all, and instead we use the working - * context alone to determine how our tables are initialized. This method - * should be used when using a small dictionary to compress a large input. + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. * * Zstd has a simple internal heuristic that selects which strategy to use * at the beginning of a compression. However, if experimentation shows that @@ -1177,7 +1179,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. 
*/ - ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ + ZSTD_dictForceLoad = 3, /* Always reload the dictionary */ } ZSTD_dictAttachPref_e; typedef enum { From 2ab484a5f90f95a6a1b063b6da27657243d66635 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 21 Oct 2019 18:55:17 -0400 Subject: [PATCH 34/50] Fix bad merge --- lib/compress/zstd_compress.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c0ec593f3..d03a2cefc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2917,7 +2917,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, } FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_continue, zbuff) ); + ZSTDcrp_makeClean, zbuff) ); { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, @@ -2925,7 +2925,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, dictContentType, dtlm, cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); + &cctx->workspace, params, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; From dbda8c318a9c0c9dc226ab4eef9b2c5c0b5966e4 Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 21 Oct 2019 19:10:13 -0400 Subject: [PATCH 35/50] Trailing comma --- lib/zstd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd.h b/lib/zstd.h index 788f5d9ec..da29ad222 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1179,7 +1179,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. 
*/ - ZSTD_dictForceLoad = 3, /* Always reload the dictionary */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ } ZSTD_dictAttachPref_e; typedef enum { From 59c81aa31b689b3d40dc1eb559b566a521a07fef Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 21 Oct 2019 19:12:15 -0400 Subject: [PATCH 36/50] Line up comments :) --- lib/zstd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd.h b/lib/zstd.h index da29ad222..71012afe2 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1179,7 +1179,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ } ZSTD_dictAttachPref_e; typedef enum { From c2e1e54f2445c7ec329e812340762f8be858d38b Mon Sep 17 00:00:00 2001 From: Sen Huang Date: Mon, 21 Oct 2019 19:16:50 -0400 Subject: [PATCH 37/50] ((x or y) or z) == (x or y or z), remove brackets --- lib/compress/zstd_compress.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c03944941..dd411aba9 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2932,8 +2932,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) - && ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER || cdict->compressionLevel == 0) && (params->attachDictPref != ZSTD_dictForceLoad) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, 
pledgedSrcSize, zbuff); @@ -3379,9 +3379,9 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER) - || (cdict->compressionLevel == 0) ) + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || cdict->compressionLevel == 0 ) && (params.attachDictPref != ZSTD_dictForceLoad) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, From cf210039955548d32e29479de0eaeab27407509e Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 22 Oct 2019 15:29:48 -0700 Subject: [PATCH 38/50] [regression] Fix the old streaming regression test * A copy-paste error made it so we weren't running the advanced/cdict streaming tests with the old API. * Clean up the old streaming tests to skip incompatible configs. * Update `results.csv`. The tests now catch the bug in #1787. --- tests/regression/method.c | 33 +++- tests/regression/results.csv | 310 ++++++++--------------------------- 2 files changed, 90 insertions(+), 253 deletions(-) diff --git a/tests/regression/method.c b/tests/regression/method.c index 1e84021c3..b74f54819 100644 --- a/tests/regression/method.c +++ b/tests/regression/method.c @@ -444,6 +444,8 @@ static int init_cstream( ZSTD_parameters const params = config_get_zstd_params(config, 0, 0); ZSTD_CDict* dict = NULL; if (cdict) { + if (!config->use_dictionary) + return 1; *cdict = ZSTD_createCDict_advanced( state->dictionary.data, state->dictionary.size, @@ -459,14 +461,18 @@ static int init_cstream( } else { zret = ZSTD_initCStream_advanced( zcs, - state->dictionary.data, - state->dictionary.size, + config->use_dictionary ? 
state->dictionary.data : NULL, + config->use_dictionary ? state->dictionary.size : 0, params, ZSTD_CONTENTSIZE_UNKNOWN); } } else { int const level = config_get_level(config); + if (level == CONFIG_NO_LEVEL) + return 1; if (cdict) { + if (!config->use_dictionary) + return 1; *cdict = ZSTD_createCDict( state->dictionary.data, state->dictionary.size, @@ -477,7 +483,10 @@ static int init_cstream( zret = ZSTD_initCStream_usingCDict(zcs, *cdict); } else if (config->use_dictionary) { zret = ZSTD_initCStream_usingDict( - zcs, state->dictionary.data, state->dictionary.size, level); + zcs, + state->dictionary.data, + state->dictionary.size, + level); } else { zret = ZSTD_initCStream(zcs, level); } @@ -506,9 +515,17 @@ static result_t old_streaming_compress_internal( result = result_error(result_error_compression_error); goto out; } + if (!advanced && config_get_level(config) == CONFIG_NO_LEVEL) { + result = result_error(result_error_skip); + goto out; + } + if (cdict && !config->use_dictionary) { + result = result_error(result_error_skip); + goto out; + } if (init_cstream(state, zcs, config, advanced, cdict ? 
&cd : NULL)) { - result = result_error(result_error_compression_error); - goto out; + result = result_error(result_error_compression_error); + goto out; } result_data_t data = {.total_size = 0}; @@ -629,21 +646,21 @@ method_t const old_streaming = { method_t const old_streaming_advanced = { .name = "old streaming advanced", .create = buffer_state_create, - .compress = old_streaming_compress, + .compress = old_streaming_compress_advanced, .destroy = buffer_state_destroy, }; method_t const old_streaming_cdict = { .name = "old streaming cdcit", .create = buffer_state_create, - .compress = old_streaming_compress, + .compress = old_streaming_compress_cdict, .destroy = buffer_state_destroy, }; method_t const old_streaming_advanced_cdict = { .name = "old streaming advanced cdict", .create = buffer_state_create, - .compress = old_streaming_compress, + .compress = old_streaming_compress_cdict_advanced, .destroy = buffer_state_destroy, }; diff --git a/tests/regression/results.csv b/tests/regression/results.csv index ba1295c5c..a0e1566a5 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -461,17 +461,9 @@ silesia, level 13, old stre silesia, level 16, old streaming, 4377389 silesia, level 19, old streaming, 4293262 silesia, no source size, old streaming, 4849455 -silesia, long distance mode, old streaming, 12000408 -silesia, multithreaded, old streaming, 12000408 -silesia, multithreaded long distance mode, old streaming, 12000408 -silesia, small window log, old streaming, 12000408 -silesia, small hash log, old streaming, 12000408 -silesia, small chain log, old streaming, 12000408 -silesia, explicit params, old streaming, 12000408 silesia, uncompressed literals, old streaming, 4849491 silesia, uncompressed literals optimal, old streaming, 4293262 silesia, huffman literals, old streaming, 6183385 -silesia, multithreaded with advanced params, old streaming, 12000408 silesia.tar, level -5, old streaming, 6982738 silesia.tar, level -3, old streaming, 
6641264 silesia.tar, level -1, old streaming, 6190789 @@ -487,17 +479,9 @@ silesia.tar, level 13, old stre silesia.tar, level 16, old streaming, 4381284 silesia.tar, level 19, old streaming, 4281511 silesia.tar, no source size, old streaming, 4861372 -silesia.tar, long distance mode, old streaming, 12022046 -silesia.tar, multithreaded, old streaming, 12022046 -silesia.tar, multithreaded long distance mode, old streaming, 12022046 -silesia.tar, small window log, old streaming, 12022046 -silesia.tar, small hash log, old streaming, 12022046 -silesia.tar, small chain log, old streaming, 12022046 -silesia.tar, explicit params, old streaming, 12022046 silesia.tar, uncompressed literals, old streaming, 4861376 silesia.tar, uncompressed literals optimal, old streaming, 4281511 silesia.tar, huffman literals, old streaming, 6190789 -silesia.tar, multithreaded with advanced params, old streaming, 12022046 github, level -5, old streaming, 205285 github, level -5 with dict, old streaming, 46718 github, level -3, old streaming, 190643 @@ -527,17 +511,9 @@ github, level 16 with dict, old stre github, level 19, old streaming, 133717 github, level 19 with dict, old streaming, 37576 github, no source size, old streaming, 140631 -github, long distance mode, old streaming, 412933 -github, multithreaded, old streaming, 412933 -github, multithreaded long distance mode, old streaming, 412933 -github, small window log, old streaming, 412933 -github, small hash log, old streaming, 412933 -github, small chain log, old streaming, 412933 -github, explicit params, old streaming, 412933 github, uncompressed literals, old streaming, 136311 github, uncompressed literals optimal, old streaming, 133717 github, huffman literals, old streaming, 175568 -github, multithreaded with advanced params, old streaming, 412933 silesia, level -5, old streaming advanced, 6882466 silesia, level -3, old streaming advanced, 6568358 silesia, level -1, old streaming advanced, 6183385 @@ -553,17 +529,17 @@ silesia, 
level 13, old stre silesia, level 16, old streaming advanced, 4377389 silesia, level 19, old streaming advanced, 4293262 silesia, no source size, old streaming advanced, 4849455 -silesia, long distance mode, old streaming advanced, 12000408 -silesia, multithreaded, old streaming advanced, 12000408 -silesia, multithreaded long distance mode, old streaming advanced, 12000408 -silesia, small window log, old streaming advanced, 12000408 -silesia, small hash log, old streaming advanced, 12000408 -silesia, small chain log, old streaming advanced, 12000408 -silesia, explicit params, old streaming advanced, 12000408 +silesia, long distance mode, old streaming advanced, 4849491 +silesia, multithreaded, old streaming advanced, 4849491 +silesia, multithreaded long distance mode, old streaming advanced, 4849491 +silesia, small window log, old streaming advanced, 7123534 +silesia, small hash log, old streaming advanced, 6554898 +silesia, small chain log, old streaming advanced, 4931093 +silesia, explicit params, old streaming advanced, 4797048 silesia, uncompressed literals, old streaming advanced, 4849491 silesia, uncompressed literals optimal, old streaming advanced, 4293262 silesia, huffman literals, old streaming advanced, 6183385 -silesia, multithreaded with advanced params, old streaming advanced, 12000408 +silesia, multithreaded with advanced params, old streaming advanced, 4849491 silesia.tar, level -5, old streaming advanced, 6982738 silesia.tar, level -3, old streaming advanced, 6641264 silesia.tar, level -1, old streaming advanced, 6190789 @@ -579,238 +555,82 @@ silesia.tar, level 13, old stre silesia.tar, level 16, old streaming advanced, 4381284 silesia.tar, level 19, old streaming advanced, 4281511 silesia.tar, no source size, old streaming advanced, 4861372 -silesia.tar, long distance mode, old streaming advanced, 12022046 -silesia.tar, multithreaded, old streaming advanced, 12022046 -silesia.tar, multithreaded long distance mode, old streaming advanced, 12022046 
-silesia.tar, small window log, old streaming advanced, 12022046 -silesia.tar, small hash log, old streaming advanced, 12022046 -silesia.tar, small chain log, old streaming advanced, 12022046 -silesia.tar, explicit params, old streaming advanced, 12022046 +silesia.tar, long distance mode, old streaming advanced, 4861376 +silesia.tar, multithreaded, old streaming advanced, 4861376 +silesia.tar, multithreaded long distance mode, old streaming advanced, 4861376 +silesia.tar, small window log, old streaming advanced, 7127552 +silesia.tar, small hash log, old streaming advanced, 6587834 +silesia.tar, small chain log, old streaming advanced, 4943271 +silesia.tar, explicit params, old streaming advanced, 4808570 silesia.tar, uncompressed literals, old streaming advanced, 4861376 silesia.tar, uncompressed literals optimal, old streaming advanced, 4281511 silesia.tar, huffman literals, old streaming advanced, 6190789 -silesia.tar, multithreaded with advanced params, old streaming advanced, 12022046 -github, level -5, old streaming advanced, 205285 -github, level -5 with dict, old streaming advanced, 46718 -github, level -3, old streaming advanced, 190643 -github, level -3 with dict, old streaming advanced, 45395 -github, level -1, old streaming advanced, 175568 -github, level -1 with dict, old streaming advanced, 43170 -github, level 0, old streaming advanced, 136311 -github, level 0 with dict, old streaming advanced, 41148 -github, level 1, old streaming advanced, 142450 -github, level 1 with dict, old streaming advanced, 41682 -github, level 3, old streaming advanced, 136311 -github, level 3 with dict, old streaming advanced, 41148 -github, level 4, old streaming advanced, 136144 -github, level 4 with dict, old streaming advanced, 41251 -github, level 5, old streaming advanced, 135106 -github, level 5 with dict, old streaming advanced, 38938 -github, level 6, old streaming advanced, 135108 -github, level 6 with dict, old streaming advanced, 38632 -github, level 7, old 
streaming advanced, 135108 -github, level 7 with dict, old streaming advanced, 38766 -github, level 9, old streaming advanced, 135108 -github, level 9 with dict, old streaming advanced, 39326 -github, level 13, old streaming advanced, 133717 -github, level 13 with dict, old streaming advanced, 39716 -github, level 16, old streaming advanced, 133717 -github, level 16 with dict, old streaming advanced, 37577 +silesia.tar, multithreaded with advanced params, old streaming advanced, 4861376 +github, level -5, old streaming advanced, 216734 +github, level -5 with dict, old streaming advanced, 49562 +github, level -3, old streaming advanced, 192160 +github, level -3 with dict, old streaming advanced, 44956 +github, level -1, old streaming advanced, 181108 +github, level -1 with dict, old streaming advanced, 42383 +github, level 0, old streaming advanced, 141090 +github, level 0 with dict, old streaming advanced, 41113 +github, level 1, old streaming advanced, 143682 +github, level 1 with dict, old streaming advanced, 42430 +github, level 3, old streaming advanced, 141090 +github, level 3 with dict, old streaming advanced, 41113 +github, level 4, old streaming advanced, 141090 +github, level 4 with dict, old streaming advanced, 41084 +github, level 5, old streaming advanced, 139391 +github, level 5 with dict, old streaming advanced, 39159 +github, level 6, old streaming advanced, 139394 +github, level 6 with dict, old streaming advanced, 38749 +github, level 7, old streaming advanced, 138675 +github, level 7 with dict, old streaming advanced, 38746 +github, level 9, old streaming advanced, 138675 +github, level 9 with dict, old streaming advanced, 38987 +github, level 13, old streaming advanced, 138675 +github, level 13 with dict, old streaming advanced, 39724 +github, level 16, old streaming advanced, 138675 +github, level 16 with dict, old streaming advanced, 40771 github, level 19, old streaming advanced, 133717 github, level 19 with dict, old streaming advanced, 37576 
github, no source size, old streaming advanced, 140631 -github, long distance mode, old streaming advanced, 412933 -github, multithreaded, old streaming advanced, 412933 -github, multithreaded long distance mode, old streaming advanced, 412933 -github, small window log, old streaming advanced, 412933 -github, small hash log, old streaming advanced, 412933 -github, small chain log, old streaming advanced, 412933 -github, explicit params, old streaming advanced, 412933 -github, uncompressed literals, old streaming advanced, 136311 +github, long distance mode, old streaming advanced, 141090 +github, multithreaded, old streaming advanced, 141090 +github, multithreaded long distance mode, old streaming advanced, 141090 +github, small window log, old streaming advanced, 141090 +github, small hash log, old streaming advanced, 141578 +github, small chain log, old streaming advanced, 139258 +github, explicit params, old streaming advanced, 140930 +github, uncompressed literals, old streaming advanced, 141090 github, uncompressed literals optimal, old streaming advanced, 133717 -github, huffman literals, old streaming advanced, 175568 -github, multithreaded with advanced params, old streaming advanced, 412933 -silesia, level -5, old streaming cdcit, 6882466 -silesia, level -3, old streaming cdcit, 6568358 -silesia, level -1, old streaming cdcit, 6183385 -silesia, level 0, old streaming cdcit, 4849491 -silesia, level 1, old streaming cdcit, 5314109 -silesia, level 3, old streaming cdcit, 4849491 -silesia, level 4, old streaming cdcit, 4786913 -silesia, level 5, old streaming cdcit, 4710178 -silesia, level 6, old streaming cdcit, 4659996 -silesia, level 7, old streaming cdcit, 4596234 -silesia, level 9, old streaming cdcit, 4543862 -silesia, level 13, old streaming cdcit, 4482073 -silesia, level 16, old streaming cdcit, 4377389 -silesia, level 19, old streaming cdcit, 4293262 -silesia, no source size, old streaming cdcit, 4849455 -silesia, long distance mode, old streaming 
cdcit, 12000408 -silesia, multithreaded, old streaming cdcit, 12000408 -silesia, multithreaded long distance mode, old streaming cdcit, 12000408 -silesia, small window log, old streaming cdcit, 12000408 -silesia, small hash log, old streaming cdcit, 12000408 -silesia, small chain log, old streaming cdcit, 12000408 -silesia, explicit params, old streaming cdcit, 12000408 -silesia, uncompressed literals, old streaming cdcit, 4849491 -silesia, uncompressed literals optimal, old streaming cdcit, 4293262 -silesia, huffman literals, old streaming cdcit, 6183385 -silesia, multithreaded with advanced params, old streaming cdcit, 12000408 -silesia.tar, level -5, old streaming cdcit, 6982738 -silesia.tar, level -3, old streaming cdcit, 6641264 -silesia.tar, level -1, old streaming cdcit, 6190789 -silesia.tar, level 0, old streaming cdcit, 4861376 -silesia.tar, level 1, old streaming cdcit, 5336879 -silesia.tar, level 3, old streaming cdcit, 4861376 -silesia.tar, level 4, old streaming cdcit, 4799583 -silesia.tar, level 5, old streaming cdcit, 4722276 -silesia.tar, level 6, old streaming cdcit, 4672240 -silesia.tar, level 7, old streaming cdcit, 4606657 -silesia.tar, level 9, old streaming cdcit, 4554106 -silesia.tar, level 13, old streaming cdcit, 4491707 -silesia.tar, level 16, old streaming cdcit, 4381284 -silesia.tar, level 19, old streaming cdcit, 4281511 -silesia.tar, no source size, old streaming cdcit, 4861372 -silesia.tar, long distance mode, old streaming cdcit, 12022046 -silesia.tar, multithreaded, old streaming cdcit, 12022046 -silesia.tar, multithreaded long distance mode, old streaming cdcit, 12022046 -silesia.tar, small window log, old streaming cdcit, 12022046 -silesia.tar, small hash log, old streaming cdcit, 12022046 -silesia.tar, small chain log, old streaming cdcit, 12022046 -silesia.tar, explicit params, old streaming cdcit, 12022046 -silesia.tar, uncompressed literals, old streaming cdcit, 4861376 -silesia.tar, uncompressed literals optimal, old 
streaming cdcit, 4281511 -silesia.tar, huffman literals, old streaming cdcit, 6190789 -silesia.tar, multithreaded with advanced params, old streaming cdcit, 12022046 -github, level -5, old streaming cdcit, 205285 +github, huffman literals, old streaming advanced, 181108 +github, multithreaded with advanced params, old streaming advanced, 141090 github, level -5 with dict, old streaming cdcit, 46718 -github, level -3, old streaming cdcit, 190643 github, level -3 with dict, old streaming cdcit, 45395 -github, level -1, old streaming cdcit, 175568 github, level -1 with dict, old streaming cdcit, 43170 -github, level 0, old streaming cdcit, 136311 github, level 0 with dict, old streaming cdcit, 41148 -github, level 1, old streaming cdcit, 142450 github, level 1 with dict, old streaming cdcit, 41682 -github, level 3, old streaming cdcit, 136311 github, level 3 with dict, old streaming cdcit, 41148 -github, level 4, old streaming cdcit, 136144 github, level 4 with dict, old streaming cdcit, 41251 -github, level 5, old streaming cdcit, 135106 github, level 5 with dict, old streaming cdcit, 38938 -github, level 6, old streaming cdcit, 135108 github, level 6 with dict, old streaming cdcit, 38632 -github, level 7, old streaming cdcit, 135108 github, level 7 with dict, old streaming cdcit, 38766 -github, level 9, old streaming cdcit, 135108 github, level 9 with dict, old streaming cdcit, 39326 -github, level 13, old streaming cdcit, 133717 github, level 13 with dict, old streaming cdcit, 39716 -github, level 16, old streaming cdcit, 133717 github, level 16 with dict, old streaming cdcit, 37577 -github, level 19, old streaming cdcit, 133717 github, level 19 with dict, old streaming cdcit, 37576 -github, no source size, old streaming cdcit, 140631 -github, long distance mode, old streaming cdcit, 412933 -github, multithreaded, old streaming cdcit, 412933 -github, multithreaded long distance mode, old streaming cdcit, 412933 -github, small window log, old streaming cdcit, 412933 
-github, small hash log, old streaming cdcit, 412933 -github, small chain log, old streaming cdcit, 412933 -github, explicit params, old streaming cdcit, 412933 -github, uncompressed literals, old streaming cdcit, 136311 -github, uncompressed literals optimal, old streaming cdcit, 133717 -github, huffman literals, old streaming cdcit, 175568 -github, multithreaded with advanced params, old streaming cdcit, 412933 -silesia, level -5, old streaming advanced cdict, 6882466 -silesia, level -3, old streaming advanced cdict, 6568358 -silesia, level -1, old streaming advanced cdict, 6183385 -silesia, level 0, old streaming advanced cdict, 4849491 -silesia, level 1, old streaming advanced cdict, 5314109 -silesia, level 3, old streaming advanced cdict, 4849491 -silesia, level 4, old streaming advanced cdict, 4786913 -silesia, level 5, old streaming advanced cdict, 4710178 -silesia, level 6, old streaming advanced cdict, 4659996 -silesia, level 7, old streaming advanced cdict, 4596234 -silesia, level 9, old streaming advanced cdict, 4543862 -silesia, level 13, old streaming advanced cdict, 4482073 -silesia, level 16, old streaming advanced cdict, 4377389 -silesia, level 19, old streaming advanced cdict, 4293262 -silesia, no source size, old streaming advanced cdict, 4849455 -silesia, long distance mode, old streaming advanced cdict, 12000408 -silesia, multithreaded, old streaming advanced cdict, 12000408 -silesia, multithreaded long distance mode, old streaming advanced cdict, 12000408 -silesia, small window log, old streaming advanced cdict, 12000408 -silesia, small hash log, old streaming advanced cdict, 12000408 -silesia, small chain log, old streaming advanced cdict, 12000408 -silesia, explicit params, old streaming advanced cdict, 12000408 -silesia, uncompressed literals, old streaming advanced cdict, 4849491 -silesia, uncompressed literals optimal, old streaming advanced cdict, 4293262 -silesia, huffman literals, old streaming advanced cdict, 6183385 -silesia, 
multithreaded with advanced params, old streaming advanced cdict, 12000408 -silesia.tar, level -5, old streaming advanced cdict, 6982738 -silesia.tar, level -3, old streaming advanced cdict, 6641264 -silesia.tar, level -1, old streaming advanced cdict, 6190789 -silesia.tar, level 0, old streaming advanced cdict, 4861376 -silesia.tar, level 1, old streaming advanced cdict, 5336879 -silesia.tar, level 3, old streaming advanced cdict, 4861376 -silesia.tar, level 4, old streaming advanced cdict, 4799583 -silesia.tar, level 5, old streaming advanced cdict, 4722276 -silesia.tar, level 6, old streaming advanced cdict, 4672240 -silesia.tar, level 7, old streaming advanced cdict, 4606657 -silesia.tar, level 9, old streaming advanced cdict, 4554106 -silesia.tar, level 13, old streaming advanced cdict, 4491707 -silesia.tar, level 16, old streaming advanced cdict, 4381284 -silesia.tar, level 19, old streaming advanced cdict, 4281511 -silesia.tar, no source size, old streaming advanced cdict, 4861372 -silesia.tar, long distance mode, old streaming advanced cdict, 12022046 -silesia.tar, multithreaded, old streaming advanced cdict, 12022046 -silesia.tar, multithreaded long distance mode, old streaming advanced cdict, 12022046 -silesia.tar, small window log, old streaming advanced cdict, 12022046 -silesia.tar, small hash log, old streaming advanced cdict, 12022046 -silesia.tar, small chain log, old streaming advanced cdict, 12022046 -silesia.tar, explicit params, old streaming advanced cdict, 12022046 -silesia.tar, uncompressed literals, old streaming advanced cdict, 4861376 -silesia.tar, uncompressed literals optimal, old streaming advanced cdict, 4281511 -silesia.tar, huffman literals, old streaming advanced cdict, 6190789 -silesia.tar, multithreaded with advanced params, old streaming advanced cdict, 12022046 -github, level -5, old streaming advanced cdict, 205285 -github, level -5 with dict, old streaming advanced cdict, 46718 -github, level -3, old streaming advanced cdict, 
190643 -github, level -3 with dict, old streaming advanced cdict, 45395 -github, level -1, old streaming advanced cdict, 175568 -github, level -1 with dict, old streaming advanced cdict, 43170 -github, level 0, old streaming advanced cdict, 136311 -github, level 0 with dict, old streaming advanced cdict, 41148 -github, level 1, old streaming advanced cdict, 142450 -github, level 1 with dict, old streaming advanced cdict, 41682 -github, level 3, old streaming advanced cdict, 136311 -github, level 3 with dict, old streaming advanced cdict, 41148 -github, level 4, old streaming advanced cdict, 136144 -github, level 4 with dict, old streaming advanced cdict, 41251 -github, level 5, old streaming advanced cdict, 135106 -github, level 5 with dict, old streaming advanced cdict, 38938 -github, level 6, old streaming advanced cdict, 135108 -github, level 6 with dict, old streaming advanced cdict, 38632 -github, level 7, old streaming advanced cdict, 135108 -github, level 7 with dict, old streaming advanced cdict, 38766 -github, level 9, old streaming advanced cdict, 135108 -github, level 9 with dict, old streaming advanced cdict, 39326 -github, level 13, old streaming advanced cdict, 133717 -github, level 13 with dict, old streaming advanced cdict, 39716 -github, level 16, old streaming advanced cdict, 133717 -github, level 16 with dict, old streaming advanced cdict, 37577 -github, level 19, old streaming advanced cdict, 133717 +github, level -5 with dict, old streaming advanced cdict, 49562 +github, level -3 with dict, old streaming advanced cdict, 44956 +github, level -1 with dict, old streaming advanced cdict, 42383 +github, level 0 with dict, old streaming advanced cdict, 41113 +github, level 1 with dict, old streaming advanced cdict, 42430 +github, level 3 with dict, old streaming advanced cdict, 41113 +github, level 4 with dict, old streaming advanced cdict, 41084 +github, level 5 with dict, old streaming advanced cdict, 39158 +github, level 6 with dict, old streaming 
advanced cdict, 38748 +github, level 7 with dict, old streaming advanced cdict, 38744 +github, level 9 with dict, old streaming advanced cdict, 38986 +github, level 13 with dict, old streaming advanced cdict, 39724 +github, level 16 with dict, old streaming advanced cdict, 40771 github, level 19 with dict, old streaming advanced cdict, 37576 -github, no source size, old streaming advanced cdict, 140631 -github, long distance mode, old streaming advanced cdict, 412933 -github, multithreaded, old streaming advanced cdict, 412933 -github, multithreaded long distance mode, old streaming advanced cdict, 412933 -github, small window log, old streaming advanced cdict, 412933 -github, small hash log, old streaming advanced cdict, 412933 -github, small chain log, old streaming advanced cdict, 412933 -github, explicit params, old streaming advanced cdict, 412933 -github, uncompressed literals, old streaming advanced cdict, 136311 -github, uncompressed literals optimal, old streaming advanced cdict, 133717 -github, huffman literals, old streaming advanced cdict, 175568 -github, multithreaded with advanced params, old streaming advanced cdict, 412933 From f966cd080a44c301afa7bbe88d0c3a2f7c4b8a23 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 22 Oct 2019 17:43:09 -0700 Subject: [PATCH 39/50] added documentation on DYNAMIC_BMI2 build macro --- lib/README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/README.md b/lib/README.md index 792729b1f..0062c0d63 100644 --- a/lib/README.md +++ b/lib/README.md @@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions : Both conditions are automatically applied when invoking `make lib-mt` target. When linking a POSIX program with a multithreaded version of `libzstd`, -note that it's necessary to request the `-pthread` flag during link stage. +note that it's necessary to invoke the `-pthread` flag during link stage. 
Multithreading capabilities are exposed -via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592). +via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351). #### API @@ -112,6 +112,17 @@ The file structure is designed to make this selection manually achievable for an will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in the shared library, which is now hidden by default. +- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries + which can detect at runtime the presence of BMI2 instructions, and use them only if present. + These instructions contribute to better performance, notably on the decoder side. + By default, this feature is automatically enabled on detecting + the right instruction set (x64) and compiler (clang or gcc >= 5). + It's obviously disabled for different cpus, + or when BMI2 instruction set is _required_ by the compiler command line + (in this case, only the BMI2 code path is generated). + Setting this macro will either force to generate the BMI2 dispatcher (1) + or prevent it (0). It overrides automatic detection. 
+ #### Windows : using MinGW+MSYS to create DLL From 5228c0718dd87bcd05f52ebf2686d9acb3cb8013 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Tue, 22 Oct 2019 17:43:29 -0700 Subject: [PATCH 40/50] Adding arm64 make check --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.travis.yml b/.travis.yml index a6e1a99ea..26db67532 100644 --- a/.travis.yml +++ b/.travis.yml @@ -222,5 +222,12 @@ matrix: - tree ./staging after_failure: - cat "$TRAVIS_BUILD_DIR"/builddir/meson-logs/testlog.txt + + # arm 64 + - name: arm64 + os: linux + arch: arm64 + script: + - make check allow_failures: - env: ALLOW_FAILURES=true From 4899b6fdbdafefa1e5cb06e4600897002d609e74 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Wed, 23 Oct 2019 10:43:20 -0700 Subject: [PATCH 41/50] Moving arm64 test outside the meson scope --- .travis.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 26db67532..97f95a60e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,12 @@ env: matrix: fast_finish: true include: + - name: arm64 + os: linux + arch: arm64 + script: + - make check + - name: Trusty (Test All) script: - make test @@ -223,11 +229,5 @@ matrix: after_failure: - cat "$TRAVIS_BUILD_DIR"/builddir/meson-logs/testlog.txt - # arm 64 - - name: arm64 - os: linux - arch: arm64 - script: - - make check allow_failures: - env: ALLOW_FAILURES=true From 63e435dda1226134ad9a7239b14681720555863f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Oct 2019 11:59:32 -0700 Subject: [PATCH 42/50] improve deprecation warning macro fix #1488 although, curiously enough, I was never able to reproduce the issue (according to the bug report, it should be present while using gcc 4.8). 
--- lib/deprecated/zbuff.h | 11 ++++++----- tests/Makefile | 14 ++++---------- tests/zbufftest.c | 4 ++-- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h index a93115da4..04183eab9 100644 --- a/lib/deprecated/zbuff.h +++ b/lib/deprecated/zbuff.h @@ -36,16 +36,17 @@ extern "C" { *****************************************************************/ /* Deprecation warnings */ /* Should these warnings be a problem, - it is generally possible to disable them, - typically with -Wno-deprecated-declarations for gcc - or _CRT_SECURE_NO_WARNINGS in Visual. - Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */ + * it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS + */ #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS # define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ #else # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API -# elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__) +# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__) # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) # elif defined(__GNUC__) && (__GNUC__ >= 3) # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) diff --git a/tests/Makefile b/tests/Makefile index 161c823ed..3917a7cf8 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -160,19 +160,13 @@ fuzzer-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) -zbufftest : CPPFLAGS += -I$(ZSTDDIR)/deprecated -zbufftest : CFLAGS += 
-Wno-deprecated-declarations # required to silence deprecation warnings -zbufftest : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c - $(CC) $(FLAGS) $^ -o $@$(EXT) - -zbufftest32 : CPPFLAGS += -I$(ZSTDDIR)/deprecated -zbufftest32 : CFLAGS += -Wno-deprecated-declarations -m32 -zbufftest32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c +zbufftest zbufftest32 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated +zbufftest zbufftest32 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings +zbufftest32 : CFLAGS += -m32 +zbufftest zbufftest32 : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c $(CC) $(FLAGS) $^ -o $@$(EXT) zbufftest-dll : zstd-dll -zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated -zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings zbufftest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) diff --git a/tests/zbufftest.c b/tests/zbufftest.c index 944148262..8a4a27907 100644 --- a/tests/zbufftest.c +++ b/tests/zbufftest.c @@ -81,7 +81,7 @@ static U64 g_clockTime = 0; @return : a 27 bits random value, from a 32-bits `seed`. 
`seed` is also modified */ # define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) -unsigned int FUZ_rand(unsigned int* seedPtr) +static unsigned int FUZ_rand(unsigned int* seedPtr) { U32 rand32 = *seedPtr; rand32 *= prime1; @@ -467,7 +467,7 @@ _output_error: /*-******************************************************* * Command line *********************************************************/ -int FUZ_usage(const char* programName) +static int FUZ_usage(const char* programName) { DISPLAY( "Usage :\n"); DISPLAY( " %s [args]\n", programName); From 42a22af78b068bd934123ae7e1d6eee1544282f6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 24 Oct 2019 15:19:05 -0700 Subject: [PATCH 43/50] fix zlibWrapper for Visual Studio As per https://github.com/facebook/zstd/issues/1800#issuecomment-545945050, Visual does not support `ssize_t` type, which is an issue for `gzread.c`. Added a work around, suggested by @bluenlive Note : I have not been able to confirm the problem, so this is a blind fix. This seems safe outside of Visual, since it is gated by _MSC_VER macro. --- zlibWrapper/gzread.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c index bcac9700d..359d17889 100644 --- a/zlibWrapper/gzread.c +++ b/zlibWrapper/gzread.c @@ -8,6 +8,14 @@ #include "gzguts.h" +/* fix for Visual Studio, which doesn't support ssize_t type. + * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */ +#if defined(_MSC_VER) && !defined(ssize_t) +# include <BaseTsd.h> + typedef SSIZE_T ssize_t; +#endif + + /* Local functions */ local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); local int gz_avail OF((gz_statep)); From 91c3f545cc51fbcd8d5131ee06a944125911ac02 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 24 Oct 2019 20:18:57 -0400 Subject: [PATCH 44/50] Fix Build; Refactor --- programs/fileio.c | 122 ++++++++++++++++++++++++++++++---------------- programs/fileio.h | 15 ++++-- 2 files changed, 91 insertions(+), 46 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 07503f052..c2c5618e5 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1496,17 +1496,17 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ char* outDirFilename = NULL; size_t sfnSize = strlen(srcFileName); - size_t const suffixSize = strlen(suffix); + size_t const srcSuffixLen = strlen(suffix); if (outDirName) { - outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize); + outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); sfnSize = strlen(outDirFilename); assert(outDirFilename != NULL); } - if (dfnbCapacity <= sfnSize+suffixSize+1) { + if (dfnbCapacity <= sfnSize+srcSuffixLen+1) { /* resize buffer for dstName */ free(dstFileNameBuffer); - dfnbCapacity = sfnSize + suffixSize + 30; + dfnbCapacity = sfnSize + srcSuffixLen + 30; dstFileNameBuffer = (char*)malloc(dfnbCapacity); if (!dstFileNameBuffer) { EXM_THROW(30, "zstd: %s", strerror(errno)); @@ -1520,7 +1520,7 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con } else { memcpy(dstFileNameBuffer, srcFileName, sfnSize); } - memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */); + memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */); return dstFileNameBuffer; } @@ -2287,6 +2287,37 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs, return decodingError; } +static const char *suffixList[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, +#ifdef ZSTD_GZDECOMPRESS + GZ_EXTENSION, + TGZ_EXTENSION, +#endif +#ifdef 
ZSTD_LZMADECOMPRESS + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, +#endif +#ifdef ZSTD_LZ4DECOMPRESS + LZ4_EXTENSION, + TLZ4_EXTENSION, +#endif + NULL +}; + +static const char *suffixListStr = + ZSTD_EXTENSION "/" TZSTD_EXTENSION +#ifdef ZSTD_GZDECOMPRESS + "/" GZ_EXTENSION "/" TGZ_EXTENSION +#endif +#ifdef ZSTD_LZMADECOMPRESS + "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION +#endif +#ifdef ZSTD_LZ4DECOMPRESS + "/" LZ4_EXTENSION "/" TLZ4_EXTENSION +#endif +; /* FIO_determineDstName() : * create a destination filename from a srcFileName. @@ -2297,72 +2328,79 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName) { static size_t dfnbCapacity = 0; static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + size_t dstFileNameEndPos; char* outDirFilename = NULL; - - const char* SUFFIX_LIST = ZSTD_EXTENSION "/" TZSTD_EXTENSION - #ifdef ZSTD_GZDECOMPRESS - "/" GZ_EXTENSION "/" TGZ_EXTENSION - #endif - #ifdef ZSTD_LZMADECOMPRESS - "/" XZ_EXTENSION "/" LZMA_EXTENSION "/" TXZ_EXTENSION - #endif - #ifdef ZSTD_LZ4DECOMPRESS - "/" LZ4_EXTENSION "/" TLZ4_EXTENSION - #endif - ; + const char* dstSuffix = ""; + size_t dstSuffixLen = 0; size_t sfnSize = strlen(srcFileName); - size_t suffixSize; - const char* const suffixPtr = strrchr(srcFileName, '.'); - if (suffixPtr == NULL) { - DISPLAYLEVEL(1, "zstd: %s: missing suffix (%s expected). Can't derive the output file name so specify it with -o dstFileName. -- ignored \n", - srcFileName, SUFFIX_LIST); + size_t srcSuffixLen; + const char* const srcSuffix = strrchr(srcFileName, '.'); + if (srcSuffix == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. 
Ignoring.\n", + srcFileName, suffixListStr); return NULL; } - suffixSize = strlen(suffixPtr); + srcSuffixLen = strlen(srcSuffix); - /* check suffix is authorized */ - if (sfnSize <= suffixSize - || (strstr(SUFFIX_LIST, suffixPtr) == NULL)) { - DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected). Can't derive the output file name so specify it with -o dstFileName. -- ignored \n", - srcFileName, SUFFIX_LIST); - return NULL; + { + const char** matchedSuffixPtr; + for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) { + if (!strcmp(*matchedSuffixPtr, srcSuffix)) { + break; + } + } + + /* check suffix is authorized */ + if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. Ignoring.\n", + srcFileName, suffixListStr); + return NULL; + } + + if ((*matchedSuffixPtr)[1] == 't') { + dstSuffix = ".tar"; + dstSuffixLen = strlen(dstSuffix); + } } + if (outDirName) { outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0); sfnSize = strlen(outDirFilename); assert(outDirFilename != NULL); } - if (dfnbCapacity+suffixSize <= sfnSize+1) { + if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) { /* allocate enough space to write dstFilename into it */ free(dstFileNameBuffer); dfnbCapacity = sfnSize + 20; dstFileNameBuffer = (char*)malloc(dfnbCapacity); if (dstFileNameBuffer==NULL) - EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno)); + EXM_THROW(74, "%s : not enough memory for dstFileName", + strerror(errno)); } /* return dst name == src name truncated from suffix */ assert(dstFileNameBuffer != NULL); - size_t dstFileNameEndPos = sfnSize - suffixSize; + dstFileNameEndPos = sfnSize - srcSuffixLen; if (outDirFilename) { memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos); free(outDirFilename); } else { memcpy(dstFileNameBuffer, srcFileName, 
dstFileNameEndPos); } - /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" extension on decompression - * To check that the file is one of them we can check that it starts with "t" - */ - if (suffixPtr[1] == 't') { - dstFileNameBuffer[dstFileNameEndPos++] = '.'; - dstFileNameBuffer[dstFileNameEndPos++] = 't'; - dstFileNameBuffer[dstFileNameEndPos++] = 'a'; - dstFileNameBuffer[dstFileNameEndPos++] = 'r'; - } - dstFileNameBuffer[dstFileNameEndPos] = '\0'; + + /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" + * extension on decompression. Also writes terminating null. */ + strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); + dstFileNameEndPos += dstSuffixLen; return dstFileNameBuffer; /* note : dstFileNameBuffer memory is not going to be free */ diff --git a/programs/fileio.h b/programs/fileio.h index 42e2274ab..39eb544a1 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -30,15 +30,22 @@ extern "C" { #else # define nulmark "/dev/null" #endif + +/* We test whether the extension we found starts with 't', and if so, we append + * ".tar" to the end of the output name. + */ #define LZMA_EXTENSION ".lzma" #define XZ_EXTENSION ".xz" -#define TXZ_EXTENSION ".txz" +#define TXZ_EXTENSION ".txz" + #define GZ_EXTENSION ".gz" -#define TGZ_EXTENSION ".tgz" +#define TGZ_EXTENSION ".tgz" + #define ZSTD_EXTENSION ".zst" -#define TZSTD_EXTENSION ".tzst" +#define TZSTD_EXTENSION ".tzst" + #define LZ4_EXTENSION ".lz4" -#define TLZ4_EXTENSION ".tlz4" +#define TLZ4_EXTENSION ".tlz4" /*-************************************* From 24499036ba88dbead8cca53a6075c9229ef959f9 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 24 Oct 2019 20:19:10 -0400 Subject: [PATCH 45/50] Add Tests --- tests/playTests.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/playTests.sh b/tests/playTests.sh index f68ee81a5..9d9c8e3dc 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -845,6 +845,46 @@ if [ $LZ4MODE -ne 1 ]; then grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed" fi +println "\n===> tar extension tests " + +rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4 + +./datagen > tmp +tar cf tmp.tar tmp +$ZSTD tmp.tar -o tmp.tzst +rm tmp.tar +$ZSTD -d tmp.tzst +[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!" +rm -f tmp.tar tmp.tzst + +if [ $GZIPMODE -eq 1 ]; then + tar czf tmp.tgz tmp + $ZSTD -d tmp.tgz + [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!" + rm -f tmp.tar tmp.tgz +fi + +if [ $LZMAMODE -eq 1 ]; then + tar c tmp | xz > tmp.txz + $ZSTD -d tmp.txz + [ -e tmp.tar ] || die ".txz failed to decompress to .tar!" + rm -f tmp.tar tmp.txz +fi + +if [ $LZ4MODE -eq 1 ]; then + tar c tmp | lz4 > tmp.tlz4 + $ZSTD -d tmp.tlz4 + [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!" + rm -f tmp.tar tmp.tlz4 +fi + +touch tmp.t tmp.tz tmp.tzs +! $ZSTD -d tmp.t +! $ZSTD -d tmp.tz +! $ZSTD -d tmp.tzs + +exit + println "\n===> zstd round-trip tests " roundTripTest From 4eccc82875d6e5ca8a621fc80acd363a5efb9d3f Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 25 Oct 2019 13:58:58 -0400 Subject: [PATCH 46/50] Minor Fixes --- programs/fileio.c | 1 - programs/fileio.h | 3 ++- tests/playTests.sh | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index c2c5618e5..828878c6a 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2400,7 +2400,6 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName) /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" * extension on decompression. 
Also writes terminating null. */ strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); - dstFileNameEndPos += dstSuffixLen; return dstFileNameBuffer; /* note : dstFileNameBuffer memory is not going to be free */ diff --git a/programs/fileio.h b/programs/fileio.h index 39eb544a1..af2c5d9d1 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -31,7 +31,8 @@ extern "C" { # define nulmark "/dev/null" #endif -/* We test whether the extension we found starts with 't', and if so, we append +/** + * We test whether the extension we found starts with 't', and if so, we append * ".tar" to the end of the output name. */ #define LZMA_EXTENSION ".lzma" diff --git a/tests/playTests.sh b/tests/playTests.sh index 9d9c8e3dc..c1da16507 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -865,14 +865,14 @@ if [ $GZIPMODE -eq 1 ]; then fi if [ $LZMAMODE -eq 1 ]; then - tar c tmp | xz > tmp.txz + tar c tmp | $ZSTD --format=xz > tmp.txz $ZSTD -d tmp.txz [ -e tmp.tar ] || die ".txz failed to decompress to .tar!" rm -f tmp.tar tmp.txz fi if [ $LZ4MODE -eq 1 ]; then - tar c tmp | lz4 > tmp.tlz4 + tar c tmp | $ZSTD --format=lz4 > tmp.tlz4 $ZSTD -d tmp.tlz4 [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!" rm -f tmp.tar tmp.tlz4 From 74bd76c3ffb3837a3fefc055edbc347620447236 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 25 Oct 2019 15:05:20 -0400 Subject: [PATCH 47/50] In pkg-config File, Derive Lib and Include Dir from Prefix at Use-Time Addresses #1794. Instead of deriving the lib dir and include dir at build-time, let's do it like everyone else does at pkg-config run-time. This has the disadvantage that we can no longer override LIBDIR and INCLUDEDIR in the Makefile and have that reflected in the .pc file. 
--- build/cmake/lib/CMakeLists.txt | 3 +-- lib/Makefile | 2 -- lib/libzstd.pc.in | 5 +++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 77b389cae..7adca875c 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -134,11 +134,10 @@ if (UNIX) # pkg-config set(PREFIX "${CMAKE_INSTALL_PREFIX}") set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}") - set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}") set(VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}") add_custom_target(libzstd.pc ALL ${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc" - -DPREFIX="${PREFIX}" -DLIBDIR="${LIBDIR}" -DINCLUDEDIR="${INCLUDEDIR}" -DVERSION="${VERSION}" + -DPREFIX="${PREFIX}" -DVERSION="${VERSION}" -P "${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake" COMMENT "Creating pkg-config file") diff --git a/lib/Makefile b/lib/Makefile index 87a396c53..273ceb904 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -244,8 +244,6 @@ libzstd.pc: libzstd.pc: libzstd.pc.in @echo creating pkgconfig @sed -e 's|@PREFIX@|$(PREFIX)|' \ - -e 's|@LIBDIR@|$(LIBDIR)|' \ - -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ -e 's|@VERSION@|$(VERSION)|' \ $< >$@ diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in index 1d07b91f2..e7880be47 100644 --- a/lib/libzstd.pc.in +++ b/lib/libzstd.pc.in @@ -3,8 +3,9 @@ # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) prefix=@PREFIX@ -libdir=@LIBDIR@ -includedir=@INCLUDEDIR@ +exec_prefix=${prefix} +includedir=${prefix}/include +libdir=${exec_prefix}/lib Name: zstd Description: fast lossless compression algorithm library From 74065da4c5ea14a9755982e5ca0a72bf2d4f10d5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 28 Oct 2019 11:15:41 -0700 Subject: [PATCH 48/50] updated API inline doc and manual regarding ZSTD_CDict created without a dictBuffer. 
--- doc/zstd_manual.html | 26 +++++++++++++++++++------- lib/zstd.h | 17 +++++++++++------ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 0021eec28..43c5555b8 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -692,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                              int compressionLevel);
-

When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. - ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. +

When compressing multiple messages or blocks using the same dictionary, + it's recommended to digest the dictionary only once, since it's a costly operation. + ZSTD_createCDict() will create a state from digesting a dictionary. + The resulting state can be used for future compression operations with very limited startup cost. ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. - Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. - Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. + @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + in which case the only thing that it transports is the @compressionLevel. + This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + expecting a ZSTD_CDict parameter with any data, including those without a known dictionary.


size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -947,7 +952,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
      * to evolve and should be considered only in the context of extremely
      * advanced performance tuning.
      *
-     * Zstd currently supports the use of a CDict in two ways:
+     * Zstd currently supports the use of a CDict in three ways:
      *
      * - The contents of the CDict can be copied into the working context. This
      *   means that the compression can search both the dictionary and input
@@ -963,6 +968,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
      *   working context's tables can be reused). For small inputs, this can be
      *   faster than copying the CDict's tables.
      *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
      * Zstd has a simple internal heuristic that selects which strategy to use
      * at the beginning of a compression. However, if experimentation shows that
      * Zstd is making poor choices, it is possible to override that choice with
@@ -970,7 +981,8 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
      */
     ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
     ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
-    ZSTD_dictForceCopy     = 2  /* Always copy the dictionary. */
+    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
+    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
 } ZSTD_dictAttachPref_e;
 

typedef enum {
diff --git a/lib/zstd.h b/lib/zstd.h
index 24d23ff81..72080ea87 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -808,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 typedef struct ZSTD_CDict_s ZSTD_CDict;
 
 /*! ZSTD_createCDict() :
- *  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- *  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
  *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- *  Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- *  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                                          int compressionLevel);
 
@@ -1167,7 +1172,7 @@ typedef enum {
      *   tables. However, this model incurs no start-up cost (as long as the
      *   working context's tables can be reused). For small inputs, this can be
      *   faster than copying the CDict's tables.
-     * 
+     *
      * - The CDict's tables are not used at all, and instead we use the working
      *   context alone to reload the dictionary and use params based on the source
      *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().

From faddd2aa1dd85bdb51e1eaa55f77b76cbde2d9e5 Mon Sep 17 00:00:00 2001
From: Yann Collet 
Date: Mon, 28 Oct 2019 12:57:05 -0700
Subject: [PATCH 49/50] updated CHANGELOG for v1.4.4

---
 CHANGELOG | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index ae54896a9..44600267d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,8 +1,32 @@
+v1.4.4
+perf: Improved decompression speed, by > 10%, by @terrelln
+perf: Better compression speed when re-using a context, by @felixhandte
+perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
+perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
+perf: minor generic speed optimization, by @davidbolvansky
+api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
+api: fixed decoding of magic-less frames, by @terrelln
+api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
+cli: Named pipes support, by @bimbashrestha
+cli: short tar's extension support, by @stokito
+cli: command --output-dir-flat=DIR, generates target files into requested directory, by @senhuang42
+cli: commands --stream-size=# and --size-hint=#, by @nmagerko
+cli: faster `-t` test mode
+cli: improved some error messages, by @vangyzen
+cli: fixed rare deadlock condition within dictionary builder, by @terrelln
+build: single-file decoder with emscripten compilation script, by @cwoffenden
+build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
+build: fixed deprecation warning for certain gcc version, reported by @jasonma163
+build: fix compilation on old gcc versions, by @cemeyer
+build: improved installation directories for cmake script, by Dmitri Shubin
+pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
+misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
+misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
+
 v1.4.3
 bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
-bug: Fix Buffer Overflow in v0.3 Decompression by @felixhandte (#1722)
+bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
 build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
-misc: Add NULL pointer check in util.c by @leeyoung624 (#1706)
 
 v1.4.2
 bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)

From 9c1860861ef15891f60510f9d4c3d6a75da2e12f Mon Sep 17 00:00:00 2001
From: Nick Terrell 
Date: Mon, 28 Oct 2019 17:51:17 -0700
Subject: [PATCH 50/50] Fix assert in ZSTD_safecopy

In the case that `op >= oend_w` it is possible that `diff < 8` because
the two buffers could be adjacent.

Credit to OSS-Fuzz, which found the bug. It isn't reproducible because
it depends on the memory layout.
---
 lib/decompress/zstd_decompress_block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index cbb66c8db..767e5f9a0 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -617,7 +617,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
     ptrdiff_t const diff = op - ip;
     BYTE* const oend = op + length;
 
-    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8)) ||
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
            (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
 
     if (length < 8) {