diff --git a/appveyor.yml b/appveyor.yml index 0f9142b47..7d6ca99bb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -9,7 +9,7 @@ - COMPILER: "gcc" HOST: "mingw" PLATFORM: "x64" - SCRIPT: "make allzstd MOREFLAGS=-static && make -C tests test-symbols fullbench-dll fullbench-lib" + SCRIPT: "make allzstd MOREFLAGS=-static && make -C tests test-symbols fullbench-lib" ARTIFACT: "true" BUILD: "true" - COMPILER: "gcc" diff --git a/lib/common/huf.h b/lib/common/huf.h index 522bf9b6c..1cead357a 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -206,10 +206,10 @@ The following API allows targeting specific sub-functions for advanced tasks. For example, it's possible to compress several blocks using the same 'CTable', or to save and regenerate 'CTable' using external methods. */ -/* FSE_count() : find it within "fse.h" */ +/* FSE_count() : exposed within "fse.h" */ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap, in which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 5692d56e0..cfc5a98bb 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -405,6 +405,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu } /** HUF_buildCTable() : + * @return : maxNbBits * Note : count is used before tree is written, so they can safely overlap */ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 108229dea..8a0b46e09 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2461,7 +2461,6 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize, + cctxSize; ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; void* ptr; - DEBUGLOG(4, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7); if ((size_t)workspace & 7) return NULL; /* 8-aligned */ DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", (U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize)); diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index efdffddbf..b5a3957a9 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, /* Checks */ if (totalSamplesSize < MAX(d, sizeof(U64)) || totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { - DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n", - (COVER_MAX_SAMPLES_SIZE >> 20)); + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); return 0; } /* Zero the context */ @@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, } ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, - ZDICT_cover_params_t parameters) { - BYTE *const dict = (BYTE *)dictBuffer; + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; COVER_ctx_t ctx; COVER_map_t activeDmers; + + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; /* Checks */ if (!COVER_checkParameters(parameters, dictBufferCapacity)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n"); @@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( ZDICT_DICTSIZE_MIN); return ERROR(dstSize_tooSmall); } - /* Initialize global data */ - g_displayLevel = parameters.zParams.notificationLevel; /* Initialize context and activeDmers */ if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, parameters.d)) { @@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( unsigned k; COVER_best_t best; POOL_ctx *pool = NULL; + /* Checks */ if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index e8f454045..7d24e4991 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos( U32 cumulLength[LLIMIT] = {0}; U32 savings[LLIMIT] = {0}; const BYTE* b = (const BYTE*)buffer; - size_t length; size_t maxLength = LLIMIT; size_t pos = suffix[start]; U32 end = start; @@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos( ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { /* skip and mark segment */ - U16 u16 = MEM_read16(b+pos+4); - U32 u, e = 6; - while (MEM_read16(b+pos+e) == u16) e+=2 ; - if (b[pos+e] == b[pos+e-1]) e++; - for (u=1; u=MINMATCHLENGTH); + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + } while (length >= MINMATCHLENGTH); + } /* look backward */ - do { - length = ZDICT_count(b + pos, b + *(suffix+start-1)); - if (length >=MINMATCHLENGTH) start--; - } while(length >= MINMATCHLENGTH); + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } /* exit if not found a minimum nb of repetitions */ if (end-start < minRatio) { @@ -268,7 +271,7 @@ static dictItem ZDICT_analyzePos( U32 selectedCount = 0; U32 selectedID = currentID; for (id =refinedStart; id < refinedEnd; id++) { - if (b[ suffix[id] + searchLength] != currentChar) { + if (b[suffix[id] + searchLength] != currentChar) { if (currentCount > selectedCount) { selectedCount = currentCount; selectedID = currentID; @@ -297,20 +300,23 @@ static dictItem ZDICT_analyzePos( memset(lengthList, 0, sizeof(lengthList)); /* look forward */ - do { - end++; - length = ZDICT_count(b + pos, b + suffix[end]); - if (length >= LLIMIT) length = LLIMIT-1; - lengthList[length]++; - } while (length >=MINMATCHLENGTH); + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } /* look backward */ - length = MINMATCHLENGTH; - while ((length >= MINMATCHLENGTH) & (start > 0)) { - length = ZDICT_count(b + pos, b + suffix[start - 1]); - if (length >= LLIMIT) length = LLIMIT - 1; - lengthList[length]++; - if (length >= MINMATCHLENGTH) start--; + { size_t length = MINMATCHLENGTH; + while ((length >= MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } } /* largest useful length */ @@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos( /* mark positions done */ { U32 id; for (id=start; id solution.length) length = solution.length; } pEnd = (U32)(testedPos + length); @@ -575,29 +581,30 @@ static void ZDICT_fillNoise(void* buffer, size_t length) typedef struct { - ZSTD_CCtx* ref; - ZSTD_CCtx* zc; + ZSTD_CCtx* ref; /* contains reference to dictionary */ + ZSTD_CCtx* zc; /* working context */ void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ } EStats_ress_t; #define MAXREPOFFSET 1024 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, - U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets, - const void* src, size_t srcSize, U32 notificationLevel) + U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize, + U32 notificationLevel) { size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog); size_t cSize; if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ - { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0); - if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } + { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } } cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; } if (cSize) { /* if == 0; block is not compressible */ - const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc); + const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc); /* literals stats */ { const BYTE* bytePtr; @@ -659,6 +666,18 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, } } +/* ZDICT_flatLit() : + * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. + * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. + */ +static void ZDICT_flatLit(U32* countLit) +{ + int u; + for (u=1; u<256; u++) countLit[u] = 2; + countLit[0] = 4; + countLit[253] = 1; + countLit[254] = 1; +} #define OFFCODE_MAX 30 /* only applicable to first block */ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, @@ -688,6 +707,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, BYTE* dstPtr = (BYTE*)dstBuffer; /* init */ + DEBUGLOG(4, "ZDICT_analyzeEntropy"); esr.ref = ZSTD_createCCtx(); esr.zc = ZSTD_createCCtx(); esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); @@ -713,7 +733,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, goto _cleanup; } } - /* collect stats on all files */ + /* collect stats on all samples */ for (u=0; u=1) + params.zParams.notificationLevel = ZSTD_DEBUG; +#endif return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity, - samplesBuffer, samplesSizes, - nbSamples, ¶ms); + samplesBuffer, samplesSizes, nbSamples, + ¶ms); } size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) { ZDICT_params_t params; memset(¶ms, 0, sizeof(params)); diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 5f0000b1c..92f66415f 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -38,21 +38,21 @@ extern "C" { /*! ZDICT_trainFromBuffer(): - * Train a dictionary from an array of samples. - * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. - * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - * The resulting dictionary will be saved into `dictBuffer`. + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). - * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. - * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. - * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. - * In general, it's recommended to provide a few thousands samples, but this can vary a lot. + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. */ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); /*====== Helper functions ======*/ @@ -72,14 +72,14 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); * ==================================================================================== */ typedef struct { - int compressionLevel; /* 0 means default; target a specific zstd compression level */ - unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ - unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */ + int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */ } ZDICT_params_t; /*! ZDICT_cover_params_t: - * For all values 0 means default. * k and d are the only required parameters. + * For others, value 0 means default. */ typedef struct { unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ @@ -91,28 +91,28 @@ typedef struct { /*! ZDICT_trainFromBuffer_cover(): - * Train a dictionary from an array of samples using the COVER algorithm. - * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - * The resulting dictionary will be saved into `dictBuffer`. + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). - * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. - * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. - * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. - * In general, it's recommended to provide a few thousands samples, but this can vary a lot. + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. */ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters); /*! ZDICT_optimizeTrainFromBuffer_cover(): * The same requirements as above hold for all the parameters except `parameters`. * This function tries many parameter combinations and picks the best parameters. - * `*parameters` is filled with the best parameters found, and the dictionary - * constructed with those parameters is stored in `dictBuffer`. + * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. * * All of the parameters d, k, steps are optional. * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. @@ -125,9 +125,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. */ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, - ZDICT_cover_params_t *parameters); + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); /*! ZDICT_finalizeDictionary(): * Given a custom content as a basis for dictionary, and a set of samples, @@ -157,22 +157,23 @@ typedef struct { } ZDICT_legacy_params_t; /*! ZDICT_trainFromBuffer_legacy(): - * Train a dictionary from an array of samples. - * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - * The resulting dictionary will be saved into `dictBuffer`. + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. * `parameters` is optional and can be provided with values set to 0 to mean "default". * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - * or an error code, which can be tested with ZDICT_isError(). - * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. - * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. - * In general, it's recommended to provide a few thousands samples, but this can vary a lot. + * or an error code, which can be tested with ZDICT_isError(). + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. - * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. */ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( - void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters); + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); /* Deprecation warnings */ /* It is generally possible to disable deprecation warnings from compiler, diff --git a/lib/zstd.h b/lib/zstd.h index 9ac0a73dc..a84490d99 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -45,11 +45,11 @@ extern "C" { Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. Compression can be done in: - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit memory management) + - a single step, reusing a context (described as Explicit context) - unbounded multiple steps (described as Streaming compression) The compression ratio achievable on small data can be highly improved using a dictionary in: - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Fast dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing dictionary API) Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h. Advanced experimental APIs shall never be used with a dynamic library. @@ -68,7 +68,7 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll versio #define ZSTD_QUOTE(str) #str #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0 */ +ZSTDLIB_API const char* ZSTD_versionString(void); /* added in v1.3.0 */ /*************************************** @@ -92,7 +92,7 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); -/*! ZSTD_getFrameContentSize() : v1.3.0 +/*! ZSTD_getFrameContentSize() : added in v1.3.0 * `src` should point to the start of a ZSTD encoded frame. * `srcSize` must be at least as large as the frame header. * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. @@ -120,26 +120,24 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t /*! ZSTD_getDecompressedSize() : * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, - * but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results in the same return value (0), - * while ZSTD_getFrameContentSize() distinguishes them. - * - * 'src' is the start of a zstd compressed frame. - * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */ + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * `src` is the start of a zstd compressed frame. + * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); /*====== Helper functions ======*/ #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ /*************************************** -* Explicit memory management +* Explicit context ***************************************/ /*= Compression context * When compressing many times, @@ -345,7 +343,7 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output * *******************************************************************************/ typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with versions <= v1.2.0 */ + /* For compatibility with versions <= v1.2.0, continue to consider them separated. */ /*===== ZSTD_DStream management functions =====*/ ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); @@ -375,15 +373,15 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output /* --- Constants ---*/ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* >= v0.7.0 */ #define ZSTD_WINDOWLOG_MAX_32 30 #define ZSTD_WINDOWLOG_MAX_64 31 #define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) #define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX MIN(ZSTD_WINDOWLOG_MAX, 30) +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) #define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX MIN(ZSTD_WINDOWLOG_MAX+1, 30) +#define ZSTD_CHAINLOG_MAX ((ZSTD_WINDOWLOG_MAX < 29) ? ZSTD_WINDOWLOG_MAX+1 : 30) #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN #define ZSTD_HASHLOG3_MAX 17 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) @@ -432,12 +430,17 @@ typedef struct { typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; -/*--- Custom memory allocation functions ---*/ -typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -/* use this constant to defer to stdlib's functions */ -static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; +typedef enum { + ZSTD_dm_auto=0, /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dm_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dm_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */ +} ZSTD_dictMode_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; + /*************************************** @@ -483,12 +486,12 @@ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); /*************************************** -* Context memory usage +* Memory management ***************************************/ /*! ZSTD_sizeof_*() : * These functions give the current memory usage of selected object. - * Object memory usage can evolve when re-used multiple times. */ + * Object memory usage can evolve when re-used. */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); @@ -504,7 +507,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1. - * Note : CCtx estimation is only correct for single-threaded compression */ + * Note : CCtx size estimation is only correct for single-threaded compression. */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); @@ -516,7 +519,7 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1. - * Note : CStream estimation is only correct for single-threaded compression. + * Note : CStream size estimation is only correct for single-threaded compression. * ZSTD_DStream memory budget depends on window Size. * This information can be passed manually, using ZSTD_estimateDStreamSize, * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); @@ -529,84 +532,88 @@ ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_para ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); -typedef enum { - ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ -} ZSTD_dictLoadMethod_e; - /*! ZSTD_estimate?DictSize() : * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). - * ZSTD_estimateCStreamSize_advanced_usingCParams() makes it possible to control precisely compression parameters, like ZSTD_createCDict_advanced(). - * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. */ ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); - -/*************************************** -* Advanced compression functions -***************************************/ -/*! ZSTD_createCCtx_advanced() : - * Create a ZSTD compression context using external alloc and free functions */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); - -/*! ZSTD_initStaticCCtx() : initialize a fixed-size zstd compression context - * workspace: The memory area to emplace the context into. - * Provided pointer must 8-bytes aligned. - * It must outlive context usage. - * workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize() - * to determine how large workspace must be to support scenario. - * @return : pointer to ZSTD_CCtx* (same address as workspace, but different type), - * or NULL if error (typically size too small) - * Note : zstd will never resize nor malloc() when using a static cctx. - * If it needs more memory than available, it will simply error out. +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). * Note 2 : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally too. - * Limitation 1 : currently not compatible with internal CDict creation, such as - * ZSTD_CCtx_loadDictionary() or ZSTD_initCStream_usingDict(). - * Limitation 2 : currently not compatible with multi-threading + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ -/*! ZSTD_createCDict_byReference() : - * Create a digested dictionary for compression - * Dictionary content is simply referenced, and therefore stays in dictBuffer. - * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); +ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictMode_e dictMode, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -typedef enum { ZSTD_dm_auto=0, /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ - ZSTD_dm_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ - ZSTD_dm_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */ -} ZSTD_dictMode_e; -/*! ZSTD_createCDict_advanced() : - * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams, ZSTD_customMem customMem); -/*! ZSTD_initStaticCDict() : - * Generate a digested dictionary in provided memory area. - * workspace: The memory area to emplace the dictionary into. - * Provided pointer must 8-bytes aligned. - * It must outlive dictionary usage. - * workspaceSize: Use ZSTD_estimateCDictSize() - * to determine how large workspace must be. - * cParams : use ZSTD_getCParams() to transform a compression level - * into its relevants cParams. - * @return : pointer to ZSTD_CDict* (same address as workspace, but different type), - * or NULL if error (typically, size too small). - * Note : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. - */ -ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode, - ZSTD_compressionParameters cParams); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_customMem customMem); + + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is simply referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); /*! ZSTD_getCParams() : * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. @@ -652,28 +659,6 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, * Note 3 : Skippable Frame Identifiers are considered valid. */ ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); -/*! ZSTD_createDCtx_advanced() : - * Create a ZSTD decompression context using external alloc and free functions */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); - -/*! ZSTD_initStaticDCtx() : initialize a fixed-size zstd decompression context - * workspace: The memory area to emplace the context into. - * Provided pointer must 8-bytes aligned. - * It must outlive context usage. - * workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize() - * to determine how large workspace must be to support scenario. - * @return : pointer to ZSTD_DCtx* (same address as workspace, but different type), - * or NULL if error (typically size too small) - * Note : zstd will never resize nor malloc() when using a static dctx. - * If it needs more memory than available, it will simply error out. - * Note 2 : static dctx is incompatible with legacy support - * Note 3 : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. - * Limitation : currently not compatible with internal DDict creation, - * such as ZSTD_initDStream_usingDict(). - */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); - /*! ZSTD_createDDict_byReference() : * Create a digested dictionary, ready to start decompression operation without startup delay. * Dictionary content is referenced, and therefore stays in dictBuffer. @@ -681,26 +666,6 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize * it must remain read accessible throughout the lifetime of DDict */ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); -/*! ZSTD_createDDict_advanced() : - * Create a ZSTD_DDict using external alloc and free, optionally by reference */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_customMem customMem); - -/*! ZSTD_initStaticDDict() : - * Generate a digested dictionary in provided memory area. - * workspace: The memory area to emplace the dictionary into. - * Provided pointer must 8-bytes aligned. - * It must outlive dictionary usage. - * workspaceSize: Use ZSTD_estimateDDictSize() - * to determine how large workspace must be. - * @return : pointer to ZSTD_DDict*, or NULL if error (size too small) - * Note : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. - */ -ZSTDLIB_API ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod); /*! ZSTD_getDictID_fromDict() : * Provides the dictID stored within dictionary. @@ -732,8 +697,6 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); ********************************************************************/ /*===== Advanced Streaming compression functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/ ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, @@ -754,8 +717,6 @@ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledg /*===== Advanced Streaming decompression functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); /* obsolete : this API will be removed in a future version */ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ @@ -926,8 +887,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * * This API is intended to replace all others experimental API. * It can basically do all other use cases, and even new ones. - * In constrast with _advanced() variants, it stands a reasonable chance to become "stable", - * after a good testing period. + * It stands a reasonable chance to become "stable", after a good testing period. */ /* note on naming convention : @@ -1172,7 +1132,7 @@ ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, * All parameters are back to default values. * It's possible to modify compression parameters after a reset. */ -ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */ +ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /*! ZSTD_compress_generic_simpleArgs() : @@ -1389,7 +1349,7 @@ ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression */ +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ diff --git a/programs/fileio.c b/programs/fileio.c index f9c9087ba..887cbeb37 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -139,7 +139,10 @@ static void INThandler(int sig) #if !defined(_MSC_VER) signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ #endif - if (g_artefact) remove(g_artefact); + if (g_artefact) { + assert(UTIL_isRegularFile(g_artefact)); + remove(g_artefact); + } DISPLAY("\n"); exit(2); } @@ -861,7 +864,7 @@ static int FIO_compressFilename_srcFile(cRess_t ress, * delete both the source and destination files. */ clearHandler(); - if (remove(srcFileName)) + if (FIO_remove(srcFileName)) EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); } return result; @@ -898,11 +901,13 @@ static int FIO_compressFilename_dstFile(cRess_t ress, DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; } - if (result!=0) { /* remove operation artefact */ - if (remove(dstFileName)) - EXM_THROW(1, "zstd: %s: %s", dstFileName, strerror(errno)); - } - else if (strcmp (dstFileName, stdoutmark) && stat_result) + if ( (result != 0) /* operation failure */ + && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null */ + && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */ + FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ + else if ( strcmp(dstFileName, stdoutmark) + && strcmp(dstFileName, nulmark) + && stat_result) UTIL_setFileStat(dstFileName, &statbuf); return result; @@ -1575,7 +1580,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch * delete both the source and destination files. */ clearHandler(); - if (remove(srcFileName)) { + if (FIO_remove(srcFileName)) { /* failed to remove src file */ DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); return 1; @@ -1618,7 +1623,7 @@ static int FIO_decompressDstFile(dRess_t ress, if ( (result != 0) /* operation failure */ && strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */ && strcmp(dstFileName, stdoutmark) ) /* special case : don't remove() stdout */ - remove(dstFileName); /* remove decompression artefact; note don't do anything special if remove() fails */ + FIO_remove(dstFileName); /* remove decompression artefact; note don't do anything special if remove() fails */ else { /* operation success */ if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */ && strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */ diff --git a/programs/util.h b/programs/util.h index af1fa7fca..4f22236a9 100644 --- a/programs/util.h +++ b/programs/util.h @@ -246,11 +246,17 @@ UTIL_STATIC void UTIL_waitForNextTick(void) #endif +UTIL_STATIC int UTIL_isRegularFile(const char* infilename); + + UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf) { int res = 0; struct utimbuf timebuf; + if (!UTIL_isRegularFile(filename)) + return -1; + timebuf.actime = time(NULL); timebuf.modtime = statbuf->st_mtime; res += utime(filename, &timebuf); /* set access and modification times */ diff --git a/tests/Makefile b/tests/Makefile index 853f4ee89..48ae17538 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -21,7 +21,7 @@ ZSTDDIR = ../lib PRGDIR = ../programs PYTHON ?= python3 -TESTARTEFACT := versionsTest namespaceTest +TESTARTEFACT := versionsTest DEBUGLEVEL ?= 1 DEBUGFLAGS = -g -DZSTD_DEBUG=$(DEBUGLEVEL) @@ -44,6 +44,16 @@ ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) ZBUFF_FILES := $(ZSTDDIR)/deprecated/*.c ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c +ZSTD_F1 := $(wildcard $(ZSTD_FILES)) +ZSTD_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdm_,$(ZSTD_F1)) +ZSTD_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdc_,$(ZSTD_OBJ1)) +ZSTD_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdd_,$(ZSTD_OBJ2)) +ZSTD_OBJECTS := $(ZSTD_OBJ3:.c=.o) + +ZSTDMT_OBJ1 := $(subst $(ZSTDDIR)/common/,zstdmt_m_,$(ZSTD_F1)) +ZSTDMT_OBJ2 := $(subst $(ZSTDDIR)/compress/,zstdmt_c_,$(ZSTDMT_OBJ1)) +ZSTDMT_OBJ3 := $(subst $(ZSTDDIR)/decompress/,zstdmt_d_,$(ZSTDMT_OBJ2)) +ZSTDMT_OBJECTS := $(ZSTDMT_OBJ3:.c=.o) # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) @@ -63,11 +73,12 @@ FUZZERTEST ?= -T200s ZSTDRTTEST = --test-large-data DECODECORPUS_TESTTIME ?= -T30 -.PHONY: default all all32 allnothread dll clean test test32 test-all namespaceTest versionsTest +.PHONY: default all all32 allnothread dll clean test test32 test-all versionsTest default: fullbench + @echo $(ZSTDMT_OBJECTS) -all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus +all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus roundTripCrash all32: fullbench32 fuzzer32 zstreamtest32 @@ -87,35 +98,69 @@ zstd-nolegacy: gzstd: $(MAKE) -C $(PRGDIR) zstd HAVE_ZLIB=1 MOREFLAGS="$(DEBUGFLAGS)" +.PHONY: +zstd-dll : + $(MAKE) -C $(ZSTDDIR) libzstd + +.PHONY: +zstd-staticLib : + $(MAKE) -C $(ZSTDDIR) libzstd.a + +zstdm_%.o : $(ZSTDDIR)/common/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdc_%.o : $(ZSTDDIR)/compress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdd_%.o : $(ZSTDDIR)/decompress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdmt%.o : CPPFLAGS += $(MULTITHREAD_CPP) + +zstdmt_m_%.o : $(ZSTDDIR)/common/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdmt_c_%.o : $(ZSTDDIR)/compress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + +zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.c + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ + fullbench32: CPPFLAGS += -m32 fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) fullbench fullbench32 : DEBUGFLAGS = # turn off assert() for speed measurements -fullbench fullbench32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c fullbench.c +fullbench fullbench32 : $(ZSTD_FILES) +fullbench fullbench32 : $(PRGDIR)/datagen.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) -fullbench-lib: $(PRGDIR)/datagen.c fullbench.c - $(MAKE) -C $(ZSTDDIR) libzstd.a - $(CC) $(FLAGS) $^ -o $@$(EXT) $(ZSTDDIR)/libzstd.a +fullbench-lib : zstd-staticLib +fullbench-lib : $(PRGDIR)/datagen.c fullbench.c + $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a +# note : broken : requires unavailable symbols +fullbench-dll : zstd-dll +fullbench-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd fullbench-dll: $(PRGDIR)/datagen.c fullbench.c - $(MAKE) -C $(ZSTDDIR) libzstd - $(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll +# $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll + $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) -fuzzer : LDFLAGS += $(MULTITHREAD_LD) +fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) +fuzzer : LDFLAGS += $(MULTITHREAD_LD) fuzzer32: CFLAGS += -m32 -fuzzer fuzzer32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c fuzzer.c +fuzzer : $(ZSTDMT_OBJECTS) +fuzzer32: $(ZSTD_FILES) +fuzzer fuzzer32 : $(ZDICT_FILES) $(PRGDIR)/datagen.c fuzzer.c $(CC) $(FLAGS) $^ -o $@$(EXT) +fuzzer-dll : zstd-dll fuzzer-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c fuzzer.c - $(MAKE) -C $(ZSTDDIR) libzstd - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) + $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) zbufftest : CPPFLAGS += -I$(ZSTDDIR)/deprecated zbufftest : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings -zbufftest : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/datagen.c zbufftest.c +zbufftest : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/datagen.c zbufftest.c $(CC) $(FLAGS) $^ -o $@$(EXT) zbufftest32 : CPPFLAGS += -I$(ZSTDDIR)/deprecated @@ -123,18 +168,22 @@ zbufftest32 : CFLAGS += -Wno-deprecated-declarations -m32 zbufftest32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/datagen.c zbufftest.c $(CC) $(FLAGS) $^ -o $@$(EXT) +zbufftest-dll : zstd-dll zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated zbufftest-dll : CFLAGS += -Wno-deprecated-declarations # required to silence deprecation warnings zbufftest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c - $(MAKE) -C $(ZSTDDIR) libzstd - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) + $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) -ZSTREAMFILES := $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c seqgen.c zstreamtest.c +ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c seqgen.c zstreamtest.c +ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES) +ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES) zstreamtest32 : CFLAGS += -m32 zstreamtest zstreamtest32 : CPPFLAGS += $(MULTITHREAD_CPP) zstreamtest zstreamtest32 : LDFLAGS += $(MULTITHREAD_LD) -zstreamtest zstreamtest32 : $(ZSTREAMFILES) +zstreamtest : $(ZSTDMT_OBJECTS) $(ZSTREAM_PROPER_FILES) +zstreamtest32 : $(ZSTREAMFILES) +zstreamtest zstreamtest32 : $(CC) $(FLAGS) $^ -o $@$(EXT) zstreamtest_asan : CFLAGS += -fsanitize=address @@ -145,51 +194,47 @@ zstreamtest_tsan : CFLAGS += -fsanitize=thread zstreamtest_tsan : $(ZSTREAMFILES) $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) +zstreamtest-dll : zstd-dll zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd -zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zstreamtest.c - $(MAKE) -C $(ZSTDDIR) libzstd - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) +zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll +zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) + $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) -paramgrill : DEBUGFLAGS = +paramgrill : DEBUGFLAGS = # turn off assert() for speed measurements paramgrill : $(ZSTD_FILES) $(PRGDIR)/datagen.c paramgrill.c - $(CC) $(FLAGS) $^ -lm -o $@$(EXT) + $(CC) $(FLAGS) $^ -lm -o $@$(EXT) datagen : $(PRGDIR)/datagen.c datagencli.c - $(CC) $(FLAGS) $^ -o $@$(EXT) + $(CC) $(FLAGS) $^ -o $@$(EXT) -roundTripCrash : $(ZSTD_FILES) roundTripCrash.c - $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) +roundTripCrash : $(ZSTD_OBJECTS) roundTripCrash.c + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) -longmatch : $(ZSTD_FILES) longmatch.c - $(CC) $(FLAGS) $^ -o $@$(EXT) +longmatch : $(ZSTD_OBJECTS) longmatch.c + $(CC) $(FLAGS) $^ -o $@$(EXT) -invalidDictionaries : $(ZSTD_FILES) invalidDictionaries.c - $(CC) $(FLAGS) $^ -o $@$(EXT) +invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c + $(CC) $(FLAGS) $^ -o $@$(EXT) legacy : CFLAGS+= -DZSTD_LEGACY_SUPPORT=4 legacy : CPPFLAGS+= -I$(ZSTDDIR)/legacy legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c - $(CC) $(FLAGS) $^ -o $@$(EXT) + $(CC) $(FLAGS) $^ -o $@$(EXT) -decodecorpus : $(filter-out $(ZSTDDIR)/compress/zstd_compress.c, $(wildcard $(ZSTD_FILES))) $(ZDICT_FILES) decodecorpus.c - $(CC) $(FLAGS) $^ -o $@$(EXT) -lm +decodecorpus : $(filter-out zstdc_zstd_compress.o, $(ZSTD_OBJECTS)) $(ZDICT_FILES) decodecorpus.c + $(CC) $(FLAGS) $^ -o $@$(EXT) -lm -symbols : symbols.c - $(MAKE) -C $(ZSTDDIR) libzstd +symbols : symbols.c zstd-dll ifneq (,$(filter Windows%,$(OS))) cp $(ZSTDDIR)/dll/libzstd.dll . - $(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 libzstd.dll + $(CC) $(FLAGS) $< -o $@$(EXT) -DZSTD_DLL_IMPORT=1 libzstd.dll else - $(CC) $(FLAGS) $^ -o $@$(EXT) -Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so + $(CC) $(FLAGS) $< -o $@$(EXT) -Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so # broken on Mac endif poolTests : poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) -namespaceTest: - if $(CC) namespaceTest.c ../lib/common/xxhash.c -o $@ ; then echo compilation should fail; exit 1 ; fi - $(RM) $@ - versionsTest: clean $(PYTHON) test-zstd-versions.py diff --git a/tests/fuzzer.c b/tests/fuzzer.c index c599a15de..3206d6e11 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -65,11 +65,18 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ if (g_displayLevel>=4) fflush(stdout); } } -/*-******************************************************* -* Fuzzer functions -*********************************************************/ + #undef MIN #undef MAX +void FUZ_bug976(void) +{ /* these constants shall not depend on MIN() macro */ + assert(ZSTD_HASHLOG_MAX < 31); + assert(ZSTD_CHAINLOG_MAX < 31); +} + +/*-******************************************************* +* Internal functions +*********************************************************/ #define MIN(a,b) ((a)<(b)?(a):(b)) #define MAX(a,b) ((a)>(b)?(a):(b)) @@ -658,12 +665,13 @@ static int basicUnitTests(U32 seed, double compressibility) /* Dictionary and dictBuilder tests */ { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - size_t dictSize = 16 KB; - void* dictBuffer = malloc(dictSize); + size_t const dictBufferCapacity = 16 KB; + void* dictBuffer = malloc(dictBufferCapacity); size_t const totalSampleSize = 1 MB; size_t const sampleUnitSize = 8 KB; U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + size_t dictSize; U32 dictID; if (dictBuffer==NULL || samplesSizes==NULL) { @@ -672,9 +680,19 @@ static int basicUnitTests(U32 seed, double compressibility) goto _output_error; } + DISPLAYLEVEL(3, "test%3i : dictBuilder on cyclic data : ", testNb++); + assert(compressedBufferSize >= totalSampleSize); + { U32 u; for (u=0; u= cSize) break; /* add noise */ { U32 const nbBitsCodes = FUZ_rand(&lseed) % maxNbBits; U32 const nbBits = nbBitsCodes ? nbBitsCodes-1 : 0; diff --git a/tests/namespaceTest.c b/tests/namespaceTest.c deleted file mode 100644 index 5b7095f67..000000000 --- a/tests/namespaceTest.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - -#include /* size_t */ -#include /* strlen */ - -/* symbol definition */ -extern unsigned XXH32(const void* src, size_t srcSize, unsigned seed); - -int main(int argc, const char** argv) -{ - const char* exename = argv[0]; - unsigned result = XXH32(exename, strlen(exename), argc); - return !result; -}