diff --git a/NEWS b/NEWS index d60b363a4..f99b0376b 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,7 @@ v0.4.6 fix : fast compression mode on Windows Improved : high compression mode on repetitive data +Added : ZSTD_duplicateCCtx() v0.4.5 new : -m/--multiple : compress/decompress multiple files diff --git a/lib/Makefile b/lib/Makefile index a4a156d85..a7172b6f8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -46,7 +46,7 @@ DESTDIR?= PREFIX ?= /usr/local CPPFLAGS= -I. CFLAGS ?= -O3 -CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes +CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes -Wstrict-aliasing=1 FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS) LIBDIR ?= $(PREFIX)/lib diff --git a/lib/zstd.h b/lib/zstd.h index bba096144..e201b5e0b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -107,16 +107,24 @@ ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /** provides error co /* ************************************* * Advanced functions ***************************************/ +/** Compression context management */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; /* incomplete type */ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); -/** -ZSTD_compressCCtx() : - Same as ZSTD_compress(), but requires a ZSTD_CCtx working space already allocated -*/ +/** ZSTD_compressCCtx() : + Same as ZSTD_compress(), but requires an already allocated ZSTD_CCtx (see ZSTD_createCCtx()) */ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel); +/** Decompression context management */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/** ZSTD_decompressDCtx +* Same as ZSTD_decompress(), but requires an already allocated ZSTD_DCtx (see ZSTD_createDCtx()) */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); + #if defined (__cplusplus) } diff --git a/lib/zstd_buffered.c b/lib/zstd_buffered.c index 99148c297..8a7fa1d11 100644 --- a/lib/zstd_buffered.c +++ b/lib/zstd_buffered.c @@ -143,7 +143,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, ZSTD_parameters params) if (zbc->outBuff == NULL) return ERROR(memory_allocation); } - zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, zbc->outBuff, zbc->outBuffSize, params); + zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, params); if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize; zbc->inToCompress = 0; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 5db0fdf5d..4044ac741 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -65,8 +65,9 @@ /* ************************************* * Constants ***************************************/ -unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +ZSTDLIB_API unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } static const U32 g_searchStrength = 8; +static const size_t g_hbSize = (((ZSTD_frameHeaderSize_max+15)/8)*8); /* ************************************* @@ -109,10 +110,14 @@ struct ZSTD_CCtx_s U32 dictLimit; /* below that point, need extDict */ U32 lowLimit; /* below that point, no more data */ U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 stage; ZSTD_parameters params; void* workSpace; size_t workSpaceSize; size_t blockSize; + void* headerBuffer; + size_t hbSize; + seqStore_t seqStore; /* sequences storage ptrs */ U32* hashTable; @@ -205,6 +210,8 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); + zc->headerBuffer = (char*)zc->workSpace + zc->workSpaceSize - g_hbSize; + zc->hbSize = 0; return 0; } @@ -790,7 +797,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, const U32 current = (U32)(ip-base); hashTable[h] = current; /* update hash table */ - if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) /* note : by construction, offset_1 <= (ip-base) */ + if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) /* note : by construction, offset_1 <= current */ { mlCode = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend); ip++; @@ -819,7 +826,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, if (ip <= ilimit) { /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 without ip <= ilimit check*/ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) @@ -1852,6 +1859,17 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, const void* src, size_t srcSize) { const BYTE* const ip = (const BYTE*) src; + size_t hbSize = 0; + + if (zc->stage==0) + { + hbSize = zc->hbSize; + if (dstSize <= hbSize) return ERROR(dstSize_tooSmall); + zc->stage = 1; + memcpy(dst, zc->headerBuffer, hbSize); + dstSize -= hbSize; + dst = (char*)dst + hbSize; + } /* Check if blocks follow each other */ if (src != zc->nextSrc) @@ -1890,8 +1908,11 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, } zc->nextSrc = ip + srcSize; - - return ZSTD_compress_generic (zc, dst, dstSize, src, srcSize); + { + size_t cSize = ZSTD_compress_generic (zc, dst, dstSize, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + return cSize + hbSize; + } } size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -1934,24 +1955,49 @@ size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t src } +/*! ZSTD_duplicateCCtx +* Duplicate an existing context @srcCCtx into another one @dstCCtx. +* Only works during stage 0 (i.e. before first call to ZSTD_compressContinue()) +* @return : 0, or an error code */ +size_t ZSTD_duplicateCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) +{ + void* dstWorkSpace = dstCCtx->workSpace; + size_t dstWorkSpaceSize = dstCCtx->workSpaceSize; + + if (dstWorkSpaceSize < srcCCtx->workSpaceSize) + { + free(dstCCtx->workSpace); + dstWorkSpaceSize = srcCCtx->workSpaceSize; + dstWorkSpace = malloc(dstWorkSpaceSize); + if (dstWorkSpace==NULL) return ERROR(memory_allocation); + } + + memcpy(dstCCtx, srcCCtx, sizeof(*dstCCtx)); + dstCCtx->workSpace = dstWorkSpace; + dstCCtx->workSpaceSize = dstWorkSpaceSize; + + return 0; +} + + /*! ZSTD_compressBegin_advanced -* Write frame header, according to params -* @return : nb of bytes written */ +* @return : 0, or an error code */ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, ZSTD_parameters params) { size_t errorCode; ZSTD_validateParams(¶ms); - if (maxDstSize < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); errorCode = ZSTD_resetCCtx_advanced(ctx, params); if (ZSTD_isError(errorCode)) return errorCode; - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); /* Write Header */ - ((BYTE*)dst)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); - return ZSTD_frameHeaderSize_min; + MEM_writeLE32(ctx->headerBuffer, ZSTD_MAGICNUMBER); /* Write Header */ + ((BYTE*)ctx->headerBuffer)[4] = (BYTE)(params.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN); + ctx->hbSize = ZSTD_frameHeaderSize_min; + ctx->stage = 0; + + return 0; } @@ -1970,29 +2016,38 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint) } -size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int compressionLevel) +size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, int compressionLevel) { - return ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, 0)); + return ZSTD_compressBegin_advanced(ctx, ZSTD_getParams(compressionLevel, 0)); } /*! ZSTD_compressEnd * Write frame epilogue * @return : nb of bytes written into dst (or an error code) */ -size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize) +size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t maxDstSize) { BYTE* op = (BYTE*)dst; + size_t hbSize = 0; - /* Sanity check */ - (void)ctx; + /* empty frame */ + if (zc->stage==0) + { + hbSize = zc->hbSize; + if (maxDstSize <= hbSize) return ERROR(dstSize_tooSmall); + zc->stage = 1; + memcpy(dst, zc->headerBuffer, hbSize); + maxDstSize -= hbSize; + op += hbSize; + } + + /* frame epilogue */ if (maxDstSize < 3) return ERROR(dstSize_tooSmall); - - /* End of frame */ op[0] = (BYTE)(bt_end << 6); op[1] = 0; op[2] = 0; - return 3; + return 3+hbSize; } size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, @@ -2006,10 +2061,8 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, size_t oSize; /* Header */ - oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, params); + oSize = ZSTD_compressBegin_advanced(ctx, params); if(ZSTD_isError(oSize)) return oSize; - op += oSize; - maxDstSize -= oSize; /* dictionary */ if (dict) @@ -2048,7 +2101,7 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi ZSTD_CCtx ctxBody; memset(&ctxBody, 0, sizeof(ctxBody)); result = ZSTD_compressCCtx(&ctxBody, dst, maxDstSize, src, srcSize, compressionLevel); - free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; free heap content */ + free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; just free heap content */ return result; } diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 32ef67eec..62bd93577 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -33,9 +33,9 @@ #ifndef ZSTD_STATIC_H #define ZSTD_STATIC_H -/* The objects defined into this file should be considered experimental. - * They are not labelled stable, as their prototype may change in the future. - * You can use them for tests, provide feedback, or if you can endure risk of future changes. +/* The objects defined into this file shall be considered experimental. + * They are not considered stable, as their prototype may change in the future. + * You can use them for tests, provide feedback, or if you can endure risks of future changes. */ #if defined (__cplusplus) @@ -108,40 +108,33 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params); -/** Decompression context management */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); - -/** ZSTD_decompressDCtx -* Same as ZSTD_decompress, with pre-allocated DCtx structure */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); - /** ZSTD_decompress_usingDict * Same as ZSTD_decompressDCtx, using a Dictionary content as prefix * Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - const void* dict, size_t dictSize); + const void* src, size_t srcSize, + const void* dict,size_t dictSize); /* ************************************** * Streaming functions (direct mode) ****************************************/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params); +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, ZSTD_parameters params); + ZSTDLIB_API size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_duplicateCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx); ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); /** - Streaming compression, direct mode (bufferless) + Streaming compression, synchronous mode (bufferless) A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it. - A ZSTD_CCtx object can be re-used multiple times. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. First operation is to start a new frame. Use ZSTD_compressBegin(). @@ -151,8 +144,13 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz Note that dictionary presence is a "hidden" information, the decoder needs to be aware that it is required for proper decoding, or decoding will fail. + If you want to compress multiple messages using same dictionary, + it can be beneficial to duplicate compression context rather than reloading dictionary each time. + In such case, use ZSTD_duplicateCCtx(), which will need an already created ZSTD_CCtx, + in order to duplicate compression context into it. + Then, consume your input using ZSTD_compressContinue(). - The interface is synchronous, so all input will be consumed. + The interface is synchronous, so all input will be consumed and produce a compressed output. You must ensure there is enough space in destination buffer to store compressed data under worst case scenario. Worst case evaluation is provided by ZSTD_compressBound(). diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 025fc5d6a..801ae75bb 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -268,11 +268,13 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit U32 result = 0; U32 testNb = 0; U32 coreSeed = seed, lseed = 0; + ZSTD_CCtx* refCtx; ZSTD_CCtx* ctx; ZSTD_DCtx* dctx; U32 startTime = FUZ_GetMilliStart(); /* allocation */ + refCtx = ZSTD_createCCtx(); ctx = ZSTD_createCCtx(); dctx= ZSTD_createDCtx(); cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize); @@ -284,7 +286,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit mirrorBuffer = (BYTE*)malloc (dstBufferSize); cBuffer = (BYTE*)malloc (cBufferSize); CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] - || !dstBuffer || !mirrorBuffer || !cBuffer || !ctx || !dctx, + || !dstBuffer || !mirrorBuffer || !cBuffer || !refCtx || !ctx || !dctx, "Not enough memory, fuzzer tests cancelled"); /* Create initial samples */ @@ -461,10 +463,13 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit dict = srcBuffer + sampleStart; dictSize = sampleSize; - cSize = ZSTD_compressBegin(ctx, cBuffer, cBufferSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); - errorCode = ZSTD_compress_insertDictionary(ctx, dict, dictSize); + errorCode = ZSTD_compressBegin(refCtx, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1); + CHECK (ZSTD_isError(errorCode), "start streaming error : %s", ZSTD_getErrorName(errorCode)); + errorCode = ZSTD_compress_insertDictionary(refCtx, dict, dictSize); CHECK (ZSTD_isError(errorCode), "dictionary insertion error : %s", ZSTD_getErrorName(errorCode)); - totalTestSize = 0; + errorCode = ZSTD_duplicateCCtx(ctx, refCtx); + CHECK (ZSTD_isError(errorCode), "context duplication error : %s", ZSTD_getErrorName(errorCode)); + totalTestSize = 0; cSize = 0; for (n=0; n