From 7757577341bf9bbefeb3686a9127c32bb1fc3d3c Mon Sep 17 00:00:00 2001
From: Yann Collet `compressedSize` : must be the _exact_ size of a single compressed frame.
+ `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
`dstCapacity` is an upper bound of originalSize.
If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
@return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
@@ -71,7 +73,16 @@
'src' is the start of a zstd compressed frame.
+ NOTE: This function is planned to be obsolete, in favour of ZSTD_getFrameContentSize.
+ ZSTD_getFrameContentSize functions the same way, returning the decompressed size of a single
+ frame, but distinguishes empty frames from frames with an unknown size, or errors.
+
+ Additionally, ZSTD_findDecompressedSize can be used instead. It can handle multiple
+ concatenated frames in one buffer, and so is more general.
+ As a result however, it requires more computation and entire frames to be passed to it,
+ as opposed to ZSTD_getFrameContentSize which requires only a single frame's header.
+
+ 'src' is the start of a zstd compressed frame.
@return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
When `return==0`, data to decompress could be any size.
@@ -88,21 +99,29 @@
note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more.
Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()).
Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()).
When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only.
- `dict` can be released after ZSTD_CDict creation.
+ `dictBuffer` can be released after ZSTD_CDict creation, as its content is copied within CDict
zstd 1.1.2 Manual
+zstd 1.1.4 Manual
Contents
@@ -19,13 +19,15 @@
Introduction
@@ -63,7 +65,7 @@
size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
-
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-
-Helper functions
int ZSTD_maxCLevel(void); /*!< maximum compression level available */
+
Helper functions
int ZSTD_maxCLevel(void);
/*!< maximum compression level available */
size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
-
+
Explicit memory management
+Compression context
When compressing many times,
+ it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ This will make workload friendlier for system's memory.
+ Use one context per thread for parallel execution in multi-threaded environments.
+
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+
size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
-Decompression context
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+
Decompression context
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTD_DCtx* ZSTD_createDCtx(void);
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
-
+
size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -131,11 +150,11 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Fast dictionary API
-ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+
ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -151,9 +170,9 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Note that compression level is decided during dictionary creation.
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); +ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);Create a digested dictionary, ready to start decompression operation without startup delay. - `dict` can be released after creation. + dictBuffer can be released after DDict creation, as its content is copied inside DDict
size_t ZSTD_freeDDict(ZSTD_DDict* ddict); @@ -271,9 +290,9 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); } ZSTD_compressionParameters;
typedef struct { - unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */ - unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */ - unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */ + unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */ + unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */ } ZSTD_frameParameters;
typedef struct { @@ -281,11 +300,56 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); ZSTD_frameParameters fParams; } ZSTD_parameters;
-Custom memory allocation functions
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +Custom memory allocation functions
typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -
-Advanced compression functions
+
size_t ZSTD_getFrameCompressedSize(const void* src, size_t srcSize); +`src` should point to the start of a ZSTD encoded frame + `srcSize` must be at least as large as the frame + @return : the compressed size of the frame pointed to by `src`, suitable to pass to + `ZSTD_decompress` or similar, or an error code if given invalid input. +
unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +`src` should point to the start of a ZSTD encoded frame + `srcSize` must be at least as large as the frame header. A value greater than or equal + to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases. + @return : decompressed size of the frame pointed to by `src` if known, otherwise + - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) +
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); +`src` should point to the start of a series of ZSTD encoded and/or skippable frames + `srcSize` must be the _exact_ size of this series + (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`) + @return : the decompressed size of all data in the contained frames, as a 64-bit value _if known_ + - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + - if an error occurred: ZSTD_CONTENTSIZE_ERROR + + note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + In which case, it's necessary to use streaming mode to decompress data. + Optionally, application can still use ZSTD_decompress() while relying on implied limits. + (For example, data may be necessarily cut into blocks <= 16 KB). + note 2 : decompressed size is always present when compression is done with ZSTD_compress() + note 3 : decompressed size can be very large (64-bits value), + potentially larger than what local system can handle as a single memory segment. + In which case, it's necessary to use streaming mode to decompress data. + note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + Always ensure result fits within application's authorized limits. + Each application can set its own limits. + note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + read each contained frame header. This is efficient as most of the data is skipped, + however it does mean that all frame data must be present and valid. +
size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters. @@ -300,7 +364,22 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
Gives the amount of memory used by a given ZSTD_CCtx
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, +typedef enum { + ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/ +} ZSTD_CCtxParameter; +
+size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); +Set advanced parameters, selected through enum ZSTD_CCtxParameter + @result : 0, or an error code (which can be tested with ZSTD_isError()) +
+ +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); +Create a digested dictionary for compression + Dictionary content is simply referenced, and therefore stays in dictBuffer. + It is important that dictBuffer outlives CDict : it must remain read accessible throughout the lifetime of CDict +
+ +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem);Create a ZSTD_CDict using external alloc and free, and customized compression parameters
@@ -336,7 +415,7 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; vSame as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter
unsigned ZSTD_isFrame(const void* buffer, size_t size);Tells if the content of `buffer` starts with a valid Frame Identifier. @@ -357,6 +436,12 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
Gives the amount of memory used by a given ZSTD_DCtx
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +Create a digested dictionary, ready to start decompression operation without startup delay. + Dictionary content is simply referenced, and therefore stays in dictBuffer. + It is important that dictBuffer outlives DDict : it must remain read accessible throughout the lifetime of DDict +
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);Gives the amount of memory used by a given ZSTD_DDict
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */ -size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); +Advanced Streaming compression functions
ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);/**< pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -
typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e; +
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);/**< note: a dict will not be used if dict == NULL or dictSize < 8 */ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -
+
This is an advanced API, giving full control over buffer management, for users which need direct control over memory. But it's also a complex one, with many restrictions (documented below). Prefer using normal streaming API for an easier experience-
+Buffer-less streaming compression (synchronous mode)
A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. ZSTD_CCtx object can be re-used multiple times within successive compression operations. @@ -434,20 +519,21 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); In which case, it will "discard" the relevant memory section from its history. Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame, - Without last block mark, frames will be considered unfinished (broken) by decoders. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames will be considered unfinished (corrupted) by decoders. - You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame. + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame.-Buffer-less streaming compression functions
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +Buffer-less streaming compression functions
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); -size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);/**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -
-Buffer-less streaming decompression (synchronous mode)
+
+Buffer-less streaming decompression (synchronous mode)
A ZSTD_DCtx object is required to track streaming operations. Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. A ZSTD_DCtx object can be re-used multiple times. @@ -490,7 +576,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). This information is not required to properly decode a frame. - == Special case : skippable frames == + == Special case : skippable frames Skippable frames allow integration of user-defined data into a flow of concatenated frames. Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows : @@ -509,7 +595,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const vo unsigned checksumFlag; } ZSTD_frameParams;
-Buffer-less streaming decompression functions
size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ +Buffer-less streaming decompression functions
size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);/**< doesn't consume input, see details below */ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); @@ -517,8 +603,8 @@ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -
-Block functions
+
+Block functions
Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. @@ -542,10 +628,10 @@ ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); Use ZSTD_insertBlock() in such a case.-Raw zstd block functions
size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); +Raw zstd block functions
size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);/**< insert block into `dctx` history. Useful for uncompressed blocks */ -
+