1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-30 22:23:13 +03:00

updated Advanced API proposal

only declarations in zstd.h
This commit is contained in:
Yann Collet
2017-05-12 12:36:11 -07:00
parent bd1964a988
commit 33eb7ac6b6
2 changed files with 357 additions and 0 deletions

View File

@ -471,6 +471,165 @@ size_t ZSTD_estimateDDictSize(size_t dictSize);
</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
</p></pre><BR>
<h3>New experimental advanced parameters API</h3><pre></pre><b><pre></pre></b><BR>
<pre><b>typedef enum {
</b>/* compression parameters */<b>
ZSTD_p_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table (default:3) */<b>
ZSTD_p_windowLog, </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* default value : set through compressionLevel */
ZSTD_p_hashLog, </b>/* Size of the probe table, as a power of 2.<b>
* Resulting table size is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
* Larger tables improve compression ratio of strategies <= dFast,
* and improve speed of strategies > dFast */
ZSTD_p_chainLog, </b>/* Size of the full-search table, as a power of 2.<b>
* Resulting table size is (1 << (chainLog+2)).
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy */
ZSTD_p_searchLog, </b>/* Number of search attempts, as a power of 2.<b>
* More attempts result in better and slower compression.
* This parameter is useless when using "fast" and "dFast" strategies */
ZSTD_p_minMatchLength, </b>/* Minimum match size (except for repeat-matches, whose limit is hard-coded).<b>
* Larger values make compression and decompression faster, but decrease compression ratio
* Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
* Note that currently, for all strategies < btopt, effective minimum is 4.
* Note that currently, for all strategies > fast, effective maximum is 6. */
ZSTD_p_targetLength, </b>/* Only useful for strategies >= btopt.<b>
* Length of Match considered "good enough" to stop search.
* Larger values make compression stronger and slower. */
ZSTD_p_compressionStrategy, </b>/* See ZSTD_strategy enum definition.<b>
* Cast selected strategy into unsigned for ZSTD_CCtx_setParameter() compatibility.
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression */
#if 0
ZSTD_p_windowSize, </b>/* Maximum allowed back-reference distance.<b>
* Can be set to a more precise value than windowLog.
* Will be transparently reduced to closest possible inferior value
* (see Zstandard compression format) */
</b>/* Not ready yet ! */<b>
#endif
</b>/* frame parameters */<b>
ZSTD_p_contentSizeFlag=200, </b>/* Content size is written into frame header _whenever known_ (default:1) */<b>
ZSTD_p_contentChecksumFlag, </b>/* A 32-bit content checksum is calculated and written at end of frame (default:0) */<b>
ZSTD_p_dictIDFlag, </b>/* When applicable, dictID of dictionary is provided in frame header (default:1) */<b>
</b>/* dictionary parameters */<b>
ZSTD_p_refDictContent=300, </b>/* Dictionary content will be referenced, instead of copied (default:0).<b>
* This avoids duplicating dictionary content.
* But it also requires that dictionary buffer outlives its user (CCtx or CDict) */
</b>/* Not ready yet ! */<b>
ZSTD_p_rawContentDict, </b>/* load dictionary in "content-only" mode (no header analysis) (default:0) */<b>
</b>/* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */<b>
#if 0
</b>/* multi-threading parameters (not ready yet !) */<b>
ZSTD_p_nbThreads=400, </b>/* Select how many threads a compression job can spawn (default:1)<b>
* More threads improve speed, but also increase memory usage */
ZSTDMT_p_jobSize, </b>/* Size of a compression job. Each job is compressed in parallel.<b>
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
* The minimum size is automatically and transparently enforced */
ZSTDMT_p_overlapSizeLog, </b>/* Size of previous input reloaded at the beginning of each job.<b>
* 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
#endif
</b>/* advanced parameters - may not remain available after API update */<b>
ZSTD_p_forceMaxWindow=1100, </b>/* Force back-references to remain < windowSize,<b>
* even when referencing into Dictionary content
* default : 0 when using a CDict, 1 when using a Prefix */
} ZSTD_cParameter;
</b></pre><BR>
<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
</b><p> Set one compression parameter, selected by enum ZSTD_cParameter.
@result : 0, or an error code (which can be tested with ZSTD_isError())
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
</b><p> Total input data size to be compressed into a single frame.
This value will be controlled at the end, and result in error if not respected.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Note 1 : 0 means zero, empty.
In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
Note that ZSTD_CONTENTSIZE_UNKNOWN is default value for all new compression jobs.
Note 2 : If all data is provided and consumed in a single round,
this value is overridden by srcSize instead.
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); </b>/* Not ready yet ! */<b>
</b><p> Reference a prefix (content-only dictionary) to bootstrap next compression job.
Decompression will have to use same prefix.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Special : Adding a NULL (or 0-size) dictionary invalidates any previous prefix, meaning "return to no-dictionary mode".
Note 1 : Prefix content is referenced. It must outlive compression job.
Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
For this reason, compression parameters cannot be changed anymore after loading a prefix.
It's also a CPU-heavy operation, with non-negligible impact on latency.
Note 3 : Prefix is only used once. Tables are discarded at end of compression job.
If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict
</p></pre><BR>
<pre><b>ZSTD_CDict* ZSTD_CDict_createEmpty(void); </b>/* Not ready yet ! */<b>
size_t ZSTD_CDict_setParameter(ZSTD_CDict* cdict, ZSTD_cParameter param, unsigned value); </b>/* Not ready yet ! */<b>
size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dictSize); </b>/* Not ready yet ! */<b>
</b><p> Create a CDict object which is still mutable after creation.
It allows usage of ZSTD_CDict_setParameter().
Once all compression parameters are selected,
it's possible to load the target dictionary, using ZSTD_CDict_loadDictionary().
Dictionary content will be copied internally, except if ZSTD_p_refDictContent is used.
After loading the dictionary, no more change is possible.
The only remaining operation is to free CDict object.
Note : An unfinished CDict behaves the same as a NULL CDict when referenced into a CCtx.
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/* Not ready yet ! */<b>
</b><p> Add a prepared dictionary to cctx, it will be used for next compression jobs.
Note that compression parameters will be enforced from within CDict.
Currently, they supersede any compression parameter previously set within CCtx.
The dictionary will remain valid for all future compression jobs performed using the same cctx.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
Special : adding a NULL CDict means "return to no-dictionary mode".
Note 1 : Currently, only one dictionary can be managed.
Adding a new dictionary effectively "discards" any previous one.
Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
</p></pre><BR>
<pre><b>typedef enum {
ZSTD_e_continue, </b>/* continue sending data, encoder transparently decides when to output result, depending on optimal conditions */<b>
ZSTD_e_flush, </b>/* flush any data provided and buffered so far - frame will continue, future data can still reference previous data for better compression */<b>
ZSTD_e_end </b>/* flush any remaining data and ends current frame. Any future compression starts a new frame. */<b>
} ZSTD_EndDirective;
</b></pre><BR>
<pre><b>size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos,
ZSTD_EndDirective endOp);
</b><p> Behave about the same as ZSTD_compressStream. To note :
- Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
- Compression parameters cannot be changed once compression is started.
- *dstPos must be <= dstCapacity, *srcPos must be <= srcSize
- *dstPos and *srcPos will be updated. They are guaranteed to never exceed their respective limit.
- @return provides the amount of data ready to flush and still within internal buffers
or an error code, which can be tested using ZSTD_isError().
if @return != 0, flush is not fully completed, so it must be called again to empty internal buffers.
- after a ZSTD_e_end directive, if internal buffer is not fully flushed,
only ZSTD_e_end and ZSTD_e_flush operations are allowed.
It is necessary to fully flush internal buffers
before changing compression parameters or start a new compression job.
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
</b><p> Return a CCtx to clean state.
Useful after an error, or to interrupt an ongoing compression job and start a new one.
It's allowed to change compression parameters after a reset.
Any internal data not yet flushed is cancelled.
</p></pre><BR>
<a name="Chapter15"></a><h2>Advanced decompression functions</h2><pre></pre>
<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);