updated Advanced API proposal

only declarations in zstd.h
2025-07-30 22:23:13 +03:00 · 2017-05-12 12:36:11 -07:00
parent bd1964a988
commit 33eb7ac6b6
2 changed files with 357 additions and 0 deletions
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@ -471,6 +471,165 @@ size_t ZSTD_estimateDDictSize(size_t dictSize);
 </b><p>   Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters 
 </p></pre><BR>

+<h3>New experimental advanced parameters API</h3><pre></pre><b><pre></pre></b><BR>
+<pre><b>typedef enum {
+    </b>/* compression parameters */<b>
+    ZSTD_p_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table (default:3) */<b>
+    ZSTD_p_windowLog,        </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * default value : set through compressionLevel */
+    ZSTD_p_hashLog,          </b>/* Size of the probe table, as a power of 2.<b>
+                              * Resulting table size is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast */
+    ZSTD_p_chainLog,         </b>/* Size of the full-search table, as a power of 2.<b>
+                              * Resulting table size is (1 << (chainLog+2)).
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless when using "fast" strategy */
+    ZSTD_p_searchLog,        </b>/* Number of search attempts, as a power of 2.<b>
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless when using "fast" and "dFast" strategies */
+    ZSTD_p_minMatchLength,   </b>/* Minimum match size (except for repeat-matches, whose limit is hard-coded).<b>
+                              * Larger values make compression and decompression faster, but decrease compression ratio
+                              * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              * Note that currently, for all strategies > fast, effective maximum is 6. */
+    ZSTD_p_targetLength,     </b>/* Only useful for strategies >= btopt.<b>
+                              * Length of Match considered "good enough" to stop search.
+                              * Larger values make compression stronger and slower. */
+    ZSTD_p_compressionStrategy, </b>/* See ZSTD_strategy enum definition.<b>
+                              * Cast selected strategy into unsigned for ZSTD_CCtx_setParameter() compatibility.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression */
+#if 0
+    ZSTD_p_windowSize,       </b>/* Maximum allowed back-reference distance.<b>
+                              * Can be set to a more precise value than windowLog.
+                              * Will be transparently reduced to closest possible inferior value
+                              * (see Zstandard compression format) */
+                             </b>/* Not ready yet ! */<b>
+#endif
+
+    </b>/* frame parameters */<b>
+    ZSTD_p_contentSizeFlag=200, </b>/* Content size is written into frame header _whenever known_ (default:1) */<b>
+    ZSTD_p_contentChecksumFlag, </b>/* A 32-bits content checksum is calculated and written at end of frame (default:0) */<b>
+    ZSTD_p_dictIDFlag,       </b>/* When applicable, dictID of dictionary is provided in frame header (default:1) */<b>
+
+    </b>/* dictionary parameters */<b>
+    ZSTD_p_refDictContent=300, </b>/* Dictionary content will be referenced, instead of copied (default:0).<b>
+                              * This avoids duplicating dictionary content.
+                              * But it also requires that dictionary buffer outlives its user (CCtx or CDict) */
+                             </b>/* Not ready yet ! */<b>
+    ZSTD_p_rawContentDict,   </b>/* load dictionary in "content-only" mode (no header analysis) (default:0) */<b>
+                             </b>/* question : should there be an option to load dictionary only in zstd format, rejecting others with an error code ? */<b>
+
+#if 0
+    </b>/* multi-threading parameters (not ready yet !) */<b>
+    ZSTD_p_nbThreads=400,    </b>/* Select how many threads a compression job can spawn (default:1)<b>
+                              * More threads improve speed, but also increase memory usage */
+    ZSTDMT_p_jobSize,        </b>/* Size of a compression job. Each job is compressed in parallel.<b>
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be at least overlapSize, or 1 KB, whichever is larger
+                              * The minimum size is automatically and transparently enforced */
+    ZSTDMT_p_overlapSizeLog, </b>/* Size of previous input reloaded at the beginning of each job.<b>
+                              * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
+#endif
+
+    </b>/* advanced parameters - may not remain available after API update */<b>
+    ZSTD_p_forceMaxWindow=1100, </b>/* Force back-references to remain < windowSize,<b>
+                              * even when referencing into Dictionary content
+                              * default : 0 when using a CDict, 1 when using a Prefix */
+} ZSTD_cParameter;
+</b></pre><BR>
+<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
+</b><p>  Set one compression parameter, selected by enum ZSTD_cParameter.
+  @result : 0, or an error code (which can be tested with ZSTD_isError()) 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+</b><p>  Total input data size to be compressed into a single frame.
+  This value will be checked at the end, and will result in an error if not respected.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  Note 1 : 0 means zero, empty.
+           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+           Note that ZSTD_CONTENTSIZE_UNKNOWN is default value for all new compression jobs.
+  Note 2 : If all data is provided and consumed in a single round,
+           this value is overridden by srcSize instead. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);   </b>/* Not ready yet ! */<b>
+</b><p>  Reference a prefix (content-only dictionary) to bootstrap next compression job.
+  Decompression will have to use same prefix.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  Special : Adding a NULL (or 0-size) dictionary invalidates any previous prefix, meaning "return to no-dictionary mode".
+  Note 1 : Prefix content is referenced. It must outlive compression job.
+  Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+           For this reason, compression parameters cannot be changed anymore after loading a prefix.
+           It's also a CPU-heavy operation, with non-negligible impact on latency.
+  Note 3 : Prefix is only used once. Tables are discarded at end of compression job.
+           If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict 
+</p></pre><BR>
+
+<pre><b>ZSTD_CDict* ZSTD_CDict_createEmpty(void);   </b>/* Not ready yet ! */<b>
+size_t ZSTD_CDict_setParameter(ZSTD_CDict* cdict, ZSTD_cParameter param, unsigned value);  </b>/* Not ready yet ! */<b>
+size_t ZSTD_CDict_loadDictionary(ZSTD_CDict* cdict, const void* dict, size_t dictSize);    </b>/* Not ready yet ! */<b>
+</b><p>  Create a CDict object which is still mutable after creation.
+  It allows usage of ZSTD_CDict_setParameter().
+  Once all compression parameters are selected,
+  it's possible to load the target dictionary, using ZSTD_CDict_loadDictionary().
+  Dictionary content will be copied internally, except if ZSTD_p_refDictContent is used.
+  After loading the dictionary, no more change is possible.
+  The only remaining operation is to free CDict object.
+  Note : An unfinished CDict behaves the same as a NULL CDict when referenced into a CCtx.
+ 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);  </b>/* Not ready yet ! */<b>
+</b><p>  Add a prepared dictionary to cctx; it will be used for next compression jobs.
+  Note that compression parameters will be enforced from within CDict.
+  Currently, they supersede any compression parameter previously set within CCtx.
+  The dictionary will remain valid for all future compression jobs performed using the same cctx.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  Special : adding a NULL CDict means "return to no-dictionary mode".
+  Note 1 : Currently, only one dictionary can be managed.
+           Adding a new dictionary effectively "discards" any previous one.
+  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
+ 
+</p></pre><BR>
+
+<pre><b>typedef enum {
+    ZSTD_e_continue,  </b>/* continue sending data, encoder transparently decides when to output result, depending on optimal conditions */<b>
+    ZSTD_e_flush,     </b>/* flush any data provided and buffered so far - frame will continue, future data can still reference previous data for better compression */<b>
+    ZSTD_e_end        </b>/* flush any remaining data and ends current frame. Any future compression starts a new frame. */<b>
+} ZSTD_EndDirective;
+</b></pre><BR>
+<pre><b>size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity, size_t* dstPos,
+                        const void* src, size_t srcSize, size_t* srcPos,
+                              ZSTD_EndDirective endOp);
+</b><p>  Behave about the same as ZSTD_compressStream. To note :
+  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_setParameter()
+  - Compression parameters cannot be changed once compression is started.
+  - *dstPos must be <= dstCapacity, *srcPos must be <= srcSize
+  - *dstPos and *srcPos will be updated. They are guaranteed to remain below their respective limit.
+  - @return provides the amount of data ready to flush and still within internal buffers
+            or an error code, which can be tested using ZSTD_isError().
+            if @return != 0, flush is not fully completed, so it must be called again to empty internal buffers.
+  - after a ZSTD_e_end directive, if internal buffer is not fully flushed,
+            only ZSTD_e_end and ZSTD_e_flush operations are allowed.
+            It is necessary to fully flush internal buffers
+            before changing compression parameters or starting a new compression job.
+ 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
+</b><p>  Return a CCtx to clean state.
+  Useful after an error, or to interrupt an ongoing compression job and start a new one.
+  It's allowed to change compression parameters after a reset.
+  Any internal data not yet flushed is cancelled.
+ 
+</p></pre><BR>
+
 <a name="Chapter15"></a><h2>Advanced decompression functions</h2><pre></pre>

 <pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);