mirror of
https://github.com/facebook/zstd.git
synced 2025-07-30 22:23:13 +03:00
Rename "External Matchfinder" to "Block-Level Sequence Producer" (#3484)
* change "external matchfinder" to "external sequence producer" * migrate contrib/ to new naming convention * fix contrib build * fix error message * update debug strings * fix def of invalid sequences in zstd.h * nit * update CHANGELOG * fix .gitignore
This commit is contained in:
committed by
GitHub
parent
4aa3bc49da
commit
ff42ed1582
@ -29,7 +29,7 @@ cli: Fix empty string as argument for `--output-dir-*` (#3220, @embg)
|
||||
cli: Fix decompression memory usage reported by `-vv --long` (#3042, @u1f35c, and #3232, @zengyijing)
|
||||
cli: Fix infinite loop when empty input is passed to trainer (#3081, @terrelln)
|
||||
cli: Fix `--adapt` doesn't work when `--no-progress` is also set (#3354, @terrelln)
|
||||
api: Support for External matchfinder (#3333, @embg)
|
||||
api: Support for Block-Level Sequence Producer (#3333, @embg)
|
||||
api: Support for in-place decompression (#3432, @terrelln)
|
||||
api: New `ZSTD_CCtx_setCParams()` function, set all parameters defined in a `ZSTD_compressionParameters` structure (#3403, @Cyan4973)
|
||||
api: Streaming decompression detects incorrect header ID sooner (#3175, @Cyan4973)
|
||||
|
4
Makefile
4
Makefile
@ -123,7 +123,7 @@ contrib: lib
|
||||
$(MAKE) -C contrib/seekable_format/examples all
|
||||
$(MAKE) -C contrib/seekable_format/tests test
|
||||
$(MAKE) -C contrib/largeNbDicts all
|
||||
$(MAKE) -C contrib/externalMatchfinder all
|
||||
$(MAKE) -C contrib/externalSequenceProducer all
|
||||
cd build/single_file_libs/ ; ./build_decoder_test.sh
|
||||
cd build/single_file_libs/ ; ./build_library_test.sh
|
||||
|
||||
@ -143,7 +143,7 @@ clean:
|
||||
$(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
|
||||
$(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID)
|
||||
$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
|
||||
$(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID)
|
||||
$(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID)
|
||||
$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
|
||||
$(Q)$(RM) -r lz4
|
||||
@echo Cleaning completed
|
||||
|
2
contrib/externalMatchfinder/.gitignore
vendored
2
contrib/externalMatchfinder/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
# build artifacts
|
||||
externalMatchfinder
|
@ -1,14 +0,0 @@
|
||||
externalMatchfinder
|
||||
=====================
|
||||
|
||||
`externalMatchfinder` is a test tool for the external matchfinder API.
|
||||
It demonstrates how to use the API to perform a simple round-trip test.
|
||||
|
||||
A sample matchfinder is provided in matchfinder.c, but the user can swap
|
||||
this out with a different one if desired. The sample matchfinder implements
|
||||
LZ compression with a 1KB hashtable. Dictionary compression is not currently supported.
|
||||
|
||||
Command line :
|
||||
```
|
||||
externalMatchfinder filename
|
||||
```
|
2
contrib/externalSequenceProducer/.gitignore
vendored
Normal file
2
contrib/externalSequenceProducer/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
# build artifacts
|
||||
externalSequenceProducer
|
@ -23,11 +23,11 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wredundant-decls
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
|
||||
default: externalMatchfinder
|
||||
default: externalSequenceProducer
|
||||
|
||||
all: externalMatchfinder
|
||||
all: externalSequenceProducer
|
||||
|
||||
externalMatchfinder: matchfinder.c main.c $(LIBZSTD)
|
||||
externalSequenceProducer: sequence_producer.c main.c $(LIBZSTD)
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.PHONY: $(LIBZSTD)
|
||||
@ -37,4 +37,4 @@ $(LIBZSTD):
|
||||
clean:
|
||||
$(RM) *.o
|
||||
$(MAKE) -C $(LIBDIR) clean > /dev/null
|
||||
$(RM) externalMatchfinder
|
||||
$(RM) externalSequenceProducer
|
14
contrib/externalSequenceProducer/README.md
Normal file
14
contrib/externalSequenceProducer/README.md
Normal file
@ -0,0 +1,14 @@
|
||||
externalSequenceProducer
|
||||
=====================
|
||||
|
||||
`externalSequenceProducer` is a test tool for the Block-Level Sequence Producer API.
|
||||
It demonstrates how to use the API to perform a simple round-trip test.
|
||||
|
||||
A sample sequence producer is provided in sequence_producer.c, but the user can swap
|
||||
this out with a different one if desired. The sample sequence producer implements
|
||||
LZ parsing with a 1024-entry hashtable. Dictionary-based parsing is not currently supported.
|
||||
|
||||
Command line :
|
||||
```
|
||||
externalSequenceProducer filename
|
||||
```
|
@ -16,7 +16,7 @@
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "matchfinder.h" // simpleExternalMatchFinder
|
||||
#include "sequence_producer.h" // simpleSequenceProducer
|
||||
|
||||
#define CHECK(res) \
|
||||
do { \
|
||||
@ -28,23 +28,23 @@ do { \
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 2) {
|
||||
printf("Usage: exampleMatchfinder <file>\n");
|
||||
printf("Usage: externalSequenceProducer <file>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
ZSTD_CCtx* const zc = ZSTD_createCCtx();
|
||||
|
||||
int simpleExternalMatchState = 0xdeadbeef;
|
||||
int simpleSequenceProducerState = 0xdeadbeef;
|
||||
|
||||
// Here is the crucial bit of code!
|
||||
ZSTD_registerExternalMatchFinder(
|
||||
ZSTD_registerSequenceProducer(
|
||||
zc,
|
||||
&simpleExternalMatchState,
|
||||
simpleExternalMatchFinder
|
||||
&simpleSequenceProducerState,
|
||||
simpleSequenceProducer
|
||||
);
|
||||
|
||||
{
|
||||
size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1);
|
||||
size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1);
|
||||
CHECK(res);
|
||||
}
|
||||
|
@ -9,15 +9,15 @@
|
||||
*/
|
||||
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "matchfinder.h"
|
||||
#include "sequence_producer.h"
|
||||
|
||||
#define HSIZE 1024
|
||||
static U32 const HLOG = 10;
|
||||
static U32 const MLS = 4;
|
||||
static U32 const BADIDX = 0xffffffff;
|
||||
|
||||
size_t simpleExternalMatchFinder(
|
||||
void* externalMatchState,
|
||||
size_t simpleSequenceProducer(
|
||||
void* sequenceProducerState,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
||||
@ -31,7 +31,7 @@ size_t simpleExternalMatchFinder(
|
||||
size_t seqCount = 0;
|
||||
U32 hashTable[HSIZE];
|
||||
|
||||
(void)externalMatchState;
|
||||
(void)sequenceProducerState;
|
||||
(void)dict;
|
||||
(void)dictSize;
|
||||
(void)outSeqsCapacity;
|
@ -14,8 +14,8 @@
|
||||
#define ZSTD_STATIC_LINKING_ONLY
|
||||
#include "zstd.h"
|
||||
|
||||
size_t simpleExternalMatchFinder(
|
||||
void* externalMatchState,
|
||||
size_t simpleSequenceProducer(
|
||||
void* sequenceProducerState,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
@ -54,7 +54,7 @@ const char* ERR_getErrorString(ERR_enum code)
|
||||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
|
||||
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
|
||||
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
|
||||
case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code";
|
||||
case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
|
||||
case PREFIX(externalSequences_invalid): return "External sequences are not valid";
|
||||
case PREFIX(maxCode):
|
||||
default: return notErrorCode;
|
||||
|
@ -615,7 +615,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
bounds.upperBound = (int)ZSTD_ps_disable;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_enableMatchFinderFallback:
|
||||
case ZSTD_c_enableSeqProducerFallback:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = 1;
|
||||
return bounds;
|
||||
@ -695,7 +695,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||
case ZSTD_c_useRowMatchFinder:
|
||||
case ZSTD_c_deterministicRefPrefix:
|
||||
case ZSTD_c_prefetchCDictTables:
|
||||
case ZSTD_c_enableMatchFinderFallback:
|
||||
case ZSTD_c_enableSeqProducerFallback:
|
||||
case ZSTD_c_maxBlockSize:
|
||||
case ZSTD_c_searchForExternalRepcodes:
|
||||
default:
|
||||
@ -754,7 +754,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
||||
case ZSTD_c_useRowMatchFinder:
|
||||
case ZSTD_c_deterministicRefPrefix:
|
||||
case ZSTD_c_prefetchCDictTables:
|
||||
case ZSTD_c_enableMatchFinderFallback:
|
||||
case ZSTD_c_enableSeqProducerFallback:
|
||||
case ZSTD_c_maxBlockSize:
|
||||
case ZSTD_c_searchForExternalRepcodes:
|
||||
break;
|
||||
@ -989,8 +989,8 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
||||
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
|
||||
return CCtxParams->prefetchCDictTables;
|
||||
|
||||
case ZSTD_c_enableMatchFinderFallback:
|
||||
BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value);
|
||||
case ZSTD_c_enableSeqProducerFallback:
|
||||
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
|
||||
CCtxParams->enableMatchFinderFallback = value;
|
||||
return CCtxParams->enableMatchFinderFallback;
|
||||
|
||||
@ -1140,7 +1140,7 @@ size_t ZSTD_CCtxParams_getParameter(
|
||||
case ZSTD_c_prefetchCDictTables:
|
||||
*value = (int)CCtxParams->prefetchCDictTables;
|
||||
break;
|
||||
case ZSTD_c_enableMatchFinderFallback:
|
||||
case ZSTD_c_enableSeqProducerFallback:
|
||||
*value = CCtxParams->enableMatchFinderFallback;
|
||||
break;
|
||||
case ZSTD_c_maxBlockSize:
|
||||
@ -1610,8 +1610,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
||||
|
||||
/* Helper function for calculating memory requirements.
|
||||
* Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
|
||||
static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) {
|
||||
U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4;
|
||||
static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
|
||||
U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
|
||||
return blockSize / divider;
|
||||
}
|
||||
|
||||
@ -1623,12 +1623,12 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
||||
const size_t buffInSize,
|
||||
const size_t buffOutSize,
|
||||
const U64 pledgedSrcSize,
|
||||
int useExternalMatchFinder,
|
||||
int useSequenceProducer,
|
||||
size_t maxBlockSize)
|
||||
{
|
||||
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
|
||||
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
|
||||
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
|
||||
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
|
||||
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
||||
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
|
||||
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
||||
@ -1648,7 +1648,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
||||
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
|
||||
|
||||
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
||||
size_t const externalSeqSpace = useExternalMatchFinder
|
||||
size_t const externalSeqSpace = useSequenceProducer
|
||||
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
|
||||
: 0;
|
||||
|
||||
@ -1679,7 +1679,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
||||
* be needed. However, we still allocate two 0-sized buffers, which can
|
||||
* take space under ASAN. */
|
||||
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
||||
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize);
|
||||
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
||||
}
|
||||
|
||||
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
||||
@ -1740,7 +1740,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
||||
|
||||
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
||||
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
|
||||
ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize);
|
||||
ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2024,7 +2024,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
|
||||
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
|
||||
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
|
||||
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder);
|
||||
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
|
||||
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
|
||||
? ZSTD_compressBound(blockSize) + 1
|
||||
: 0;
|
||||
@ -2041,7 +2041,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
size_t const neededSpace =
|
||||
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
||||
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
|
||||
buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder, params->maxBlockSize);
|
||||
buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
|
||||
int resizeWorkspace;
|
||||
|
||||
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
|
||||
@ -2155,7 +2155,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
}
|
||||
|
||||
/* reserve space for block-level external sequences */
|
||||
if (params->useExternalMatchFinder) {
|
||||
if (params->useSequenceProducer) {
|
||||
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
||||
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
|
||||
zc->externalMatchCtx.seqBuffer =
|
||||
@ -3022,26 +3022,26 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
|
||||
ssPtr->longLengthType = ZSTD_llt_none;
|
||||
}
|
||||
|
||||
/* ZSTD_postProcessExternalMatchFinderResult() :
|
||||
/* ZSTD_postProcessSequenceProducerResult() :
|
||||
* Validates and post-processes sequences obtained through the Block-Level Sequence Producer API:
|
||||
* - Checks whether nbExternalSeqs represents an error condition.
|
||||
* - Appends a block delimiter to outSeqs if one is not already present.
|
||||
* See zstd.h for context regarding block delimiters.
|
||||
* Returns the number of sequences after post-processing, or an error code. */
|
||||
static size_t ZSTD_postProcessExternalMatchFinderResult(
|
||||
static size_t ZSTD_postProcessSequenceProducerResult(
|
||||
ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
|
||||
) {
|
||||
RETURN_ERROR_IF(
|
||||
nbExternalSeqs > outSeqsCapacity,
|
||||
externalMatchFinder_failed,
|
||||
"External matchfinder returned error code %lu",
|
||||
sequenceProducer_failed,
|
||||
"External sequence producer returned error code %lu",
|
||||
(unsigned long)nbExternalSeqs
|
||||
);
|
||||
|
||||
RETURN_ERROR_IF(
|
||||
nbExternalSeqs == 0 && srcSize > 0,
|
||||
externalMatchFinder_failed,
|
||||
"External matchfinder produced zero sequences for a non-empty src buffer!"
|
||||
sequenceProducer_failed,
|
||||
"Got zero sequences from external sequence producer for a non-empty src buffer!"
|
||||
);
|
||||
|
||||
if (srcSize == 0) {
|
||||
@ -3061,7 +3061,7 @@ static size_t ZSTD_postProcessExternalMatchFinderResult(
|
||||
* produced an invalid parse, by definition of ZSTD_sequenceBound(). */
|
||||
RETURN_ERROR_IF(
|
||||
nbExternalSeqs == outSeqsCapacity,
|
||||
externalMatchFinder_failed,
|
||||
sequenceProducer_failed,
|
||||
"nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
|
||||
);
|
||||
|
||||
@ -3139,9 +3139,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
/* External sequence producer + LDM is technically possible, just not implemented yet.
|
||||
* We need to revisit soon and implement it. */
|
||||
RETURN_ERROR_IF(
|
||||
zc->appliedParams.useExternalMatchFinder,
|
||||
zc->appliedParams.useSequenceProducer,
|
||||
parameter_combination_unsupported,
|
||||
"Long-distance matching with external matchfinder enabled is not currently supported."
|
||||
"Long-distance matching with external sequence producer enabled is not currently supported."
|
||||
);
|
||||
|
||||
/* Updates ldmSeqStore.pos */
|
||||
@ -3158,9 +3158,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
/* External sequence producer + LDM is technically possible, just not implemented yet.
|
||||
* We need to revisit soon and implement it. */
|
||||
RETURN_ERROR_IF(
|
||||
zc->appliedParams.useExternalMatchFinder,
|
||||
zc->appliedParams.useSequenceProducer,
|
||||
parameter_combination_unsupported,
|
||||
"Long-distance matching with external matchfinder enabled is not currently supported."
|
||||
"Long-distance matching with external sequence producer enabled is not currently supported."
|
||||
);
|
||||
|
||||
ldmSeqStore.seq = zc->ldmSequences;
|
||||
@ -3177,7 +3177,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
zc->appliedParams.useRowMatchFinder,
|
||||
src, srcSize);
|
||||
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
||||
} else if (zc->appliedParams.useExternalMatchFinder) {
|
||||
} else if (zc->appliedParams.useSequenceProducer) {
|
||||
assert(
|
||||
zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
|
||||
);
|
||||
@ -3195,7 +3195,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
windowSize
|
||||
);
|
||||
|
||||
size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult(
|
||||
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
|
||||
zc->externalMatchCtx.seqBuffer,
|
||||
nbExternalSeqs,
|
||||
zc->externalMatchCtx.seqBufferCapacity,
|
||||
@ -3217,7 +3217,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
"Failed to copy external sequences to seqStore!"
|
||||
);
|
||||
ms->ldmSeqStore = NULL;
|
||||
DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs);
|
||||
DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
|
||||
return ZSTDbss_compress;
|
||||
}
|
||||
|
||||
@ -3233,7 +3233,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
ms->ldmSeqStore = NULL;
|
||||
DEBUGLOG(
|
||||
5,
|
||||
"External matchfinder returned error code %lu. Falling back to internal matchfinder.",
|
||||
"External sequence producer returned error code %lu. Falling back to internal parser.",
|
||||
(unsigned long)nbExternalSeqs
|
||||
);
|
||||
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
|
||||
@ -6033,9 +6033,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
/* If an external sequence producer is enabled, make sure to fail before checking job size (for consistency) */
|
||||
RETURN_ERROR_IF(
|
||||
params.useExternalMatchFinder == 1 && params.nbWorkers >= 1,
|
||||
params.useSequenceProducer == 1 && params.nbWorkers >= 1,
|
||||
parameter_combination_unsupported,
|
||||
"External matchfinder isn't supported with nbWorkers >= 1"
|
||||
"External sequence producer isn't supported with nbWorkers >= 1"
|
||||
);
|
||||
|
||||
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
|
||||
@ -6251,7 +6251,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
||||
*/
|
||||
static size_t
|
||||
ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
|
||||
size_t posInSrc, U32 windowLog, size_t dictSize, int useExternalMatchFinder)
|
||||
size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
|
||||
{
|
||||
U32 const windowSize = 1u << windowLog;
|
||||
/* posInSrc represents the amount of data the decoder would decode up to this point.
|
||||
@ -6260,7 +6260,7 @@ ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
|
||||
* window size. After output surpasses windowSize, we're limited to windowSize offsets again.
|
||||
*/
|
||||
size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
||||
size_t const matchLenLowerBound = (minMatch == 3 || useExternalMatchFinder) ? 3 : 4;
|
||||
size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
|
||||
RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
|
||||
/* Validate maxNbSeq is large enough for the given matchLength and minMatch */
|
||||
RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
|
||||
@ -6325,7 +6325,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
||||
if (cctx->appliedParams.validateSequences) {
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useExternalMatchFinder),
|
||||
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
||||
"Sequence validation failed");
|
||||
}
|
||||
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
||||
@ -6463,7 +6463,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
||||
if (cctx->appliedParams.validateSequences) {
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useExternalMatchFinder),
|
||||
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
||||
"Sequence validation failed");
|
||||
}
|
||||
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
||||
@ -6908,9 +6908,9 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
|
||||
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
|
||||
}
|
||||
|
||||
void ZSTD_registerExternalMatchFinder(
|
||||
void ZSTD_registerSequenceProducer(
|
||||
ZSTD_CCtx* zc, void* mState,
|
||||
ZSTD_externalMatchFinder_F* mFinder
|
||||
ZSTD_sequenceProducer_F* mFinder
|
||||
) {
|
||||
if (mFinder != NULL) {
|
||||
ZSTD_externalMatchCtx emctx;
|
||||
@ -6919,9 +6919,9 @@ void ZSTD_registerExternalMatchFinder(
|
||||
emctx.seqBuffer = NULL;
|
||||
emctx.seqBufferCapacity = 0;
|
||||
zc->externalMatchCtx = emctx;
|
||||
zc->requestedParams.useExternalMatchFinder = 1;
|
||||
zc->requestedParams.useSequenceProducer = 1;
|
||||
} else {
|
||||
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
|
||||
zc->requestedParams.useExternalMatchFinder = 0;
|
||||
zc->requestedParams.useSequenceProducer = 0;
|
||||
}
|
||||
}
|
||||
|
@ -353,8 +353,8 @@ struct ZSTD_CCtx_params_s {
|
||||
|
||||
/* Indicates whether an external sequence producer has been referenced.
|
||||
* Users can't set this externally.
|
||||
* It is set internally in ZSTD_registerExternalMatchFinder(). */
|
||||
int useExternalMatchFinder;
|
||||
* It is set internally in ZSTD_registerSequenceProducer(). */
|
||||
int useSequenceProducer;
|
||||
|
||||
/* Adjust the max block size*/
|
||||
size_t maxBlockSize;
|
||||
@ -395,7 +395,7 @@ typedef struct {
|
||||
/* Context for the Block-Level Sequence Producer API */
|
||||
typedef struct {
|
||||
void* mState;
|
||||
ZSTD_externalMatchFinder_F* mFinder;
|
||||
ZSTD_sequenceProducer_F* mFinder;
|
||||
ZSTD_Sequence* seqBuffer;
|
||||
size_t seqBufferCapacity;
|
||||
} ZSTD_externalMatchCtx;
|
||||
|
201
lib/zstd.h
201
lib/zstd.h
@ -478,7 +478,7 @@ typedef enum {
|
||||
* ZSTD_c_useBlockSplitter
|
||||
* ZSTD_c_useRowMatchFinder
|
||||
* ZSTD_c_prefetchCDictTables
|
||||
* ZSTD_c_enableMatchFinderFallback
|
||||
* ZSTD_c_enableSeqProducerFallback
|
||||
* ZSTD_c_maxBlockSize
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
@ -565,7 +565,7 @@ typedef enum {
|
||||
* They will be used to compress next frame.
|
||||
* Resetting session never fails.
|
||||
* - The parameters : changes all parameters back to "default".
|
||||
* This also removes any reference to any dictionary or external matchfinder.
|
||||
* This also removes any reference to any dictionary or external sequence producer.
|
||||
* Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
|
||||
* otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
|
||||
* - Both : similar to resetting the session, followed by resetting parameters.
|
||||
@ -1627,8 +1627,8 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
|
||||
* Note : only single-threaded compression is supported.
|
||||
* ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
|
||||
*
|
||||
* Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the external matchfinder API at this time.
|
||||
* Size estimates assume that no external matchfinder is registered.
|
||||
* Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
|
||||
* Size estimates assume that no external sequence producer is registered.
|
||||
*/
|
||||
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
|
||||
ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
|
||||
@ -1650,8 +1650,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
|
||||
* In this case, get total size by adding ZSTD_estimate?DictSize
|
||||
* Note 2 : only single-threaded compression is supported.
|
||||
* ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
|
||||
* Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the external matchfinder API at this time.
|
||||
* Size estimates assume that no external matchfinder is registered.
|
||||
* Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
|
||||
* Size estimates assume that no external sequence producer is registered.
|
||||
*/
|
||||
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
|
||||
ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
|
||||
@ -2113,19 +2113,19 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||
*/
|
||||
#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
|
||||
|
||||
/* ZSTD_c_enableMatchFinderFallback
|
||||
/* ZSTD_c_enableSeqProducerFallback
|
||||
* Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
|
||||
*
|
||||
* Controls whether zstd will fall back to an internal matchfinder if an
|
||||
* external matchfinder is registered and returns an error code. This fallback is
|
||||
* block-by-block: the internal matchfinder will only be called for blocks where
|
||||
* the external matchfinder returns an error code. Fallback compression will
|
||||
* Controls whether zstd will fall back to an internal sequence producer if an
|
||||
* external sequence producer is registered and returns an error code. This fallback
|
||||
* is block-by-block: the internal sequence producer will only be called for blocks
|
||||
* where the external sequence producer returns an error code. Fallback parsing will
|
||||
* follow any other cParam settings, such as compression level, the same as in a
|
||||
* normal (fully-internal) compression operation.
|
||||
*
|
||||
* The user is strongly encouraged to read the full external matchfinder API
|
||||
* The user is strongly encouraged to read the full Block-Level Sequence Producer API
|
||||
* documentation (below) before setting this parameter. */
|
||||
#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17
|
||||
#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17
|
||||
|
||||
/* ZSTD_c_maxBlockSize
|
||||
* Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
|
||||
@ -2141,12 +2141,13 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||
|
||||
/* ZSTD_c_searchForExternalRepcodes
|
||||
* This parameter affects how zstd parses external sequences, such as sequences
|
||||
* provided through the compressSequences() API or from an external matchfinder.
|
||||
* provided through the compressSequences() API or from an external block-level
|
||||
* sequence producer.
|
||||
*
|
||||
* If set to ZSTD_ps_enable, the library will check for repeated offsets in
|
||||
* external sequences, even if those repcodes are not explicitly indicated in
|
||||
* the "rep" field. Note that this is the only way to exploit repcode matches
|
||||
* while using compressSequences() or an external matchfinder, since zstd
|
||||
* while using compressSequences() or an external sequence producer, since zstd
|
||||
* currently ignores the "rep" field of external sequences.
|
||||
*
|
||||
* If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
|
||||
@ -2805,43 +2806,52 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_
|
||||
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
|
||||
|
||||
|
||||
/* ********************** EXTERNAL MATCHFINDER API **********************
|
||||
/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
|
||||
*
|
||||
* *** OVERVIEW ***
|
||||
* This API allows users to replace the zstd internal block-level matchfinder
|
||||
* with an external matchfinder function. Potential applications of the API
|
||||
* include hardware-accelerated matchfinders and matchfinders specialized to
|
||||
* particular types of data.
|
||||
* The Block-Level Sequence Producer API allows users to provide their own custom
|
||||
* sequence producer which libzstd invokes to process each block. The produced list
|
||||
* of sequences (literals and matches) is then post-processed by libzstd to produce
|
||||
* valid compressed blocks.
|
||||
*
|
||||
* See contrib/externalMatchfinder for an example program employing the
|
||||
* external matchfinder API.
|
||||
* This block-level offload API is a more granular complement of the existing
|
||||
* frame-level offload API compressSequences() (introduced in v1.5.1). It offers
|
||||
* an easier migration story for applications already integrated with libzstd: the
|
||||
* user application continues to invoke the same compression functions
|
||||
* ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
|
||||
* from the specific advantages of the external sequence producer. For example,
|
||||
* the sequence producer could be tuned to take advantage of known characteristics
|
||||
* of the input, to offer better speed / ratio, or could leverage hardware
|
||||
* acceleration not available within libzstd itself.
|
||||
*
|
||||
* See contrib/externalSequenceProducer for an example program employing the
|
||||
* Block-Level Sequence Producer API.
|
||||
*
|
||||
* *** USAGE ***
|
||||
* The user is responsible for implementing a function of type
|
||||
* ZSTD_externalMatchFinder_F. For each block, zstd will pass the following
|
||||
* ZSTD_sequenceProducer_F. For each block, zstd will pass the following
|
||||
* arguments to the user-provided function:
|
||||
*
|
||||
* - externalMatchState: a pointer to a user-managed state for the external
|
||||
* matchfinder.
|
||||
* - sequenceProducerState: a pointer to a user-managed state for the sequence
|
||||
* producer.
|
||||
*
|
||||
* - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the
|
||||
* external matchfinder. outSeqsCapacity is guaranteed >=
|
||||
* ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by
|
||||
* the CCtx.
|
||||
* - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
|
||||
* outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
|
||||
* backing outSeqs is managed by the CCtx.
|
||||
*
|
||||
* - src, srcSize: an input buffer which the external matchfinder must parse
|
||||
* into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
|
||||
* - src, srcSize: an input buffer for the sequence producer to parse.
|
||||
* srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
|
||||
*
|
||||
* - dict, dictSize: a history buffer, which may be empty, which the external
|
||||
* matchfinder may reference as it produces sequences for the src buffer.
|
||||
* Currently, zstd will always pass dictSize == 0 into external matchfinders,
|
||||
* but this will change in the future.
|
||||
* - dict, dictSize: a history buffer, which may be empty, which the sequence
|
||||
* producer may reference as it parses the src buffer. Currently, zstd will
|
||||
* always pass dictSize == 0 into external sequence producers, but this will
|
||||
* change in the future.
|
||||
*
|
||||
* - compressionLevel: a signed integer representing the zstd compression level
|
||||
* set by the user for the current operation. The external matchfinder may
|
||||
* choose to use this information to change its compression strategy and
|
||||
* speed/ratio tradeoff. Note: The compression level does not reflect zstd
|
||||
* parameters set through the advanced API.
|
||||
* set by the user for the current operation. The sequence producer may choose
|
||||
* to use this information to change its compression strategy and speed/ratio
|
||||
* tradeoff. Note: the compression level does not reflect zstd parameters set
|
||||
* through the advanced API.
|
||||
*
|
||||
* - windowSize: a size_t representing the maximum allowed offset for external
|
||||
* sequences. Note that sequence offsets are sometimes allowed to exceed the
|
||||
@ -2851,7 +2861,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc
|
||||
* The user-provided function shall return a size_t representing the number of
|
||||
* sequences written to outSeqs. This return value will be treated as an error
|
||||
* code if it is greater than outSeqsCapacity. The return value must be non-zero
|
||||
* if srcSize is non-zero. The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided
|
||||
* if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
|
||||
* for convenience, but any value greater than outSeqsCapacity will be treated as
|
||||
* an error code.
|
||||
*
|
||||
@ -2859,68 +2869,71 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc
|
||||
* written to outSeqs must be a valid parse of the src buffer. Data corruption may
|
||||
* occur if the parse is not valid. A parse is defined to be valid if the
|
||||
* following conditions hold:
|
||||
* - The sum of matchLengths and literalLengths is equal to srcSize.
|
||||
* - All sequences in the parse have matchLength != 0, except for the final
|
||||
* sequence. matchLength is not constrained for the final sequence.
|
||||
* - All offsets respect the windowSize parameter as specified in
|
||||
* - The sum of matchLengths and literalLengths must equal srcSize.
|
||||
* - All sequences in the parse, except for the final sequence, must have
|
||||
* matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
|
||||
* matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
|
||||
* - All offsets must respect the windowSize parameter as specified in
|
||||
* doc/zstd_compression_format.md.
|
||||
* - If the final sequence has matchLength == 0, it must also have offset == 0.
|
||||
*
|
||||
* zstd will only validate these conditions (and fail compression if they do not
|
||||
* hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
|
||||
* validation has a performance cost.
|
||||
*
|
||||
* If the user-provided function returns an error, zstd will either fall back
|
||||
* to an internal matchfinder or fail the compression operation. The user can
|
||||
* choose between the two behaviors by setting the
|
||||
* ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any
|
||||
* other cParam settings, such as compression level, the same as in a normal
|
||||
* compression operation.
|
||||
* to an internal sequence producer or fail the compression operation. The user can
|
||||
* choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
|
||||
* cParam. Fallback compression will follow any other cParam settings, such as
|
||||
* compression level, the same as in a normal compression operation.
|
||||
*
|
||||
* The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F
|
||||
* function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState,
|
||||
* externalMatchFinder). This setting will persist until the next parameter reset
|
||||
* of the CCtx.
|
||||
* The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
|
||||
* function by calling
|
||||
* ZSTD_registerSequenceProducer(cctx,
|
||||
* sequenceProducerState,
|
||||
* sequenceProducer)
|
||||
* This setting will persist until the next parameter reset of the CCtx.
|
||||
*
|
||||
* The externalMatchState must be initialized by the user before calling
|
||||
* ZSTD_registerExternalMatchFinder. The user is responsible for destroying the
|
||||
* externalMatchState.
|
||||
* The sequenceProducerState must be initialized by the user before calling
|
||||
* ZSTD_registerSequenceProducer(). The user is responsible for destroying the
|
||||
* sequenceProducerState.
|
||||
*
|
||||
* *** LIMITATIONS ***
|
||||
* External matchfinders are compatible with all zstd compression APIs which respect
|
||||
* advanced parameters. However, there are three limitations:
|
||||
* This API is compatible with all zstd compression APIs which respect advanced parameters.
|
||||
* However, there are three limitations:
|
||||
*
|
||||
* First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
|
||||
* COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
|
||||
* external matchfinder.
|
||||
* - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in
|
||||
* some cases (see its documentation for details). Users must explicitly set
|
||||
* ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an
|
||||
* external matchfinder is registered.
|
||||
* First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
|
||||
* COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
|
||||
* external sequence producer.
|
||||
* - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
|
||||
* cases (see its documentation for details). Users must explicitly set
|
||||
* ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
|
||||
* sequence producer is registered.
|
||||
* - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
|
||||
* whenever the window size (2^ZSTD_c_windowLog) is < 128MB, but that's subject to change. Users should
|
||||
* check the docs on ZSTD_c_enableLongDistanceMatching whenever the external
|
||||
* matchfinder API is used in conjunction with advanced settings (like windowLog).
|
||||
* check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
|
||||
* Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
|
||||
*
|
||||
* Second, history buffers are not supported. Concretely, zstd will always pass
|
||||
* dictSize == 0 to the external matchfinder (for now). This has two implications:
|
||||
* - Dictionaries are not supported. Compression will *not* fail if the user
|
||||
* Second, history buffers are not currently supported. Concretely, zstd will always pass
|
||||
* dictSize == 0 to the external sequence producer (for now). This has two implications:
|
||||
* - Dictionaries are not currently supported. Compression will *not* fail if the user
|
||||
* references a dictionary, but the dictionary won't have any effect.
|
||||
* - Stream history is not supported. All compression APIs, including streaming
|
||||
* APIs, work with the external matchfinder, but the external matchfinder won't
|
||||
* receive any history from the previous block. Each block is an independent chunk.
|
||||
* - Stream history is not currently supported. All advanced compression APIs, including
|
||||
* streaming APIs, work with external sequence producers, but each block is treated as
|
||||
* an independent chunk without history from previous blocks.
|
||||
*
|
||||
* Third, multi-threading within a single compression is not supported. In other words,
|
||||
* COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered.
|
||||
* Third, multi-threading within a single compression is not currently supported. In other words,
|
||||
* COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
|
||||
* Multi-threading across compressions is fine: simply create one CCtx per thread.
|
||||
*
|
||||
* Long-term, we plan to overcome all three limitations. There is no technical blocker to
|
||||
* overcoming them. It is purely a question of engineering effort.
|
||||
*/
|
||||
|
||||
#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1))
|
||||
#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
|
||||
|
||||
typedef size_t ZSTD_externalMatchFinder_F (
|
||||
void* externalMatchState,
|
||||
typedef size_t ZSTD_sequenceProducer_F (
|
||||
void* sequenceProducerState,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
||||
@ -2928,32 +2941,30 @@ typedef size_t ZSTD_externalMatchFinder_F (
|
||||
size_t windowSize
|
||||
);
|
||||
|
||||
/*! ZSTD_registerExternalMatchFinder() :
|
||||
* Instruct zstd to use an external matchfinder function.
|
||||
/*! ZSTD_registerSequenceProducer() :
|
||||
* Instruct zstd to use a block-level external sequence producer function.
|
||||
*
|
||||
* The externalMatchState must be initialized by the caller, and the caller is
|
||||
* The sequenceProducerState must be initialized by the caller, and the caller is
|
||||
* responsible for managing its lifetime. This parameter is sticky across
|
||||
* compressions. It will remain set until the user explicitly resets compression
|
||||
* parameters.
|
||||
*
|
||||
* External matchfinder registration is considered to be an "advanced parameter",
|
||||
* part of the "advanced API". This means it will only have an effect on
|
||||
* compression APIs which respect advanced parameters, such as compress2() and
|
||||
* compressStream(). Older compression APIs such as compressCCtx(), which predate
|
||||
* the introduction of "advanced parameters", will ignore any external matchfinder
|
||||
* setting.
|
||||
* Sequence producer registration is considered to be an "advanced parameter",
|
||||
* part of the "advanced API". This means it will only have an effect on compression
|
||||
* APIs which respect advanced parameters, such as compress2() and compressStream2().
|
||||
* Older compression APIs such as compressCCtx(), which predate the introduction of
|
||||
* "advanced parameters", will ignore any external sequence producer setting.
|
||||
*
|
||||
* The external matchfinder can be "cleared" by registering a NULL external
|
||||
* matchfinder function pointer. This removes all limitations described above in
|
||||
* the "LIMITATIONS" section of the API docs.
|
||||
* The sequence producer can be "cleared" by registering a NULL function pointer. This
|
||||
* removes all limitations described above in the "LIMITATIONS" section of the API docs.
|
||||
*
|
||||
* The user is strongly encouraged to read the full API documentation (above)
|
||||
* before calling this function. */
|
||||
* The user is strongly encouraged to read the full API documentation (above) before
|
||||
* calling this function. */
|
||||
ZSTDLIB_STATIC_API void
|
||||
ZSTD_registerExternalMatchFinder(
|
||||
ZSTD_registerSequenceProducer(
|
||||
ZSTD_CCtx* cctx,
|
||||
void* externalMatchState,
|
||||
ZSTD_externalMatchFinder_F* externalMatchFinder
|
||||
void* sequenceProducerState,
|
||||
ZSTD_sequenceProducer_F* sequenceProducer
|
||||
);
|
||||
|
||||
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
|
||||
|
@ -95,7 +95,7 @@ typedef enum {
|
||||
ZSTD_error_seekableIO = 102,
|
||||
ZSTD_error_dstBuffer_wrong = 104,
|
||||
ZSTD_error_srcBuffer_wrong = 105,
|
||||
ZSTD_error_externalMatchFinder_failed = 106,
|
||||
ZSTD_error_sequenceProducer_failed = 106,
|
||||
ZSTD_error_externalSequences_invalid = 107,
|
||||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
|
||||
} ZSTD_ErrorCode;
|
||||
|
@ -17,8 +17,8 @@ static U32 const HLOG = 10;
|
||||
static U32 const MLS = 4;
|
||||
static U32 const BADIDX = 0xffffffff;
|
||||
|
||||
static size_t simpleExternalMatchFinder(
|
||||
void* externalMatchState,
|
||||
static size_t simpleSequenceProducer(
|
||||
void* sequenceProducerState,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
||||
@ -32,7 +32,7 @@ static size_t simpleExternalMatchFinder(
|
||||
size_t seqCount = 0;
|
||||
U32 hashTable[HSIZE];
|
||||
|
||||
(void)externalMatchState;
|
||||
(void)sequenceProducerState;
|
||||
(void)dict;
|
||||
(void)dictSize;
|
||||
(void)outSeqsCapacity;
|
||||
@ -80,15 +80,15 @@ static size_t simpleExternalMatchFinder(
|
||||
return seqCount;
|
||||
}
|
||||
|
||||
size_t zstreamExternalMatchFinder(
|
||||
void* externalMatchState,
|
||||
size_t zstreamSequenceProducer(
|
||||
void* sequenceProducerState,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
||||
int compressionLevel,
|
||||
size_t windowSize
|
||||
) {
|
||||
EMF_testCase const testCase = *((EMF_testCase*)externalMatchState);
|
||||
EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState);
|
||||
memset(outSeqs, 0, outSeqsCapacity);
|
||||
|
||||
switch (testCase) {
|
||||
@ -100,8 +100,8 @@ size_t zstreamExternalMatchFinder(
|
||||
outSeqs[0].litLength = (U32)(srcSize);
|
||||
return 1;
|
||||
case EMF_LOTS_OF_SEQS:
|
||||
return simpleExternalMatchFinder(
|
||||
externalMatchState,
|
||||
return simpleSequenceProducer(
|
||||
sequenceProducerState,
|
||||
outSeqs, outSeqsCapacity,
|
||||
src, srcSize,
|
||||
dict, dictSize,
|
||||
@ -135,6 +135,6 @@ size_t zstreamExternalMatchFinder(
|
||||
return outSeqsCapacity + 1;
|
||||
case EMF_BIG_ERROR:
|
||||
default:
|
||||
return ZSTD_EXTERNAL_MATCHFINDER_ERROR;
|
||||
return ZSTD_SEQUENCE_PRODUCER_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -27,8 +27,8 @@ typedef enum {
|
||||
EMF_INVALID_LAST_LITS = 8
|
||||
} EMF_testCase;
|
||||
|
||||
size_t zstreamExternalMatchFinder(
|
||||
void* externalMatchState,
|
||||
size_t zstreamSequenceProducer(
|
||||
void* sequenceProducerState,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const void* dict, size_t dictSize,
|
||||
|
@ -35,8 +35,8 @@ PRGDIR = ../../programs
|
||||
CONTRIBDIR = ../../contrib
|
||||
|
||||
# TODO(embg) make it possible to plug in an arbitrary sequence producer as a .o file
|
||||
MATCHFINDER_DIR = $(CONTRIBDIR)/externalMatchfinder
|
||||
MATCHFINDER_SRC = $(MATCHFINDER_DIR)/matchfinder.c
|
||||
MATCHFINDER_DIR = $(CONTRIBDIR)/externalSequenceProducer
|
||||
MATCHFINDER_SRC = $(MATCHFINDER_DIR)/sequence_producer.c
|
||||
|
||||
FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
||||
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include "fuzz_helpers.h"
|
||||
#include "zstd.h"
|
||||
#include "zdict.h"
|
||||
#include "matchfinder.h"
|
||||
#include "sequence_producer.h"
|
||||
|
||||
const int kMinClevel = -3;
|
||||
const int kMaxClevel = 19;
|
||||
@ -71,13 +71,13 @@ ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
|
||||
return params;
|
||||
}
|
||||
|
||||
static void setExternalMatchFinderParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) {
|
||||
ZSTD_registerExternalMatchFinder(
|
||||
static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) {
|
||||
ZSTD_registerSequenceProducer(
|
||||
cctx,
|
||||
NULL,
|
||||
simpleExternalMatchFinder
|
||||
simpleSequenceProducer
|
||||
);
|
||||
setRand(cctx, ZSTD_c_enableMatchFinderFallback, 0, 1, producer);
|
||||
setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer);
|
||||
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
|
||||
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable));
|
||||
}
|
||||
@ -138,9 +138,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
|
||||
}
|
||||
|
||||
if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) {
|
||||
setExternalMatchFinderParams(cctx, producer);
|
||||
setSequenceProducerParams(cctx, producer);
|
||||
} else {
|
||||
ZSTD_registerExternalMatchFinder(cctx, NULL, NULL);
|
||||
ZSTD_registerSequenceProducer(cctx, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
||||
#include "seqgen.h"
|
||||
#include "util.h"
|
||||
#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
|
||||
#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */
|
||||
#include "external_matchfinder.h" /* zstreamSequenceProducer, EMF_testCase */
|
||||
|
||||
/*-************************************
|
||||
* Constants
|
||||
@ -1856,14 +1856,14 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++);
|
||||
DISPLAYLEVEL(3, "test%3i : Block-Level External Sequence Producer API: ", testNb++);
|
||||
{
|
||||
size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
|
||||
BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize));
|
||||
size_t const checkBufSize = CNBufferSize;
|
||||
BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
|
||||
int enableFallback;
|
||||
EMF_testCase externalMatchState;
|
||||
EMF_testCase sequenceProducerState;
|
||||
|
||||
CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");
|
||||
|
||||
@ -1871,7 +1871,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
|
||||
/* Reference external sequence producer outside the test loop to
|
||||
* check that the reference is preserved across compressions */
|
||||
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
|
||||
ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
|
||||
|
||||
for (enableFallback = 0; enableFallback <= 1; enableFallback++) {
|
||||
size_t testCaseId;
|
||||
@ -1892,9 +1892,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
ZSTD_ErrorCode const errorCodes[] = {
|
||||
ZSTD_error_no_error,
|
||||
ZSTD_error_no_error,
|
||||
ZSTD_error_externalMatchFinder_failed,
|
||||
ZSTD_error_externalMatchFinder_failed,
|
||||
ZSTD_error_externalMatchFinder_failed,
|
||||
ZSTD_error_sequenceProducer_failed,
|
||||
ZSTD_error_sequenceProducer_failed,
|
||||
ZSTD_error_sequenceProducer_failed,
|
||||
ZSTD_error_externalSequences_invalid,
|
||||
ZSTD_error_externalSequences_invalid,
|
||||
ZSTD_error_externalSequences_invalid,
|
||||
@ -1906,18 +1906,18 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
|
||||
int const compressionShouldSucceed = (
|
||||
(errorCodes[testCaseId] == ZSTD_error_no_error) ||
|
||||
(enableFallback && errorCodes[testCaseId] == ZSTD_error_externalMatchFinder_failed)
|
||||
(enableFallback && errorCodes[testCaseId] == ZSTD_error_sequenceProducer_failed)
|
||||
);
|
||||
|
||||
int const testWithSequenceValidation = (
|
||||
testCases[testCaseId] == EMF_INVALID_OFFSET
|
||||
);
|
||||
|
||||
externalMatchState = testCases[testCaseId];
|
||||
sequenceProducerState = testCases[testCaseId];
|
||||
|
||||
ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, testWithSequenceValidation));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, enableFallback));
|
||||
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
|
||||
|
||||
if (compressionShouldSucceed) {
|
||||
@ -1936,9 +1936,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
/* Test compression with external sequence producer + empty src buffer */
|
||||
{
|
||||
size_t res;
|
||||
externalMatchState = EMF_ZERO_SEQS;
|
||||
sequenceProducerState = EMF_ZERO_SEQS;
|
||||
ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, enableFallback));
|
||||
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0);
|
||||
CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
|
||||
CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!");
|
||||
@ -1947,30 +1947,30 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
|
||||
/* Test that reset clears the external sequence producer */
|
||||
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
|
||||
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
|
||||
sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0));
|
||||
CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
|
||||
|
||||
/* Test that registering sequenceProducer == NULL clears the external sequence producer */
|
||||
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
|
||||
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
|
||||
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
|
||||
ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */
|
||||
ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
|
||||
sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0));
|
||||
ZSTD_registerSequenceProducer(zc, NULL, NULL); /* clear the external matchfinder */
|
||||
CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
|
||||
|
||||
/* Test that external sequence producer doesn't interact with older APIs */
|
||||
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
|
||||
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
|
||||
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
|
||||
ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
|
||||
sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0));
|
||||
CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3));
|
||||
|
||||
/* Test that compression returns the correct error with LDM */
|
||||
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
|
||||
{
|
||||
size_t res;
|
||||
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
|
||||
ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable));
|
||||
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
|
||||
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
|
||||
@ -1985,7 +1985,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
|
||||
{
|
||||
size_t res;
|
||||
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
|
||||
ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer);
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1));
|
||||
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
|
||||
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
|
||||
|
Reference in New Issue
Block a user