diff --git a/CHANGELOG b/CHANGELOG index 11cc0d3fb..4010c1ff5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -29,7 +29,7 @@ cli: Fix empty string as argument for `--output-dir-*` (#3220, @embg) cli: Fix decompression memory usage reported by `-vv --long` (#3042, @u1f35c, and #3232, @zengyijing) cli: Fix infinite loop when empty input is passed to trainer (#3081, @terrelln) cli: Fix `--adapt` doesn't work when `--no-progress` is also set (#3354, @terrelln) -api: Support for External matchfinder (#3333, @embg) +api: Support for Block-Level Sequence Producer (#3333, @embg) api: Support for in-place decompression (#3432, @terrelln) api: New `ZSTD_CCtx_setCParams()` function, set all parameters defined in a `ZSTD_compressionParameters` structure (#3403, @Cyan4973) api: Streaming decompression detects incorrect header ID sooner (#3175, @Cyan4973) diff --git a/Makefile b/Makefile index 75a47a308..a7890a5b1 100644 --- a/Makefile +++ b/Makefile @@ -123,7 +123,7 @@ contrib: lib $(MAKE) -C contrib/seekable_format/examples all $(MAKE) -C contrib/seekable_format/tests test $(MAKE) -C contrib/largeNbDicts all - $(MAKE) -C contrib/externalMatchfinder all + $(MAKE) -C contrib/externalSequenceProducer all cd build/single_file_libs/ ; ./build_decoder_test.sh cd build/single_file_libs/ ; ./build_library_test.sh @@ -143,7 +143,7 @@ clean: $(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID) $(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID) $(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID) - $(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID) + $(Q)$(MAKE) -C contrib/externalSequenceProducer $@ > $(VOID) $(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp* $(Q)$(RM) -r lz4 @echo Cleaning completed diff --git a/contrib/externalMatchfinder/.gitignore b/contrib/externalMatchfinder/.gitignore deleted file mode 100644 index 46357ef58..000000000 --- a/contrib/externalMatchfinder/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# build artifacts -externalMatchfinder diff --git 
a/contrib/externalMatchfinder/README.md b/contrib/externalMatchfinder/README.md deleted file mode 100644 index cb7d49d97..000000000 --- a/contrib/externalMatchfinder/README.md +++ /dev/null @@ -1,14 +0,0 @@ -externalMatchfinder -===================== - -`externalMatchfinder` is a test tool for the external matchfinder API. -It demonstrates how to use the API to perform a simple round-trip test. - -A sample matchfinder is provided in matchfinder.c, but the user can swap -this out with a different one if desired. The sample matchfinder implements -LZ compression with a 1KB hashtable. Dictionary compression is not currently supported. - -Command line : -``` -externalMatchfinder filename -``` diff --git a/contrib/externalSequenceProducer/.gitignore b/contrib/externalSequenceProducer/.gitignore new file mode 100644 index 000000000..147710aee --- /dev/null +++ b/contrib/externalSequenceProducer/.gitignore @@ -0,0 +1,2 @@ +# build artifacts +externalSequenceProducer diff --git a/contrib/externalMatchfinder/Makefile b/contrib/externalSequenceProducer/Makefile similarity index 87% rename from contrib/externalMatchfinder/Makefile rename to contrib/externalSequenceProducer/Makefile index 2baa558cb..0591ae01b 100644 --- a/contrib/externalMatchfinder/Makefile +++ b/contrib/externalSequenceProducer/Makefile @@ -23,11 +23,11 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wredundant-decls CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) -default: externalMatchfinder +default: externalSequenceProducer -all: externalMatchfinder +all: externalSequenceProducer -externalMatchfinder: matchfinder.c main.c $(LIBZSTD) +externalSequenceProducer: sequence_producer.c main.c $(LIBZSTD) $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ .PHONY: $(LIBZSTD) @@ -37,4 +37,4 @@ $(LIBZSTD): clean: $(RM) *.o $(MAKE) -C $(LIBDIR) clean > /dev/null - $(RM) externalMatchfinder + $(RM) externalSequenceProducer diff --git a/contrib/externalSequenceProducer/README.md 
b/contrib/externalSequenceProducer/README.md new file mode 100644 index 000000000..c16a17007 --- /dev/null +++ b/contrib/externalSequenceProducer/README.md @@ -0,0 +1,14 @@ +externalSequenceProducer +===================== + +`externalSequenceProducer` is a test tool for the Block-Level Sequence Producer API. +It demonstrates how to use the API to perform a simple round-trip test. + +A sample sequence producer is provided in sequence_producer.c, but the user can swap +this out with a different one if desired. The sample sequence producer implements +LZ parsing with a 1KB hashtable. Dictionary-based parsing is not currently supported. + +Command line : +``` +externalSequenceProducer filename +``` diff --git a/contrib/externalMatchfinder/main.c b/contrib/externalSequenceProducer/main.c similarity index 89% rename from contrib/externalMatchfinder/main.c rename to contrib/externalSequenceProducer/main.c index 6971a46c7..e67e29538 100644 --- a/contrib/externalMatchfinder/main.c +++ b/contrib/externalSequenceProducer/main.c @@ -16,7 +16,7 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zstd_errors.h" -#include "matchfinder.h" // simpleExternalMatchFinder +#include "sequence_producer.h" // simpleSequenceProducer #define CHECK(res) \ do { \ @@ -28,23 +28,23 @@ do { \ int main(int argc, char *argv[]) { if (argc != 2) { - printf("Usage: exampleMatchfinder <file>\n"); + printf("Usage: externalSequenceProducer <file>\n"); return 1; } ZSTD_CCtx* const zc = ZSTD_createCCtx(); - int simpleExternalMatchState = 0xdeadbeef; + int simpleSequenceProducerState = 0xdeadbeef; // Here is the crucial bit of code! 
- ZSTD_registerExternalMatchFinder( + ZSTD_registerSequenceProducer( zc, - &simpleExternalMatchState, - simpleExternalMatchFinder + &simpleSequenceProducerState, + simpleSequenceProducer ); { - size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); + size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1); CHECK(res); } diff --git a/contrib/externalMatchfinder/matchfinder.c b/contrib/externalSequenceProducer/sequence_producer.c similarity index 94% rename from contrib/externalMatchfinder/matchfinder.c rename to contrib/externalSequenceProducer/sequence_producer.c index f119193ef..60a2f9572 100644 --- a/contrib/externalMatchfinder/matchfinder.c +++ b/contrib/externalSequenceProducer/sequence_producer.c @@ -9,15 +9,15 @@ */ #include "zstd_compress_internal.h" -#include "matchfinder.h" +#include "sequence_producer.h" #define HSIZE 1024 static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = 0xffffffff; -size_t simpleExternalMatchFinder( - void* externalMatchState, +size_t simpleSequenceProducer( + void* sequenceProducerState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, @@ -31,7 +31,7 @@ size_t simpleExternalMatchFinder( size_t seqCount = 0; U32 hashTable[HSIZE]; - (void)externalMatchState; + (void)sequenceProducerState; (void)dict; (void)dictSize; (void)outSeqsCapacity; diff --git a/contrib/externalMatchfinder/matchfinder.h b/contrib/externalSequenceProducer/sequence_producer.h similarity index 91% rename from contrib/externalMatchfinder/matchfinder.h rename to contrib/externalSequenceProducer/sequence_producer.h index f8ba1c965..19f9982ac 100644 --- a/contrib/externalMatchfinder/matchfinder.h +++ b/contrib/externalSequenceProducer/sequence_producer.h @@ -14,8 +14,8 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" -size_t simpleExternalMatchFinder( - void* externalMatchState, +size_t simpleSequenceProducer( + 
void* sequenceProducerState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 0cff6b80b..075fc5ef4 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -54,7 +54,7 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; - case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code"; + case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code"; case PREFIX(externalSequences_invalid): return "External sequences are not valid"; case PREFIX(maxCode): default: return notErrorCode; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5b89ca6d2..b55f684cd 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -615,7 +615,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = (int)ZSTD_ps_disable; return bounds; - case ZSTD_c_enableMatchFinderFallback: + case ZSTD_c_enableSeqProducerFallback: bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; @@ -695,7 +695,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_enableMatchFinderFallback: + case ZSTD_c_enableSeqProducerFallback: case ZSTD_c_maxBlockSize: case ZSTD_c_searchForExternalRepcodes: default: @@ -754,7 +754,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: case ZSTD_c_prefetchCDictTables: - case ZSTD_c_enableMatchFinderFallback: + case 
ZSTD_c_enableSeqProducerFallback: case ZSTD_c_maxBlockSize: case ZSTD_c_searchForExternalRepcodes: break; @@ -989,8 +989,8 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; return CCtxParams->prefetchCDictTables; - case ZSTD_c_enableMatchFinderFallback: - BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value); + case ZSTD_c_enableSeqProducerFallback: + BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value); CCtxParams->enableMatchFinderFallback = value; return CCtxParams->enableMatchFinderFallback; @@ -1140,7 +1140,7 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_prefetchCDictTables: *value = (int)CCtxParams->prefetchCDictTables; break; - case ZSTD_c_enableMatchFinderFallback: + case ZSTD_c_enableSeqProducerFallback: *value = CCtxParams->enableMatchFinderFallback; break; case ZSTD_c_maxBlockSize: @@ -1610,8 +1610,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, /* Helper function for calculating memory requirements. * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */ -static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) { - U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4; +static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) { + U32 const divider = (minMatch==3 || useSequenceProducer) ? 
3 : 4; return blockSize / divider; } @@ -1623,12 +1623,12 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize, - int useExternalMatchFinder, + int useSequenceProducer, size_t maxBlockSize) { size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize); - size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder); + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer); size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); @@ -1648,7 +1648,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); - size_t const externalSeqSpace = useExternalMatchFinder + size_t const externalSeqSpace = useSequenceProducer ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) : 0; @@ -1679,7 +1679,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. 
*/ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize); + &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) @@ -1740,7 +1740,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) return ZSTD_estimateCCtxSize_usingCCtxParams_internal( &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, - ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize); + ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize); } } @@ -2024,7 +2024,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); size_t const blockSize = MIN(params->maxBlockSize, windowSize); - size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder); + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer); size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) ? 
ZSTD_compressBound(blockSize) + 1 : 0; @@ -2041,7 +2041,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, - buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder, params->maxBlockSize); + buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize); int resizeWorkspace; FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); @@ -2155,7 +2155,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, } /* reserve space for block-level external sequences */ - if (params->useExternalMatchFinder) { + if (params->useSequenceProducer) { size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq; zc->externalMatchCtx.seqBuffer = @@ -3022,26 +3022,26 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthType = ZSTD_llt_none; } -/* ZSTD_postProcessExternalMatchFinderResult() : +/* ZSTD_postProcessSequenceProducerResult() : * Validates and post-processes sequences obtained through the external matchfinder API: * - Checks whether nbExternalSeqs represents an error condition. * - Appends a block delimiter to outSeqs if one is not already present. * See zstd.h for context regarding block delimiters. * Returns the number of sequences after post-processing, or an error code. 
*/ -static size_t ZSTD_postProcessExternalMatchFinderResult( +static size_t ZSTD_postProcessSequenceProducerResult( ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize ) { RETURN_ERROR_IF( nbExternalSeqs > outSeqsCapacity, - externalMatchFinder_failed, - "External matchfinder returned error code %lu", + sequenceProducer_failed, + "External sequence producer returned error code %lu", (unsigned long)nbExternalSeqs ); RETURN_ERROR_IF( nbExternalSeqs == 0 && srcSize > 0, - externalMatchFinder_failed, - "External matchfinder produced zero sequences for a non-empty src buffer!" + sequenceProducer_failed, + "Got zero sequences from external sequence producer for a non-empty src buffer!" ); if (srcSize == 0) { @@ -3061,7 +3061,7 @@ static size_t ZSTD_postProcessExternalMatchFinderResult( * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ RETURN_ERROR_IF( nbExternalSeqs == outSeqsCapacity, - externalMatchFinder_failed, + sequenceProducer_failed, "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" ); @@ -3139,9 +3139,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. */ RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchFinder, + zc->appliedParams.useSequenceProducer, parameter_combination_unsupported, - "Long-distance matching with external matchfinder enabled is not currently supported." + "Long-distance matching with external sequence producer enabled is not currently supported." ); /* Updates ldmSeqStore.pos */ @@ -3158,9 +3158,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* External matchfinder + LDM is technically possible, just not implemented yet. * We need to revisit soon and implement it. 
*/ RETURN_ERROR_IF( - zc->appliedParams.useExternalMatchFinder, + zc->appliedParams.useSequenceProducer, parameter_combination_unsupported, - "Long-distance matching with external matchfinder enabled is not currently supported." + "Long-distance matching with external sequence producer enabled is not currently supported." ); ldmSeqStore.seq = zc->ldmSequences; @@ -3177,7 +3177,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); - } else if (zc->appliedParams.useExternalMatchFinder) { + } else if (zc->appliedParams.useSequenceProducer) { assert( zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize) ); @@ -3195,7 +3195,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) windowSize ); - size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult( + size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult( zc->externalMatchCtx.seqBuffer, nbExternalSeqs, zc->externalMatchCtx.seqBufferCapacity, @@ -3217,7 +3217,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) "Failed to copy external sequences to seqStore!" ); ms->ldmSeqStore = NULL; - DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs); + DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs); return ZSTDbss_compress; } @@ -3233,7 +3233,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ms->ldmSeqStore = NULL; DEBUGLOG( 5, - "External matchfinder returned error code %lu. Falling back to internal matchfinder.", + "External sequence producer returned error code %lu. 
Falling back to internal parser.", (unsigned long)nbExternalSeqs ); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); @@ -6033,9 +6033,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, #ifdef ZSTD_MULTITHREAD /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */ RETURN_ERROR_IF( - params.useExternalMatchFinder == 1 && params.nbWorkers >= 1, + params.useSequenceProducer == 1 && params.nbWorkers >= 1, parameter_combination_unsupported, - "External matchfinder isn't supported with nbWorkers >= 1" + "External sequence producer isn't supported with nbWorkers >= 1" ); if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { @@ -6251,7 +6251,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, */ static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch, - size_t posInSrc, U32 windowLog, size_t dictSize, int useExternalMatchFinder) + size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer) { U32 const windowSize = 1u << windowLog; /* posInSrc represents the amount of data the decoder would decode up to this point. @@ -6260,7 +6260,7 @@ ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch, * window size. After output surpasses windowSize, we're limited to windowSize offsets again. */ size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; - size_t const matchLenLowerBound = (minMatch == 3 || useExternalMatchFinder) ? 3 : 4; + size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 
3 : 4; RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!"); /* Validate maxNbSeq is large enough for the given matchLength and minMatch */ RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch"); @@ -6325,7 +6325,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useExternalMatchFinder), + cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer), "Sequence validation failed"); } RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, @@ -6463,7 +6463,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* if (cctx->appliedParams.validateSequences) { seqPos->posInSrc += litLength + matchLength; FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useExternalMatchFinder), + cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer), "Sequence validation failed"); } DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); @@ -6908,9 +6908,9 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); } -void ZSTD_registerExternalMatchFinder( +void ZSTD_registerSequenceProducer( ZSTD_CCtx* zc, void* mState, - ZSTD_externalMatchFinder_F* mFinder + ZSTD_sequenceProducer_F* mFinder ) { if (mFinder != NULL) { ZSTD_externalMatchCtx emctx; @@ -6919,9 +6919,9 @@ void 
ZSTD_registerExternalMatchFinder( emctx.seqBuffer = NULL; emctx.seqBufferCapacity = 0; zc->externalMatchCtx = emctx; - zc->requestedParams.useExternalMatchFinder = 1; + zc->requestedParams.useSequenceProducer = 1; } else { ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx)); - zc->requestedParams.useExternalMatchFinder = 0; + zc->requestedParams.useSequenceProducer = 0; } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index dac7a0aa2..cbb85e527 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -353,8 +353,8 @@ struct ZSTD_CCtx_params_s { /* Indicates whether an external matchfinder has been referenced. * Users can't set this externally. - * It is set internally in ZSTD_registerExternalMatchFinder(). */ - int useExternalMatchFinder; + * It is set internally in ZSTD_registerSequenceProducer(). */ + int useSequenceProducer; /* Adjust the max block size*/ size_t maxBlockSize; @@ -395,7 +395,7 @@ typedef struct { /* Context for block-level external matchfinder API */ typedef struct { void* mState; - ZSTD_externalMatchFinder_F* mFinder; + ZSTD_sequenceProducer_F* mFinder; ZSTD_Sequence* seqBuffer; size_t seqBufferCapacity; } ZSTD_externalMatchCtx; diff --git a/lib/zstd.h b/lib/zstd.h index 91a3679a1..95aac0737 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -478,7 +478,7 @@ typedef enum { * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder * ZSTD_c_prefetchCDictTables - * ZSTD_c_enableMatchFinderFallback + * ZSTD_c_enableSeqProducerFallback * ZSTD_c_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; @@ -565,7 +565,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This also removes any reference to any dictionary or external matchfinder. 
+ * This also removes any reference to any dictionary or external sequence producer. * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -1627,8 +1627,8 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); * Note : only single-threaded compression is supported. * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. * - * Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the external matchfinder API at this time. - * Size estimates assume that no external matchfinder is registered. + * Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time. + * Size estimates assume that no external sequence producer is registered. */ ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); @@ -1650,8 +1650,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); * In this case, get total size by adding ZSTD_estimate?DictSize * Note 2 : only single-threaded compression is supported. * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the external matchfinder API at this time. - * Size estimates assume that no external matchfinder is registered. + * Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. + * Size estimates assume that no external sequence producer is registered. 
*/ ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); @@ -2113,19 +2113,19 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 -/* ZSTD_c_enableMatchFinderFallback +/* ZSTD_c_enableSeqProducerFallback * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. * - * Controls whether zstd will fall back to an internal matchfinder if an - * external matchfinder is registered and returns an error code. This fallback is - * block-by-block: the internal matchfinder will only be called for blocks where - * the external matchfinder returns an error code. Fallback compression will + * Controls whether zstd will fall back to an internal sequence producer if an + * external sequence producer is registered and returns an error code. This fallback + * is block-by-block: the internal sequence producer will only be called for blocks + * where the external sequence producer returns an error code. Fallback parsing will * follow any other cParam settings, such as compression level, the same as in a * normal (fully-internal) compression operation. * - * The user is strongly encouraged to read the full external matchfinder API + * The user is strongly encouraged to read the full Block-Level Sequence Producer API * documentation (below) before setting this parameter. */ -#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17 +#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17 /* ZSTD_c_maxBlockSize * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). 
@@ -2141,12 +2141,13 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo /* ZSTD_c_searchForExternalRepcodes * This parameter affects how zstd parses external sequences, such as sequences - * provided through the compressSequences() API or from an external matchfinder. + * provided through the compressSequences() API or from an external block-level + * sequence producer. * * If set to ZSTD_ps_enable, the library will check for repeated offsets in * external sequences, even if those repcodes are not explicitly indicated in * the "rep" field. Note that this is the only way to exploit repcode matches - * while using compressSequences() or an external matchfinder, since zstd + * while using compressSequences() or an external sequence producer, since zstd * currently ignores the "rep" field of external sequences. * * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in @@ -2805,43 +2806,52 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ -/* ********************** EXTERNAL MATCHFINDER API ********************** +/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* * * *** OVERVIEW *** - * This API allows users to replace the zstd internal block-level matchfinder - * with an external matchfinder function. Potential applications of the API - * include hardware-accelerated matchfinders and matchfinders specialized to - * particular types of data. + * The Block-Level Sequence Producer API allows users to provide their own custom + * sequence producer which libzstd invokes to process each block. The produced list + * of sequences (literals and matches) is then post-processed by libzstd to produce + * valid compressed blocks. 
* - * See contrib/externalMatchfinder for an example program employing the - * external matchfinder API. + * This block-level offload API is a more granular complement of the existing + * frame-level offload API compressSequences() (introduced in v1.5.1). It offers + * an easier migration story for applications already integrated with libzstd: the + * user application continues to invoke the same compression functions + * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits + * from the specific advantages of the external sequence producer. For example, + * the sequence producer could be tuned to take advantage of known characteristics + * of the input, to offer better speed / ratio, or could leverage hardware + * acceleration not available within libzstd itself. + * + * See contrib/externalSequenceProducer for an example program employing the + * Block-Level Sequence Producer API. * * *** USAGE *** * The user is responsible for implementing a function of type - * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following + * ZSTD_sequenceProducer_F. For each block, zstd will pass the following * arguments to the user-provided function: * - * - externalMatchState: a pointer to a user-managed state for the external - * matchfinder. + * - sequenceProducerState: a pointer to a user-managed state for the sequence + * producer. * - * - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the - * external matchfinder. outSeqsCapacity is guaranteed >= - * ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by - * the CCtx. + * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. + * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory + * backing outSeqs is managed by the CCtx. * - * - src, srcSize: an input buffer which the external matchfinder must parse - * into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. 
+ * - src, srcSize: an input buffer for the sequence producer to parse. + * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. * - * - dict, dictSize: a history buffer, which may be empty, which the external - * matchfinder may reference as it produces sequences for the src buffer. - * Currently, zstd will always pass dictSize == 0 into external matchfinders, - * but this will change in the future. + * - dict, dictSize: a history buffer, which may be empty, which the sequence + * producer may reference as it parses the src buffer. Currently, zstd will + * always pass dictSize == 0 into external sequence producers, but this will + * change in the future. * * - compressionLevel: a signed integer representing the zstd compression level - * set by the user for the current operation. The external matchfinder may - * choose to use this information to change its compression strategy and - * speed/ratio tradeoff. Note: The compression level does not reflect zstd - * parameters set through the advanced API. + * set by the user for the current operation. The sequence producer may choose + * to use this information to change its compression strategy and speed/ratio + * tradeoff. Note: the compression level does not reflect zstd parameters set + * through the advanced API. * * - windowSize: a size_t representing the maximum allowed offset for external * sequences. Note that sequence offsets are sometimes allowed to exceed the @@ -2851,7 +2861,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * The user-provided function shall return a size_t representing the number of * sequences written to outSeqs. This return value will be treated as an error * code if it is greater than outSeqsCapacity. The return value must be non-zero - * if srcSize is non-zero. The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided + * if srcSize is non-zero. 
The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided * for convenience, but any value greater than outSeqsCapacity will be treated as * an error code. * @@ -2859,68 +2869,71 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc * written to outSeqs must be a valid parse of the src buffer. Data corruption may * occur if the parse is not valid. A parse is defined to be valid if the * following conditions hold: - * - The sum of matchLengths and literalLengths is equal to srcSize. - * - All sequences in the parse have matchLength != 0, except for the final - * sequence. matchLength is not constrained for the final sequence. - * - All offsets respect the windowSize parameter as specified in + * - The sum of matchLengths and literalLengths must equal srcSize. + * - All sequences in the parse, except for the final sequence, must have + * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have + * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. + * - All offsets must respect the windowSize parameter as specified in * doc/zstd_compression_format.md. + * - If the final sequence has matchLength == 0, it must also have offset == 0. * * zstd will only validate these conditions (and fail compression if they do not * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence * validation has a performance cost. * * If the user-provided function returns an error, zstd will either fall back - * to an internal matchfinder or fail the compression operation. The user can - * choose between the two behaviors by setting the - * ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any - * other cParam settings, such as compression level, the same as in a normal - * compression operation. + * to an internal sequence producer or fail the compression operation. The user can + * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback + * cParam. 
Fallback compression will follow any other cParam settings, such as + * compression level, the same as in a normal compression operation. * - * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F - * function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState, - * externalMatchFinder). This setting will persist until the next parameter reset - * of the CCtx. + * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F + * function by calling + * ZSTD_registerSequenceProducer(cctx, + * sequenceProducerState, + * sequenceProducer) + * This setting will persist until the next parameter reset of the CCtx. * - * The externalMatchState must be initialized by the user before calling - * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the - * externalMatchState. + * The sequenceProducerState must be initialized by the user before calling + * ZSTD_registerSequenceProducer(). The user is responsible for destroying the + * sequenceProducerState. * * *** LIMITATIONS *** - * External matchfinders are compatible with all zstd compression APIs which respect - * advanced parameters. However, there are three limitations: + * This API is compatible with all zstd compression APIs which respect advanced parameters. + * However, there are three limitations: * - * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported. - * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an - * external matchfinder. - * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in - * some cases (see its documentation for details). Users must explicitly set - * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an - * external matchfinder is registered. + * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. 
+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level + * external sequence producer. + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some + * cases (see its documentation for details). Users must explicitly set + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external + * sequence producer is registered. * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should - * check the docs on ZSTD_c_enableLongDistanceMatching whenever the external - * matchfinder API is used in conjunction with advanced settings (like windowLog). + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence + * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). * - * Second, history buffers are not supported. Concretely, zstd will always pass - * dictSize == 0 to the external matchfinder (for now). This has two implications: - * - Dictionaries are not supported. Compression will *not* fail if the user + * Second, history buffers are not currently supported. Concretely, zstd will always pass + * dictSize == 0 to the external sequence producer (for now). This has two implications: + * - Dictionaries are not currently supported. Compression will *not* fail if the user * references a dictionary, but the dictionary won't have any effect. - * - Stream history is not supported. All compression APIs, including streaming - * APIs, work with the external matchfinder, but the external matchfinder won't - * receive any history from the previous block. Each block is an independent chunk. + * - Stream history is not currently supported. All advanced compression APIs, including + * streaming APIs, work with external sequence producers, but each block is treated as + * an independent chunk without history from previous blocks. 
* - * Third, multi-threading within a single compression is not supported. In other words, - * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered. + * Third, multi-threading within a single compression is not currently supported. In other words, + * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. * Multi-threading across compressions is fine: simply create one CCtx per thread. * * Long-term, we plan to overcome all three limitations. There is no technical blocker to * overcoming them. It is purely a question of engineering effort. */ -#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1)) +#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) -typedef size_t ZSTD_externalMatchFinder_F ( - void* externalMatchState, +typedef size_t ZSTD_sequenceProducer_F ( + void* sequenceProducerState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, @@ -2928,32 +2941,30 @@ typedef size_t ZSTD_externalMatchFinder_F ( size_t windowSize ); -/*! ZSTD_registerExternalMatchFinder() : - * Instruct zstd to use an external matchfinder function. +/*! ZSTD_registerSequenceProducer() : + * Instruct zstd to use a block-level external sequence producer function. * - * The externalMatchState must be initialized by the caller, and the caller is + * The sequenceProducerState must be initialized by the caller, and the caller is * responsible for managing its lifetime. This parameter is sticky across * compressions. It will remain set until the user explicitly resets compression * parameters. * - * External matchfinder registration is considered to be an "advanced parameter", - * part of the "advanced API". This means it will only have an effect on - * compression APIs which respect advanced parameters, such as compress2() and - * compressStream(). 
Older compression APIs such as compressCCtx(), which predate - * the introduction of "advanced parameters", will ignore any external matchfinder - * setting. + * Sequence producer registration is considered to be an "advanced parameter", + * part of the "advanced API". This means it will only have an effect on compression + * APIs which respect advanced parameters, such as compress2() and compressStream2(). + * Older compression APIs such as compressCCtx(), which predate the introduction of + * "advanced parameters", will ignore any external sequence producer setting. * - * The external matchfinder can be "cleared" by registering a NULL external - * matchfinder function pointer. This removes all limitations described above in - * the "LIMITATIONS" section of the API docs. + * The sequence producer can be "cleared" by registering a NULL function pointer. This + * removes all limitations described above in the "LIMITATIONS" section of the API docs. * - * The user is strongly encouraged to read the full API documentation (above) - * before calling this function. */ + * The user is strongly encouraged to read the full API documentation (above) before + * calling this function. */ ZSTDLIB_STATIC_API void -ZSTD_registerExternalMatchFinder( +ZSTD_registerSequenceProducer( ZSTD_CCtx* cctx, - void* externalMatchState, - ZSTD_externalMatchFinder_F* externalMatchFinder + void* sequenceProducerState, + ZSTD_sequenceProducer_F* sequenceProducer ); #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ diff --git a/lib/zstd_errors.h b/lib/zstd_errors.h index 6a66bedcb..dc75eeeba 100644 --- a/lib/zstd_errors.h +++ b/lib/zstd_errors.h @@ -95,7 +95,7 @@ typedef enum { ZSTD_error_seekableIO = 102, ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_srcBuffer_wrong = 105, - ZSTD_error_externalMatchFinder_failed = 106, + ZSTD_error_sequenceProducer_failed = 106, ZSTD_error_externalSequences_invalid = 107, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! 
Use ZSTD_isError() instead */ } ZSTD_ErrorCode; diff --git a/tests/external_matchfinder.c b/tests/external_matchfinder.c index 97c47caff..76ad41126 100644 --- a/tests/external_matchfinder.c +++ b/tests/external_matchfinder.c @@ -17,8 +17,8 @@ static U32 const HLOG = 10; static U32 const MLS = 4; static U32 const BADIDX = 0xffffffff; -static size_t simpleExternalMatchFinder( - void* externalMatchState, +static size_t simpleSequenceProducer( + void* sequenceProducerState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, @@ -32,7 +32,7 @@ static size_t simpleExternalMatchFinder( size_t seqCount = 0; U32 hashTable[HSIZE]; - (void)externalMatchState; + (void)sequenceProducerState; (void)dict; (void)dictSize; (void)outSeqsCapacity; @@ -80,15 +80,15 @@ static size_t simpleExternalMatchFinder( return seqCount; } -size_t zstreamExternalMatchFinder( - void* externalMatchState, +size_t zstreamSequenceProducer( + void* sequenceProducerState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel, size_t windowSize ) { - EMF_testCase const testCase = *((EMF_testCase*)externalMatchState); + EMF_testCase const testCase = *((EMF_testCase*)sequenceProducerState); memset(outSeqs, 0, outSeqsCapacity); switch (testCase) { @@ -100,8 +100,8 @@ size_t zstreamExternalMatchFinder( outSeqs[0].litLength = (U32)(srcSize); return 1; case EMF_LOTS_OF_SEQS: - return simpleExternalMatchFinder( - externalMatchState, + return simpleSequenceProducer( + sequenceProducerState, outSeqs, outSeqsCapacity, src, srcSize, dict, dictSize, @@ -135,6 +135,6 @@ size_t zstreamExternalMatchFinder( return outSeqsCapacity + 1; case EMF_BIG_ERROR: default: - return ZSTD_EXTERNAL_MATCHFINDER_ERROR; + return ZSTD_SEQUENCE_PRODUCER_ERROR; } } diff --git a/tests/external_matchfinder.h b/tests/external_matchfinder.h index 7550bbceb..e38dc25ca 100644 --- 
a/tests/external_matchfinder.h +++ b/tests/external_matchfinder.h @@ -27,8 +27,8 @@ typedef enum { EMF_INVALID_LAST_LITS = 8 } EMF_testCase; -size_t zstreamExternalMatchFinder( - void* externalMatchState, +size_t zstreamSequenceProducer( + void* sequenceProducerState, ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 4b5102ef1..bbb262add 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -35,8 +35,8 @@ PRGDIR = ../../programs CONTRIBDIR = ../../contrib # TODO(embg) make it possible to plug in an arbitrary matchfinder as a .o file -MATCHFINDER_DIR = $(CONTRIBDIR)/externalMatchfinder -MATCHFINDER_SRC = $(MATCHFINDER_DIR)/matchfinder.c +MATCHFINDER_DIR = $(CONTRIBDIR)/externalSequenceProducer +MATCHFINDER_SRC = $(MATCHFINDER_DIR)/sequence_producer.c FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \ diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 6b833471e..411b6391c 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -17,7 +17,7 @@ #include "fuzz_helpers.h" #include "zstd.h" #include "zdict.h" -#include "matchfinder.h" +#include "sequence_producer.h" const int kMinClevel = -3; const int kMaxClevel = 19; @@ -71,13 +71,13 @@ ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) return params; } -static void setExternalMatchFinderParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) { - ZSTD_registerExternalMatchFinder( +static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) { + ZSTD_registerSequenceProducer( cctx, NULL, - simpleExternalMatchFinder + simpleSequenceProducer ); - setRand(cctx, ZSTD_c_enableMatchFinderFallback, 0, 1, producer); + setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer); 
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0)); FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable)); } @@ -138,9 +138,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer } if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) { - setExternalMatchFinderParams(cctx, producer); + setSequenceProducerParams(cctx, producer); } else { - ZSTD_registerExternalMatchFinder(cctx, NULL, NULL); + ZSTD_registerSequenceProducer(cctx, NULL, NULL); } } diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index aff847b4d..14c4af82f 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -40,7 +40,7 @@ #include "seqgen.h" #include "util.h" #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ -#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */ +#include "external_matchfinder.h" /* zstreamSequenceProducer, EMF_testCase */ /*-************************************ * Constants @@ -1856,14 +1856,14 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) } DISPLAYLEVEL(3, "OK \n"); - DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++); + DISPLAYLEVEL(3, "test%3i : Block-Level External Sequence Producer API: ", testNb++); { size_t const dstBufSize = ZSTD_compressBound(CNBufferSize); BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize)); size_t const checkBufSize = CNBufferSize; BYTE* const checkBuf = (BYTE*)malloc(checkBufSize); int enableFallback; - EMF_testCase externalMatchState; + EMF_testCase sequenceProducerState; CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed"); @@ -1871,7 +1871,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) /* Reference external matchfinder outside the test loop to * check that the reference is preserved across compressions */ - ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); + 
ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer); for (enableFallback = 0; enableFallback <= 1; enableFallback++) { size_t testCaseId; @@ -1892,9 +1892,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) ZSTD_ErrorCode const errorCodes[] = { ZSTD_error_no_error, ZSTD_error_no_error, - ZSTD_error_externalMatchFinder_failed, - ZSTD_error_externalMatchFinder_failed, - ZSTD_error_externalMatchFinder_failed, + ZSTD_error_sequenceProducer_failed, + ZSTD_error_sequenceProducer_failed, + ZSTD_error_sequenceProducer_failed, ZSTD_error_externalSequences_invalid, ZSTD_error_externalSequences_invalid, ZSTD_error_externalSequences_invalid, @@ -1906,18 +1906,18 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) int const compressionShouldSucceed = ( (errorCodes[testCaseId] == ZSTD_error_no_error) || - (enableFallback && errorCodes[testCaseId] == ZSTD_error_externalMatchFinder_failed) + (enableFallback && errorCodes[testCaseId] == ZSTD_error_sequenceProducer_failed) ); int const testWithSequenceValidation = ( testCases[testCaseId] == EMF_INVALID_OFFSET ); - externalMatchState = testCases[testCaseId]; + sequenceProducerState = testCases[testCaseId]; ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, testWithSequenceValidation)); - CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback)); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, enableFallback)); res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); if (compressionShouldSucceed) { @@ -1936,9 +1936,9 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) /* Test compression with external matchfinder + empty src buffer */ { size_t res; - externalMatchState = EMF_ZERO_SEQS; + sequenceProducerState = EMF_ZERO_SEQS; ZSTD_CCtx_reset(zc, ZSTD_reset_session_only); - CHECK_Z(ZSTD_CCtx_setParameter(zc, 
ZSTD_c_enableMatchFinderFallback, enableFallback)); + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, enableFallback)); res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0); CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res)); CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!"); @@ -1947,30 +1947,30 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) /* Test that reset clears the external matchfinder */ CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); - externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ - CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0)); CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); /* Test that registering mFinder == NULL clears the external matchfinder */ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); - ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); - externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ - CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); - ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */ + ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer); + sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0)); + ZSTD_registerSequenceProducer(zc, NULL, NULL); /* clear the external matchfinder */ CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize)); /* Test that external matchfinder doesn't interact with older 
APIs */ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters); - ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); - externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */ - CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0)); + ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer); + sequenceProducerState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */ + CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 0)); CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3)); /* Test that compression returns the correct error with LDM */ CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); { size_t res; - ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); + ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer); CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!"); @@ -1985,7 +1985,7 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests) CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters)); { size_t res; - ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder); + ZSTD_registerSequenceProducer(zc, &sequenceProducerState, zstreamSequenceProducer); CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1)); res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize); CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");