1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-08 17:22:10 +03:00

ZSTD_SequenceCopier_f now returns the nb of bytes consumed from input

which feels much more natural
This commit is contained in:
Yann Collet
2024-12-10 17:51:20 -08:00
parent 41b1ed8262
commit f0f8030870
3 changed files with 30 additions and 29 deletions

View File

@@ -1315,8 +1315,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
</p></pre><BR> </p></pre><BR>
<pre><b>typedef enum { <pre><b>typedef enum {
ZSTD_sf_noBlockDelimiters = 0, </b>/* Representation of ZSTD_Sequence has no block delimiters, sequences only */<b> ZSTD_sf_noBlockDelimiters = 0, </b>/* ZSTD_Sequence[] has no block delimiters, just sequences */<b>
ZSTD_sf_explicitBlockDelimiters = 1 </b>/* Representation of ZSTD_Sequence contains explicit block delimiters */<b> ZSTD_sf_explicitBlockDelimiters = 1 </b>/* ZSTD_Sequence[] contains explicit block delimiters */<b>
} ZSTD_SequenceFormat_e; } ZSTD_SequenceFormat_e;
</b></pre><BR> </b></pre><BR>
<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); <pre><b>ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);

View File

@@ -6624,7 +6624,7 @@ static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32
* storing the sequences it reads, until it reaches a block delimiter. * storing the sequences it reads, until it reaches a block delimiter.
* Note that the block delimiter includes the last literals of the block. * Note that the block delimiter includes the last literals of the block.
* @blockSize must be == sum(sequence_lengths). * @blockSize must be == sum(sequence_lengths).
* @returns 0 on success, and a ZSTD_error otherwise. * @returns @blockSize on success, and a ZSTD_error otherwise.
*/ */
static size_t static size_t
ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx, ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx,
@@ -6711,21 +6711,19 @@ ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx,
} }
RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!"); RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
seqPos->idx = idx+1; seqPos->idx = idx+1;
return 0; return blockSize;
} }
/* /*
* This function attempts to scan through blockSize bytes * This function attempts to scan through @blockSize bytes in @src
* represented by the sequences in @inSeqs, * represented by the sequences in @inSeqs,
* storing any (partial) sequences. * storing any (partial) sequences.
* *
* @returns the number of bytes to move the current read position back by. * Occasionally, we may want to reduce the actual number of bytes consumed from @src
* Only non-zero if we ended up splitting a sequence. * to avoid splitting a match, notably if it would produce a match smaller than MINMATCH.
* Otherwise, it may return a ZSTD error if something went wrong.
* *
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to * @returns the number of bytes consumed from @src, necessarily <= @blockSize.
* avoid splitting a match, or to avoid splitting a match such that it would produce a match * Otherwise, it may return a ZSTD error if something went wrong.
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/ */
static size_t static size_t
ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx, ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx,
@@ -6738,8 +6736,9 @@ ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx,
U32 startPosInSequence = seqPos->posInSequence; U32 startPosInSequence = seqPos->posInSequence;
U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
size_t dictSize; size_t dictSize;
BYTE const* ip = (BYTE const*)(src); const BYTE* const istart = (const BYTE*)(src);
BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ const BYTE* ip = istart;
const BYTE* iend = istart + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */
Repcodes_t updatedRepcodes; Repcodes_t updatedRepcodes;
U32 bytesAdjustment = 0; U32 bytesAdjustment = 0;
U32 finalMatchSplit = 0; U32 finalMatchSplit = 0;
@@ -6842,21 +6841,20 @@ ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx,
iend -= bytesAdjustment; iend -= bytesAdjustment;
if (ip != iend) { if (ip != iend) {
/* Store any last literals */ /* Store any last literals */
U32 lastLLSize = (U32)(iend - ip); U32 const lastLLSize = (U32)(iend - ip);
assert(ip <= iend); assert(ip <= iend);
DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
seqPos->posInSrc += lastLLSize; seqPos->posInSrc += lastLLSize;
} }
return bytesAdjustment; return (size_t)(iend-istart);
} }
/* @seqPos represents a position within @inSeqs, /* @seqPos represents a position within @inSeqs,
* it is read and updated by this function, * it is read and updated by this function,
* once the goal to produce a block of size @blockSize is reached. * once the goal to produce a block of size @blockSize is reached.
* @return: nb of bytes missing to reach @blockSize goal. * @return: nb of bytes consumed from @src, necessarily <= @blockSize.
* so (@blockSize - @return) represents the nb of bytes ingested from @src.
*/ */
typedef size_t (*ZSTD_SequenceCopier_f)(ZSTD_CCtx* cctx, typedef size_t (*ZSTD_SequenceCopier_f)(ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos, ZSTD_SequencePosition* seqPos,
@@ -6963,10 +6961,11 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
assert(blockSize <= remaining); assert(blockSize <= remaining);
ZSTD_resetSeqStore(&cctx->seqStore); ZSTD_resetSeqStore(&cctx->seqStore);
{ size_t adjust = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes); blockSize = sequenceCopier(cctx,
FORWARD_IF_ERROR(adjust, "Bad sequence copy"); &seqPos, inSeqs, inSeqsSize,
blockSize -= adjust; ip, blockSize,
} cctx->appliedParams.searchForExternalRepcodes);
FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
/* If blocks are too small, emit as a nocompress block */ /* If blocks are too small, emit as a nocompress block */
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
@@ -7054,13 +7053,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
{ {
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
size_t cSize = 0; size_t cSize = 0;
size_t compressedBlocksSize = 0;
size_t frameHeaderSize = 0; size_t frameHeaderSize = 0;
/* Transparent initialization stage, same as compressStream2() */ /* Transparent initialization stage, same as compressStream2() */
DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity); DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
assert(cctx != NULL); assert(cctx != NULL);
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
/* Begin writing output, starting with frame header */ /* Begin writing output, starting with frame header */
frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
op += frameHeaderSize; op += frameHeaderSize;
@@ -7069,14 +7068,16 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
if (cctx->appliedParams.fParams.checksumFlag && srcSize) { if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize); XXH64_update(&cctx->xxhState, src, srcSize);
} }
/* cSize includes block header size and compressed sequences size */
compressedBlocksSize = ZSTD_compressSequences_internal(cctx, { size_t const cBlocksSize = ZSTD_compressSequences_internal(cctx,
op, dstCapacity, op, dstCapacity,
inSeqs, inSeqsSize, inSeqs, inSeqsSize,
src, srcSize); src, srcSize);
FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!");
cSize += compressedBlocksSize; cSize += cBlocksSize;
dstCapacity -= compressedBlocksSize; assert(cBlocksSize <= dstCapacity);
dstCapacity -= cBlocksSize;
}
if (cctx->appliedParams.fParams.checksumFlag) { if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);

View File

@@ -1561,8 +1561,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSi
)) ))
typedef enum { typedef enum {
ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */
ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */
} ZSTD_SequenceFormat_e; } ZSTD_SequenceFormat_e;
#define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */ #define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */