
splitter workspace is now provided by ZSTD_CCtx*

Author: Yann Collet
Date:   2024-10-18 16:00:36 -07:00
commit 6be0ad2036
parent ab44689a44
4 changed files with 60 additions and 44 deletions
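
In short: ZSTD_splitBlock_4k used to run out of a stack array declared inside ZSTD_optimalBlockSize; after this commit it borrows a tmpWorkspace that the compression context reserves once at reset time, aligned for S64 and sized for its largest user. The standalone C sketch below mirrors that ownership pattern; every name in it (ctx_t, TMP_WS_SIZE, splitter_analyze, ...) is invented for illustration and none of it is zstd code.

/* Illustrative analogue of the commit's idea: one context-owned scratch
 * buffer, sized for its most demanding user, replaces per-call stack buffers.
 * All names and sizes here are invented for the sketch. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ENTROPY_WS_SIZE   512u   /* stand-in for ENTROPY_WORKSPACE_SIZE */
#define SPLITTER_WS_SIZE  768u   /* stand-in for ZSTD_SLIPBLOCK_WORKSPACESIZE */
#define TMP_WS_SIZE (ENTROPY_WS_SIZE > SPLITTER_WS_SIZE ? ENTROPY_WS_SIZE : SPLITTER_WS_SIZE)

typedef struct {
    void*  tmpWorkspace;   /* reserved once; malloc already aligns it for int64_t */
    size_t tmpWkspSize;
} ctx_t;

static int ctx_init(ctx_t* ctx)
{
    /* round up so the buffer holds a whole number of int64_t slots */
    size_t const sz = (TMP_WS_SIZE + 7u) & ~(size_t)7u;
    ctx->tmpWorkspace = malloc(sz);
    ctx->tmpWkspSize  = sz;
    return ctx->tmpWorkspace ? 0 : -1;
}

/* Both "users" borrow the same scratch buffer instead of
 * each declaring its own stack array. */
static void entropy_stats(ctx_t* ctx)
{
    memset(ctx->tmpWorkspace, 0, ENTROPY_WS_SIZE);
}

static void splitter_analyze(ctx_t* ctx)
{
    int64_t* const ws = (int64_t*)ctx->tmpWorkspace;
    size_t i;
    for (i = 0; i < SPLITTER_WS_SIZE / sizeof(int64_t); i++) ws[i] = 0;
}

int main(void)
{
    ctx_t ctx;
    if (ctx_init(&ctx)) return 1;
    entropy_stats(&ctx);
    splitter_analyze(&ctx);
    printf("shared tmp workspace: %zu bytes\n", ctx.tmpWkspSize);
    free(ctx.tmpWorkspace);
    return 0;
}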

lib/compress/zstd_compress.c

@@ -137,11 +137,12 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
ZSTD_cwksp_move(&cctx->workspace, &ws);
cctx->staticSize = workspaceSize;
/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
/* statically sized space. tmpWorkspace never moves (but prev/next block swap places) */
if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
cctx->tmpWorkspace = ZSTD_cwksp_reserve_object_aligned(&cctx->workspace, TMP_WORKSPACE_SIZE, sizeof(S64));
cctx->tmpWkspSize = TMP_WORKSPACE_SIZE;
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
@@ -1661,14 +1662,14 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ hSize * sizeof(U32)
+ h3Size * sizeof(U32);
size_t const optPotentialSpace =
ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
ZSTD_cwksp_aligned64_alloc_size((MaxML+1) * sizeof(U32))
+ ZSTD_cwksp_aligned64_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_aligned64_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_aligned64_alloc_size((1<<Litbits) * sizeof(U32))
+ ZSTD_cwksp_aligned64_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned64_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
? ZSTD_cwksp_aligned_alloc_size(hSize)
? ZSTD_cwksp_aligned64_alloc_size(hSize)
: 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
@@ -1706,16 +1707,16 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
+ ZSTD_cwksp_aligned64_alloc_size(maxNbSeq * sizeof(seqDef))
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
size_t const tmpWorkSpace = ZSTD_cwksp_aligned_alloc_size(TMP_WORKSPACE_SIZE, sizeof(S64));
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
ZSTD_cwksp_aligned64_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
@@ -1725,12 +1726,12 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
size_t const externalSeqSpace = useSequenceProducer
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
? ZSTD_cwksp_aligned64_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
: 0;
size_t const neededSpace =
cctxSpace +
entropySpace +
tmpWorkSpace +
blockStateSpace +
ldmSpace +
ldmSeqSpace +
@@ -2166,15 +2167,16 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
DEBUGLOG(5, "reserving object space");
/* Statically sized space.
* entropyWorkspace never moves,
* tmpWorkspace never moves,
* though prev/next block swap places */
assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
zc->tmpWorkspace = ZSTD_cwksp_reserve_object_aligned(ws, TMP_WORKSPACE_SIZE, sizeof(S64));
RETURN_ERROR_IF(zc->tmpWorkspace == NULL, memory_allocation, "couldn't allocate tmpWorkspace");
zc->tmpWkspSize = TMP_WORKSPACE_SIZE;
} }
ZSTD_cwksp_clear(ws);
@@ -3894,14 +3896,14 @@ ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CC
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
zc->tmpWorkspace, zc->tmpWkspSize), "");
return ZSTD_estimateBlockSize(
seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
(size_t)(seqStore->sequences - seqStore->sequencesStart),
&zc->blockState.nextCBlock->entropy,
entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
zc->tmpWorkspace, zc->tmpWkspSize,
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
}
@@ -4067,7 +4069,7 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
&zc->appliedParams,
op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */,
zc->bmi2);
FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
@@ -4357,7 +4359,7 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
&zc->appliedParams,
dst, dstCapacity,
srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */,
zc->bmi2);
if (frame &&
@@ -4489,20 +4491,17 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
#include "zstd_preSplit.h"
static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
{
S64 workspace[ZSTD_SLIPBLOCK_WORKSPACESIZE / 8];
(void)cctx;
/* note: we currenly only split full blocks (128 KB)
* and when there is more than 128 KB input remaining
/* note: conservatively only split full blocks (128 KB) currently,
* and even then only if there is more than 128 KB input remaining.
*/
if (srcSize <= 128 KB || blockSizeMax < 128 KB)
return MIN(srcSize, blockSizeMax);
/* dynamic splitting has a cpu cost for analysis,
* due to that cost it's only used for btlazy2+ strategies */
if (strat >= ZSTD_btlazy2)
return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, workspace, sizeof(workspace));
return ZSTD_splitBlock_4k(src, srcSize, blockSizeMax, cctx->tmpWorkspace, cctx->tmpWkspSize);
/* blind split strategy
* no cpu cost, but can over-split homegeneous data.
* heuristic, tested as being "generally better".
@@ -5174,11 +5173,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
cdict->dictContentSize, cdict->dictContentType, dtlm,
ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
ZSTD_tfp_forCCtx, cctx->tmpWorkspace)
: ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->tmpWorkspace);
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID;
@@ -6876,7 +6875,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
&cctx->appliedParams,
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
blockSize,
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */,
cctx->bmi2);
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
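
The rewritten ZSTD_optimalBlockSize above also spells out when splitting is attempted at all: only on full 128 KB blocks with more than 128 KB of input still remaining, and the 4 KB analysis pass only for btlazy2 and stronger strategies, with cheaper strategies falling back to a blind split. A standalone sketch of that gating logic, using invented stand-ins (choose_block_size, STRAT_BTLAZY2, analyzed_split, blind_split) for the real functions and strategy enum:

/* Standalone sketch of the gating in ZSTD_optimalBlockSize.
 * Only the 128 KB threshold and the btlazy2 gate come from the hunk above;
 * everything else is a made-up placeholder. */
#include <stddef.h>
#include <stdio.h>

#define KB *(1u << 10)   /* same trick as zstd's own KB macro */

enum { STRAT_FAST = 1, STRAT_BTLAZY2 = 6 };   /* placeholder strategy ranks */

static size_t min_size(size_t a, size_t b) { return a < b ? a : b; }

/* dummies: the real analysis pass is ZSTD_splitBlock_4k, the real
 * fallback is a fixed "blind" split heuristic */
static size_t analyzed_split(size_t blockSizeMax) { return blockSizeMax / 2; }
static size_t blind_split(size_t blockSizeMax)    { return blockSizeMax - (4 KB); }

static size_t choose_block_size(size_t srcSize, size_t blockSizeMax, int strat)
{
    /* only consider splitting full 128 KB blocks with > 128 KB still to go */
    if (srcSize <= (128 KB) || blockSizeMax < (128 KB))
        return min_size(srcSize, blockSizeMax);
    /* the analysis pass costs CPU, so reserve it for strong strategies */
    if (strat >= STRAT_BTLAZY2)
        return analyzed_split(blockSizeMax);
    return blind_split(blockSizeMax);
}

int main(void)
{
    printf("%zu\n", choose_block_size(300 KB, 128 KB, STRAT_BTLAZY2)); /* analyzed split */
    printf("%zu\n", choose_block_size(64 KB, 128 KB, STRAT_FAST));     /* no split: 65536 */
    return 0;
}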

lib/compress/zstd_compress_internal.h

@@ -24,6 +24,7 @@
# include "zstdmt_compress.h"
#endif
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
#include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */
#if defined (__cplusplus)
extern "C" {
@@ -376,6 +377,7 @@ struct ZSTD_CCtx_params_s {
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
/**
* Indicates whether this compression proceeds directly from user-provided
@@ -432,7 +434,8 @@ struct ZSTD_CCtx_s {
size_t maxNbLdmSequences;
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
ZSTD_blockState_t blockState;
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
void* tmpWorkspace; /* used as substitute of stack space - must be aligned for S64 type */
size_t tmpWkspSize;
/* Whether we are streaming or not */
ZSTD_buffered_policy_e bufferedPolicy;
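
The new TMP_WORKSPACE_SIZE simply takes the larger of the two scratch requirements, so the single buffer reserved in ZSTD_CCtx_s can serve both the entropy stage and the block splitter, which this design assumes never need it at the same time. A minimal check of that sizing rule; the byte counts below are placeholders, the real values derive from HUF_WORKSPACE_SIZE, MaxSeq and ZSTD_SLIPBLOCK_WORKSPACESIZE:

/* Sketch of the TMP_WORKSPACE_SIZE sizing rule with placeholder byte counts. */
#include <assert.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define DEMO_ENTROPY_WORKSPACE_SIZE    9000u   /* placeholder */
#define DEMO_SLIPBLOCK_WORKSPACE_SIZE  8208u   /* placeholder */
#define DEMO_TMP_WORKSPACE_SIZE \
    (MAX(DEMO_ENTROPY_WORKSPACE_SIZE, DEMO_SLIPBLOCK_WORKSPACE_SIZE))

int main(void)
{
    /* the shared buffer is as large as its most demanding user... */
    assert(DEMO_TMP_WORKSPACE_SIZE >= DEMO_ENTROPY_WORKSPACE_SIZE);
    assert(DEMO_TMP_WORKSPACE_SIZE >= DEMO_SLIPBLOCK_WORKSPACE_SIZE);
    /* ...and no larger, since only one user runs at a time */
    printf("tmp workspace: %u bytes\n", (unsigned)DEMO_TMP_WORKSPACE_SIZE);
    return 0;
}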

lib/compress/zstd_compress_superblock.c

@@ -674,7 +674,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
&entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), "");
return ZSTD_compressSubBlock_multi(&zc->seqStore,
zc->blockState.prevCBlock,
@@ -684,5 +684,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
dst, dstCapacity,
src, srcSize,
zc->bmi2, lastBlock,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */);
}

lib/compress/zstd_cwksp.h

@@ -222,7 +222,7 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
* to figure out how much space you need for the matchState tables. Everything
* else is though.
*
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size().
*/
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
if (size == 0)
@@ -234,12 +234,16 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#endif
}
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment));
}
/**
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned".
*/
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) {
return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES);
}
/**
@@ -272,7 +276,7 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
return (void*)((size_t)ws->workspaceEnd & (size_t)~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
}
/**
@@ -426,8 +430,9 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t byt
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
{
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
void* const ptr = ZSTD_cwksp_reserve_internal(ws,
ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
ZSTD_cwksp_alloc_aligned);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
return ptr;
}
@@ -479,12 +484,12 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
}
/**
* Aligned on sizeof(void*).
* Note : should happen only once, at workspace first initialization
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
MEM_STATIC void*
ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t bytes, size_t alignment)
{
size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
size_t const roundedBytes = ZSTD_cwksp_align(bytes, alignment);
void* alloc = ws->objectEnd;
void* end = (BYTE*)alloc + roundedBytes;
@@ -496,8 +501,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
DEBUGLOG(4,
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
assert(bytes % ZSTD_ALIGNOF(void*) == 0);
assert((size_t)alloc % alignment == 0);
assert(bytes % alignment == 0);
ZSTD_cwksp_assert_internal_consistency(ws);
/* we must be in the first phase, no advance is possible */
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
@@ -521,6 +526,15 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
return alloc;
}
/**
* Aligned on sizeof(void*).
* Note : should happen only once, at workspace first initialization
*/
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
{
return ZSTD_cwksp_reserve_object_aligned(ws, bytes, sizeof(void*));
}
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
{
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
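
Finally, the generalized ZSTD_cwksp_reserve_object_aligned rounds the request up to a multiple of the caller-supplied alignment before carving it from the object area, which is what lets the CCtx ask for an S64-aligned tmpWorkspace while ZSTD_cwksp_reserve_object keeps its old sizeof(void*) behaviour. A small sketch of that rounding step, with an invented align_up helper standing in for ZSTD_cwksp_align:

/* Sketch of the rounding applied when reserving an aligned object.
 * align_up mirrors what ZSTD_cwksp_align does; the name is illustrative. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* round size up to the next multiple of align (align must be a power of 2) */
static size_t align_up(size_t size, size_t align)
{
    size_t const mask = align - 1;
    assert((align & mask) == 0);   /* power-of-two check */
    return (size + mask) & ~mask;
}

int main(void)
{
    /* a 100-byte request with 8-byte (sizeof(S64)-like) alignment becomes
     * 104 bytes, so whatever is placed after it stays aligned too */
    printf("%zu\n", align_up(100, sizeof(int64_t)));   /* 104 */
    printf("%zu\n", align_up(128, 64));                /* 128: already aligned */
    printf("%zu\n", align_up(130, 64));                /* 192 */
    return 0;
}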