1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-10 04:43:07 +03:00

wire up bmi2 support

This commit is contained in:
Nick Terrell
2020-08-17 13:44:49 -07:00
parent ba1fd17a9f
commit 612e947c5e
10 changed files with 226 additions and 34 deletions

View File

@@ -38,7 +38,8 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
* FSE NCount encoding-decoding
****************************************************************/
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
const BYTE* const istart = (const BYTE*) headerBuffer;
@@ -175,6 +176,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
return ip-istart;
}
/* FSE_readNCount_body_default() :
 * Baseline instantiation of FSE_readNCount_body(): it carries no
 * TARGET_ATTRIBUTE and forwards every argument unchanged.
 * @return : whatever FSE_readNCount_body() returns. */
static size_t FSE_readNCount_body_default(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#if DYNAMIC_BMI2
/* FSE_readNCount_body_bmi2() :
 * Second instantiation of FSE_readNCount_body(), tagged with
 * TARGET_ATTRIBUTE("bmi2"). Arguments are forwarded unchanged.
 * NOTE(review): the attribute presumably allows BMI2 code generation for the
 * inlined body - confirm against the TARGET_ATTRIBUTE definition.
 * @return : whatever FSE_readNCount_body() returns. */
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif
/*! FSE_readNCount_bmi2() :
 *  Runtime dispatcher between the two instantiations of the NCount reader.
 *  When DYNAMIC_BMI2 is enabled and `bmi2` is non-zero, the call is routed to
 *  FSE_readNCount_body_bmi2(); in every other case it is routed to
 *  FSE_readNCount_body_default().
 *  All remaining arguments are passed through untouched.
 * @return : the value produced by the selected variant. */
size_t FSE_readNCount_bmi2(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2 != 0) {
        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr,
                                        headerBuffer, hbSize);
    }
#else
    (void)bmi2;  /* the flag has no effect without dynamic dispatch */
#endif
    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr,
                                       headerBuffer, hbSize);
}
/*! FSE_readNCount() :
 *  Entry point without a bmi2 argument: delegates to FSE_readNCount_bmi2()
 *  with bmi2 fixed to 0, so the default variant is always selected.
 * @return : whatever FSE_readNCount_bmi2() returns. */
size_t FSE_readNCount(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}
/*! HUF_readStats() :
Read compact Huffman tree, saved by HUF_writeCTable().
`huffWeight` is destination buffer.
@@ -187,13 +225,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
const void* src, size_t srcSize)
{
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp));
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
}
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
void* workSpace, size_t wkspSize,
int bmi2)
{
U32 weightTotal;
const BYTE* ip = (const BYTE*) src;
@@ -217,7 +256,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
} } }
else { /* header compressed with FSE (normal case) */
if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize); /* max (hwSize-1) values decoded, as last one is implied */
oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); /* max (hwSize-1) values decoded, as last one is implied */
if (FSE_isError(oSize)) return oSize;
}
@@ -252,3 +291,36 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
*nbSymbolsPtr = (U32)(oSize+1);
return iSize+1;
}
/* HUF_readStats_body_default() :
 * Baseline instantiation of HUF_readStats_body(): no TARGET_ATTRIBUTE, and the
 * trailing bmi2 argument is the constant 0.
 * Every other argument is forwarded unchanged.
 * @return : whatever HUF_readStats_body() returns. */
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
/* HUF_readStats_body_bmi2() :
 * Instantiation of HUF_readStats_body() tagged with TARGET_ATTRIBUTE("bmi2");
 * the trailing bmi2 argument is the constant 1.
 * Every other argument is forwarded unchanged.
 * @return : whatever HUF_readStats_body() returns. */
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)
{
return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
}
#endif
/*! HUF_readStats_wksp() :
 *  Runtime dispatcher for the Huffman stats reader.
 *  With DYNAMIC_BMI2 enabled and a non-zero `bmi2`, the request goes to
 *  HUF_readStats_body_bmi2(); otherwise it goes to
 *  HUF_readStats_body_default().
 *  The remaining arguments are handed over exactly as received.
 * @return : the value produced by the selected variant. */
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                          U32* nbSymbolsPtr, U32* tableLogPtr,
                          const void* src, size_t srcSize,
                          void* workSpace, size_t wkspSize,
                          int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2 != 0) {
        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats,
                                       nbSymbolsPtr, tableLogPtr,
                                       src, srcSize, workSpace, wkspSize);
    }
#else
    (void)bmi2;  /* the flag has no effect without dynamic dispatch */
#endif
    return HUF_readStats_body_default(huffWeight, hwSize, rankStats,
                                      nbSymbolsPtr, tableLogPtr,
                                      src, srcSize, workSpace, wkspSize);
}

View File

@@ -228,6 +228,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize);
/*! FSE_readNCount_bmi2():
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
*/
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize, int bmi2);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
@@ -342,6 +349,9 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
typedef enum {
FSE_repeat_none, /**< Cannot use the previous table */
FSE_repeat_check, /**< Can use the previous table but it must be checked */

View File

@@ -73,7 +73,7 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
}
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
@@ -178,6 +178,11 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
return 0;
}
/* FSE_buildDTable_wksp() :
 * Externally visible wrapper: forwards all arguments unchanged to the
 * file-local FSE_buildDTable_internal() and returns its result. */
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
}
#ifndef FSE_COMMONDEFS_ONLY
@@ -306,6 +311,15 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
/* FSE_decompress_wksp() :
 * Entry point without a bmi2 argument: delegates to FSE_decompress_wksp_bmi2()
 * with bmi2 fixed to 0 and returns its result. */
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
}
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize,
unsigned maxLog, void* workSpace, size_t wkspSize,
int bmi2)
{
const BYTE* const istart = (const BYTE*)cSrc;
const BYTE* ip = istart;
@@ -315,7 +329,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
FSE_DTable* const dtable = (FSE_DTable*)workSpace;
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
if (FSE_isError(NCountLength)) return NCountLength;
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
assert(NCountLength <= cSrcSize);
@@ -326,9 +340,40 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
wkspSize -= FSE_DTABLE_SIZE(tableLog);
CHECK_F( FSE_buildDTable_wksp(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, dtable); /* always return, even if it is an error code */
{
const void* ptr = dtable;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
}
}
/* FSE_decompress_wksp_body_default() :
 * Baseline instantiation of FSE_decompress_wksp_body(): no TARGET_ATTRIBUTE,
 * and the trailing bmi2 argument is the constant 0.
 * @return : whatever FSE_decompress_wksp_body() returns. */
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
}
#if DYNAMIC_BMI2
/* FSE_decompress_wksp_body_bmi2() :
 * Instantiation of FSE_decompress_wksp_body() tagged with
 * TARGET_ATTRIBUTE("bmi2"); the trailing bmi2 argument is the constant 1.
 * @return : whatever FSE_decompress_wksp_body() returns. */
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
#endif
/* FSE_decompress_wksp_bmi2() :
 * Runtime dispatcher for workspace-based FSE decompression.
 * With DYNAMIC_BMI2 enabled and a non-zero `bmi2`, the request goes to
 * FSE_decompress_wksp_body_bmi2(); otherwise it goes to
 * FSE_decompress_wksp_body_default().
 * The remaining arguments are handed over exactly as received.
 * @return : the value produced by the selected variant. */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity,
                                const void* cSrc, size_t cSrcSize,
                                unsigned maxLog,
                                void* workSpace, size_t wkspSize,
                                int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2 != 0) {
        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize,
                                             maxLog, workSpace, wkspSize);
    }
#else
    (void)bmi2;  /* the flag has no effect without dynamic dispatch */
#endif
    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize,
                                            maxLog, workSpace, wkspSize);
}

View File

@@ -231,13 +231,15 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
/*! HUF_readStats_wksp() :
* Same as HUF_readStats() but takes an external workspace which must be
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize);
void* workspace, size_t wkspSize,
int bmi2);
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
@@ -345,6 +347,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
#endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif
#endif /* HUF_STATIC_LINKING_ONLY */

View File

@@ -180,6 +180,11 @@ typedef struct {
// TODO: Template based on BMI2 (5% boost)
/* HUF_readDTableX1_wksp() :
 * Entry point without a bmi2 argument: delegates to
 * HUF_readDTableX1_wksp_bmi2() with bmi2 fixed to 0 and returns its result. */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
U32 tableLog = 0;
U32 nbSymbols = 0;
@@ -194,7 +199,7 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp));
iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
if (HUF_isError(iSize)) return iSize;
/* Table header */
@@ -220,13 +225,21 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
{
int n;
int nextRankStart = 0;
int const unroll = 4;
int const nLimit = (int)nbSymbols - unroll + 1;
for (n=0; n<(int)tableLog+1; n++) {
U32 const current = nextRankStart;
nextRankStart += wksp->rankVal[n];
wksp->rankStart[n] = current;
}
// TODO: This loop is now the bottleneck: Can this be made faster?
for (n=0; n < (int)nbSymbols; ++n) {
for (n=0; n < nLimit; n += unroll) {
int u;
for (u=0; u < unroll; ++u) {
size_t const w = wksp->huffWeight[n+u];
wksp->symbols[wksp->rankStart[w]++] = n+u;
}
}
for (; n < (int)nbSymbols; ++n) {
size_t const w = wksp->huffWeight[n];
wksp->symbols[wksp->rankStart[w]++] = n;
}
@@ -540,8 +553,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size
{
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
workSpace, wkspSize);
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
@@ -1320,7 +1332,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
{
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;

View File

@@ -1092,7 +1092,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
offcodeNCount, offcodeMaxValue,
OF_base, OF_bits,
offcodeLog,
entropy->workspace, sizeof(entropy->workspace));
entropy->workspace, sizeof(entropy->workspace),
/* bmi2 */0);
dictPtr += offcodeHeaderSize;
}
@@ -1106,7 +1107,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
matchlengthNCount, matchlengthMaxValue,
ML_base, ML_bits,
matchlengthLog,
entropy->workspace, sizeof(entropy->workspace));
entropy->workspace, sizeof(entropy->workspace),
/* bmi2 */ 0);
dictPtr += matchlengthHeaderSize;
}
@@ -1120,7 +1122,8 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
litlengthNCount, litlengthMaxValue,
LL_base, LL_bits,
litlengthLog,
entropy->workspace, sizeof(entropy->workspace));
entropy->workspace, sizeof(entropy->workspace),
/* bmi2 */ 0);
dictPtr += litlengthHeaderSize;
}

View File

@@ -364,8 +364,8 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
* generate FSE decoding table for one symbol (ll, ml or off)
* cannot fail if input is valid =>
* all inputs are presumed validated at this stage */
void
ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
FORCE_INLINE_TEMPLATE
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog, void* wksp, size_t wkspSize)
@@ -378,6 +378,7 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
(void)wkspSize;
/* Sanity Checks */
assert(maxSymbolValue <= MaxSeq);
@@ -483,6 +484,42 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
}
}
/* ZSTD_buildFSETable_body_default() :
 * Baseline instantiation of ZSTD_buildFSETable_body(): it carries no
 * TARGET_ATTRIBUTE and forwards every argument unchanged.
 * The callee returns void, so it is invoked as a plain statement;
 * ISO C does not allow `return expr;` inside a void function. */
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U32* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
#if DYNAMIC_BMI2
/* ZSTD_buildFSETable_body_bmi2() :
 * Instantiation of ZSTD_buildFSETable_body() tagged with
 * TARGET_ATTRIBUTE("bmi2"). Arguments are forwarded unchanged.
 * The callee returns void, so it is invoked as a plain statement;
 * ISO C does not allow `return expr;` inside a void function. */
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U32* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
#endif
/* ZSTD_buildFSETable() :
 * Runtime dispatcher for the FSE decoding-table builder.
 * When DYNAMIC_BMI2 is enabled and `bmi2` is non-zero, the work is done by
 * ZSTD_buildFSETable_body_bmi2(); in every other case it is done by
 * ZSTD_buildFSETable_body_default(). All other arguments are forwarded
 * unchanged.
 * Both variants return void, so they are called as statements followed by a
 * bare `return;` where needed: ISO C forbids returning an expression from a
 * void function. */
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U32* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
        return;
    }
#endif
    (void)bmi2;  /* silences the unused-parameter warning when dispatch is compiled out */
    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
/*! ZSTD_buildSeqTable() :
* @return : nb bytes read from src,
@@ -492,7 +529,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
const void* src, size_t srcSize,
const U32* baseValue, const U32* nbAdditionalBits,
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize)
int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
int bmi2)
{
switch(type)
{
@@ -524,7 +562,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
*DTablePtr = DTableSpace;
return headerSize;
}
@@ -578,7 +616,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
LL_base, LL_bits,
LL_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold, nbSeq,
dctx->workspace, sizeof(dctx->workspace));
dctx->workspace, sizeof(dctx->workspace),
dctx->bmi2);
RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
ip += llhSize;
}
@@ -589,7 +628,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
OF_base, OF_bits,
OF_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold, nbSeq,
dctx->workspace, sizeof(dctx->workspace));
dctx->workspace, sizeof(dctx->workspace),
dctx->bmi2);
RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
ip += ofhSize;
}
@@ -600,7 +640,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
ML_base, ML_bits,
ML_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold, nbSeq,
dctx->workspace, sizeof(dctx->workspace));
dctx->workspace, sizeof(dctx->workspace),
dctx->bmi2);
RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
ip += mlhSize;
}

View File

@@ -48,14 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
* this function must be called with valid parameters only
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
* in which case it cannot fail.
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes.
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
* defined in zstd_decompress_internal.h.
* Internal use only.
*/
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog, void* wksp, size_t wkspSize);
unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2);
#endif /* ZSTD_DEC_BLOCK_H */

View File

@@ -72,7 +72,9 @@ static const U32 ML_base[MaxML+1] = {
} ZSTD_seqSymbol;
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
#define ZSTD_FSE_WKSP_SIZE_U32 130
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
@@ -80,7 +82,7 @@ typedef struct {
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_FSE_WKSP_SIZE_U32];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE];
} ZSTD_entropyDTables_t;
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,

View File

@@ -232,10 +232,11 @@ FORCE_NOINLINE size_t ZSTD_decodeLiteralsHeader(ZSTD_DCtx* dctx, void const* src
}
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
return HUF_readDTableX1_wksp(
return HUF_readDTableX1_wksp_bmi2(
dctx->entropy.hufTable,
istart+lhSize, litCSize,
dctx->workspace, sizeof(dctx->workspace));
dctx->workspace, sizeof(dctx->workspace),
dctx->bmi2);
}
}
return 0;