1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-13 02:42:53 +03:00

fix dereferencing type-punned pointer error

This commit is contained in:
ZijianLi
2025-06-29 15:36:25 +08:00
parent 40f64f3493
commit 2c3f23b018

View File

@@ -7287,7 +7287,7 @@ static size_t convertSequences_noRepcodes(
#elif defined ZSTD_ARCH_RISCV_RVV #elif defined ZSTD_ARCH_RISCV_RVV
#include <riscv_vector.h> #include <riscv_vector.h>
/* /*
* Convert `vl` sequences per iteration, using AVX2 intrinsics: * Convert `vl` sequences per iteration, using RVV intrinsics:
* - offset -> offBase = offset + 2 * - offset -> offBase = offset + 2
* - litLength -> (U16) litLength * - litLength -> (U16) litLength
* - matchLength -> (U16)(matchLength - 3) * - matchLength -> (U16)(matchLength - 3)
@@ -7300,7 +7300,8 @@ static size_t convertSequences_noRepcodes(
*/ */
static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence* inSeqs, size_t nbSequences) { static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence* inSeqs, size_t nbSequences) {
size_t longLen = 0; size_t longLen = 0;
size_t vl = 0;
typedef uint32_t __attribute__((may_alias)) aliased_u32;
/* RVV depends on the specific definition of target structures */ /* RVV depends on the specific definition of target structures */
ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16); ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16);
ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0); ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0);
@@ -7310,13 +7311,14 @@ static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence*
ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0); ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0);
ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4); ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4);
ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6); ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6);
size_t vl = 0;
for (size_t i = 0; i < nbSequences; i += vl) { for (size_t i = 0; i < nbSequences; i += vl) {
vl = __riscv_vsetvl_e32m2(nbSequences-i); vl = __riscv_vsetvl_e32m2(nbSequences-i);
{
// Loading structure member variables // Loading structure member variables
vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4( vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
(const int32_t*)&inSeqs[i], (const aliased_u32*)((const void*)&inSeqs[i]),
vl vl
); );
vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0); vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
@@ -7344,6 +7346,7 @@ static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence*
__riscv_vwcvtu_x_x_v_u32m2(v_lit_clamped, vl), __riscv_vwcvtu_x_x_v_u32m2(v_lit_clamped, vl),
vl vl
); );
{
// Create a vector of SeqDef structures // Create a vector of SeqDef structures
// Store the offBase, litLength, and mlBase in a vector of SeqDef // Store the offBase, litLength, and mlBase in a vector of SeqDef
vuint32m2x2_t store_data = __riscv_vcreate_v_u32m2x2( vuint32m2x2_t store_data = __riscv_vcreate_v_u32m2x2(
@@ -7351,10 +7354,12 @@ static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence*
v_lit_ml_combined v_lit_ml_combined
); );
__riscv_vsseg2e32_v_u32m2x2( __riscv_vsseg2e32_v_u32m2x2(
(uint32_t*)&dstSeqs[i], (aliased_u32*)((void*)&dstSeqs[i]),
store_data, store_data,
vl vl
); );
}
{
// Find the first index where an overflow occurs // Find the first index where an overflow occurs
int first_ml = __riscv_vfirst_m_b16(ml_overflow, vl); int first_ml = __riscv_vfirst_m_b16(ml_overflow, vl);
int first_lit = __riscv_vfirst_m_b16(lit_overflow, vl); int first_lit = __riscv_vfirst_m_b16(lit_overflow, vl);
@@ -7368,6 +7373,8 @@ static size_t convertSequences_noRepcodes(SeqDef* dstSeqs, const ZSTD_Sequence*
longLen = i + first_lit + 1 + nbSequences; longLen = i + first_lit + 1 + nbSequences;
} }
} }
}
}
return longLen; return longLen;
} }
@@ -7547,18 +7554,17 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
size_t i = 0; size_t i = 0;
int found_terminator = 0; int found_terminator = 0;
size_t vl_max = __riscv_vsetvlmax_e32m1(); size_t vl_max = __riscv_vsetvlmax_e32m1();
typedef uint32_t __attribute__((may_alias)) aliased_u32;
vuint32m1_t v_lit_sum = __riscv_vmv_v_x_u32m1(0, vl_max); vuint32m1_t v_lit_sum = __riscv_vmv_v_x_u32m1(0, vl_max);
vuint32m1_t v_match_sum = __riscv_vmv_v_x_u32m1(0, vl_max); vuint32m1_t v_match_sum = __riscv_vmv_v_x_u32m1(0, vl_max);
for (; i < nbSeqs; ) { for (; i < nbSeqs; ) {
size_t vl = __riscv_vsetvl_e32m2(nbSeqs - i); size_t vl = __riscv_vsetvl_e32m2(nbSeqs - i);
ptrdiff_t stride = sizeof(ZSTD_Sequence); // 16
vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4( vuint32m2x4_t v_tuple = __riscv_vlseg4e32_v_u32m2x4(
(const int32_t*)&seqs[i], (const aliased_u32*)((const void*)&seqs[i]),
vl vl
); );
vuint32m2_t v_offset = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 0);
vuint32m2_t v_lit = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1); vuint32m2_t v_lit = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 1);
vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2); vuint32m2_t v_match = __riscv_vget_v_u32m2x4_u32m2(v_tuple, 2);