mirror of
				https://github.com/facebook/zstd.git
				synced 2025-11-03 20:33:11 +03:00 
			
		
		
		
	- Modify the GCC version used for CI testing of the RISC-V architecture
- Fix a bug in the ZSTD_row_getRVVMask function
- Improve the performance of ZSTD_copy16()
This commit is contained in:
		
							
								
								
									
										2
									
								
								.github/workflows/dev-short-tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/dev-short-tests.yml
									
									
									
									
										vendored
									
									
								
							@@ -403,7 +403,7 @@ jobs:
 | 
				
			|||||||
          { name: PPC64LE,  xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc,    xemu: qemu-ppc64le-static },
 | 
					          { name: PPC64LE,  xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc,    xemu: qemu-ppc64le-static },
 | 
				
			||||||
          { name: S390X,    xcc_pkg: gcc-s390x-linux-gnu,       xcc: s390x-linux-gnu-gcc,       xemu_pkg: qemu-system-s390x,  xemu: qemu-s390x-static   },
 | 
					          { name: S390X,    xcc_pkg: gcc-s390x-linux-gnu,       xcc: s390x-linux-gnu-gcc,       xemu_pkg: qemu-system-s390x,  xemu: qemu-s390x-static   },
 | 
				
			||||||
          { name: MIPS,     xcc_pkg: gcc-mips-linux-gnu,        xcc: mips-linux-gnu-gcc,        xemu_pkg: qemu-system-mips,   xemu: qemu-mips-static    },
 | 
					          { name: MIPS,     xcc_pkg: gcc-mips-linux-gnu,        xcc: mips-linux-gnu-gcc,        xemu_pkg: qemu-system-mips,   xemu: qemu-mips-static    },
 | 
				
			||||||
          { name: RISC-V,   xcc_pkg: gcc-riscv64-linux-gnu,     xcc: riscv64-linux-gnu-gcc,     xemu_pkg: qemu-system-riscv64,xemu: qemu-riscv64-static },
 | 
					          { name: RISC-V,   xcc_pkg: gcc-14-riscv64-linux-gnu,  xcc: riscv64-linux-gnu-gcc-14,  xemu_pkg: qemu-system-riscv64,xemu: qemu-riscv64-static },
 | 
				
			||||||
          { name: M68K,     xcc_pkg: gcc-m68k-linux-gnu,        xcc: m68k-linux-gnu-gcc,        xemu_pkg: qemu-system-m68k,   xemu: qemu-m68k-static    },
 | 
					          { name: M68K,     xcc_pkg: gcc-m68k-linux-gnu,        xcc: m68k-linux-gnu-gcc,        xemu_pkg: qemu-system-m68k,   xemu: qemu-m68k-static    },
 | 
				
			||||||
          { name: SPARC,    xcc_pkg: gcc-sparc64-linux-gnu,     xcc: sparc64-linux-gnu-gcc,     xemu_pkg: qemu-system-sparc,  xemu: qemu-sparc64-static },
 | 
					          { name: SPARC,    xcc_pkg: gcc-sparc64-linux-gnu,     xcc: sparc64-linux-gnu-gcc,     xemu_pkg: qemu-system-sparc,  xemu: qemu-sparc64-static },
 | 
				
			||||||
        ]
 | 
					        ]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -224,16 +224,11 @@
 | 
				
			|||||||
#  if defined(__ARM_FEATURE_SVE2)
 | 
					#  if defined(__ARM_FEATURE_SVE2)
 | 
				
			||||||
#    define ZSTD_ARCH_ARM_SVE2
 | 
					#    define ZSTD_ARCH_ARM_SVE2
 | 
				
			||||||
#  endif
 | 
					#  endif
 | 
				
			||||||
#if defined(__riscv) && defined(__riscv_vector)
 | 
					#  if defined(__riscv) && defined(__riscv_vector)
 | 
				
			||||||
    #if defined(__GNUC__)
 | 
					#    if ((defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 14) || \
 | 
				
			||||||
        #if (__GNUC__ > 14 || (__GNUC__ == 14 && __GNUC_MINOR__ >= 1))
 | 
					        (defined(__clang__) && __clang_major__ >= 19))
 | 
				
			||||||
            #define ZSTD_ARCH_RISCV_RVV
 | 
					        #define ZSTD_ARCH_RISCV_RVV
 | 
				
			||||||
        #endif
 | 
					#  endif
 | 
				
			||||||
    #elif defined(__clang__)
 | 
					 | 
				
			||||||
        #if __clang_major__ > 18 || (__clang_major__ == 18 && __clang_minor__ >= 1)
 | 
					 | 
				
			||||||
            #define ZSTD_ARCH_RISCV_RVV
 | 
					 | 
				
			||||||
        #endif
 | 
					 | 
				
			||||||
    #endif
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
#  if defined(ZSTD_ARCH_X86_AVX2)
 | 
					#  if defined(ZSTD_ARCH_X86_AVX2)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -185,6 +185,8 @@ static void ZSTD_copy16(void* dst, const void* src) {
 | 
				
			|||||||
    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
 | 
					    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
 | 
				
			||||||
#elif defined(ZSTD_ARCH_X86_SSE2)
 | 
					#elif defined(ZSTD_ARCH_X86_SSE2)
 | 
				
			||||||
    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
 | 
					    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
 | 
				
			||||||
 | 
					#elif defined(ZSTD_ARCH_RISCV_RVV)
 | 
				
			||||||
 | 
					    __riscv_vse8_v_u8m1((uint8_t*)dst, __riscv_vle8_v_u8m1((const uint8_t*)src, 16), 16);
 | 
				
			||||||
#elif defined(__clang__)
 | 
					#elif defined(__clang__)
 | 
				
			||||||
    ZSTD_memmove(dst, src, 16);
 | 
					    ZSTD_memmove(dst, src, 16);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7292,7 +7292,7 @@ size_t convertSequences_noRepcodes(
 | 
				
			|||||||
    return longLen;
 | 
					    return longLen;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#elif defined ZSTD_ARCH_RISCV_RVV
 | 
					#elif defined (ZSTD_ARCH_RISCV_RVV)
 | 
				
			||||||
#include <riscv_vector.h>
 | 
					#include <riscv_vector.h>
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Convert `vl` sequences per iteration, using RVV intrinsics:
 | 
					 * Convert `vl` sequences per iteration, using RVV intrinsics:
 | 
				
			||||||
@@ -7824,7 +7824,7 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#elif defined ZSTD_ARCH_RISCV_RVV
 | 
					#elif defined (ZSTD_ARCH_RISCV_RVV)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
 | 
					BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1052,33 +1052,39 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
#if defined(ZSTD_ARCH_RISCV_RVV) && (__riscv_xlen == 64)
 | 
					#if defined(ZSTD_ARCH_RISCV_RVV) && (__riscv_xlen == 64)
 | 
				
			||||||
FORCE_INLINE_TEMPLATE ZSTD_VecMask
 | 
					FORCE_INLINE_TEMPLATE ZSTD_VecMask
 | 
				
			||||||
ZSTD_row_getRVVMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
 | 
					ZSTD_row_getRVVMask(int rowEntries, const BYTE* const src, const BYTE tag, const U32 head)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ZSTD_VecMask matches;
 | 
					    ZSTD_VecMask matches;
 | 
				
			||||||
    size_t vl;
 | 
					    size_t vl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (rowEntries == 16) {
 | 
					    if (rowEntries == 16) {
 | 
				
			||||||
        vl = __riscv_vsetvl_e8m1(16);
 | 
					        vl = __riscv_vsetvl_e8m1(16);
 | 
				
			||||||
        vuint8m1_t chunk = __riscv_vle8_v_u8m1(src, vl);
 | 
					        {
 | 
				
			||||||
        vbool8_t mask = __riscv_vmseq_vx_u8m1_b8(chunk, tag, vl);
 | 
					            vuint8m1_t chunk = __riscv_vle8_v_u8m1(src, vl);
 | 
				
			||||||
        vuint16m1_t mask_u16 = __riscv_vreinterpret_v_b8_u16m1(mask);
 | 
					            vbool8_t mask = __riscv_vmseq_vx_u8m1_b8(chunk, tag, vl);
 | 
				
			||||||
        matches = __riscv_vmv_x_s_u16m1_u16(mask_u16);
 | 
					            vuint16m1_t mask_u16 = __riscv_vreinterpret_v_b8_u16m1(mask);
 | 
				
			||||||
        return ZSTD_rotateRight_U16((U16)matches, head);
 | 
					            matches = __riscv_vmv_x_s_u16m1_u16(mask_u16);
 | 
				
			||||||
 | 
					            return ZSTD_rotateRight_U16((U16)matches, head);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    } else if (rowEntries == 32) {
 | 
					    } else if (rowEntries == 32) {
 | 
				
			||||||
        vl = __riscv_vsetvl_e8m2(32);
 | 
					        vl = __riscv_vsetvl_e8m2(32);
 | 
				
			||||||
        vuint8m2_t chunk = __riscv_vle8_v_u8m2(src, vl);
 | 
					        {
 | 
				
			||||||
        vbool4_t mask = __riscv_vmseq_vx_u8m2_b4(chunk, tag, vl);
 | 
					            vuint8m2_t chunk = __riscv_vle8_v_u8m2(src, vl);
 | 
				
			||||||
        vuint32m1_t mask_u32 = __riscv_vreinterpret_v_b4_u32m1(mask);
 | 
					            vbool4_t mask = __riscv_vmseq_vx_u8m2_b4(chunk, tag, vl);
 | 
				
			||||||
        matches = __riscv_vmv_x_s_u32m1_u32(mask_u32);
 | 
					            vuint32m1_t mask_u32 = __riscv_vreinterpret_v_b4_u32m1(mask);
 | 
				
			||||||
        return ZSTD_rotateRight_U32((U32)matches, head);
 | 
					            matches = __riscv_vmv_x_s_u32m1_u32(mask_u32);
 | 
				
			||||||
 | 
					            return ZSTD_rotateRight_U32((U32)matches, head);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    } else { // rowEntries = 64
 | 
					    } else { // rowEntries = 64
 | 
				
			||||||
        vl = __riscv_vsetvl_e8m4(64);
 | 
					        vl = __riscv_vsetvl_e8m4(64);
 | 
				
			||||||
        vuint8m4_t chunk = __riscv_vle8_v_u8m4(src, vl);
 | 
					        {
 | 
				
			||||||
        vbool2_t mask = __riscv_vmseq_vx_u8m4_b2(chunk, tag, vl);
 | 
					            vuint8m4_t chunk = __riscv_vle8_v_u8m4(src, vl);
 | 
				
			||||||
        vuint64m1_t mask_u64 = __riscv_vreinterpret_v_b2_u64m1(mask);
 | 
					            vbool2_t mask = __riscv_vmseq_vx_u8m4_b2(chunk, tag, vl);
 | 
				
			||||||
        matches = __riscv_vmv_x_s_u64m1_u64(mask_u64);
 | 
					            vuint64m1_t mask_u64 = __riscv_vreinterpret_v_b2_u64m1(mask);
 | 
				
			||||||
        return ZSTD_rotateRight_U64(matches, head);
 | 
					            matches = __riscv_vmv_x_s_u64m1_u64(mask_u64);
 | 
				
			||||||
 | 
					            return ZSTD_rotateRight_U64(matches, head);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user