diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index a6eea61a4d..f4e1bab601 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -18,47 +18,18 @@
<https://www.gnu.org/licenses/>.  */
#include <sysdep.h>
-#define USE_WITH_SSE2 1
-#define VEC_SIZE 16
-#define MOV_SIZE 3
-#define RET_SIZE 1
+#define MEMSET_SYMBOL(p,s) memset
+#define MEMSET_CHK_SYMBOL(p,s) p
-#define VEC(i) xmm##i
-#define VMOVU movups
-#define VMOVA movaps
+#define WMEMSET_SYMBOL(p,s) __wmemset
+#define WMEMSET_CHK_SYMBOL(p,s) p
-# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
- movd d, %xmm0; \
- movq r, %rax; \
- punpcklbw %xmm0, %xmm0; \
- punpcklwd %xmm0, %xmm0; \
- pshufd $0, %xmm0, %xmm0
+#define DEFAULT_IMPL_V1 "multiarch/memset-sse2-unaligned-erms.S"
+#define DEFAULT_IMPL_V3 "multiarch/memset-avx2-unaligned-erms.S"
+#define DEFAULT_IMPL_V4 "multiarch/memset-evex-unaligned-erms.S"
-# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
- movd d, %xmm0; \
- pshufd $0, %xmm0, %xmm0; \
- movq r, %rax
-
-# define MEMSET_VDUP_TO_VEC0_HIGH()
-# define MEMSET_VDUP_TO_VEC0_LOW()
-
-# define WMEMSET_VDUP_TO_VEC0_HIGH()
-# define WMEMSET_VDUP_TO_VEC0_LOW()
-
-#define SECTION(p) p
-
-#ifndef MEMSET_SYMBOL
-# define MEMSET_CHK_SYMBOL(p,s) p
-# define MEMSET_SYMBOL(p,s) memset
-#endif
-
-#ifndef WMEMSET_SYMBOL
-# define WMEMSET_CHK_SYMBOL(p,s) p
-# define WMEMSET_SYMBOL(p,s) __wmemset
-#endif
-
-#include "multiarch/memset-vec-unaligned-erms.S"
+#include "isa-default-impl.h"
libc_hidden_builtin_def (memset)
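
The upshot of this hunk: memset.S no longer carries an SSE2 body of its
own. It only names the default implementation for each ISA level and
defers to isa-default-impl.h, which picks one file based on the
configured baseline. A simplified sketch of that selection, assuming the
real header (not shown in this patch) also rejects unknown levels:

    /* Hypothetical simplification of sysdeps/x86/isa-default-impl.h.  */
    #if MINIMUM_X86_ISA_LEVEL >= 4
    # define ISA_DEFAULT_IMPL DEFAULT_IMPL_V4
    #elif MINIMUM_X86_ISA_LEVEL == 3
    # define ISA_DEFAULT_IMPL DEFAULT_IMPL_V3
    #else
    # define ISA_DEFAULT_IMPL DEFAULT_IMPL_V1
    #endif
    #include ISA_DEFAULT_IMPL

Note there is no DEFAULT_IMPL_V2: a level-2 baseline falls back to the
SSE2 (V1) file, which is why that file is guarded with
ISA_SHOULD_BUILD (2) later in this patch.
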
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 7858aa316f..21008c72b4 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -213,94 +213,99 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, __memset_chk,
IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
__memset_chk_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
- __memset_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
- __memset_chk_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX2),
- __memset_chk_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX2),
- __memset_chk_avx2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_chk_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_chk_avx2_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX512F),
- __memset_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memset_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_chk_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_chk_avx2_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_chk_avx2_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_chk_avx2_unaligned_erms_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memset_chk, 1,
+ __memset_chk_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memset_chk, 1,
+ __memset_chk_sse2_unaligned_erms)
)
#endif
/* Support sysdeps/x86_64/multiarch/memset.c. */
IFUNC_IMPL (i, name, memset,
IFUNC_IMPL_ADD (array, i, memset, 1,
- __memset_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, memset, 1,
- __memset_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX2),
- __memset_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX2),
- __memset_avx2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_avx2_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX512F),
- __memset_avx512_no_vzeroupper)
+ __memset_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memset_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_avx2_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_avx2_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_avx2_unaligned_erms_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memset, 1,
+ __memset_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memset, 1,
+ __memset_sse2_unaligned_erms)
)
/* Support sysdeps/x86_64/multiarch/rawmemchr.c. */
@@ -821,25 +826,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wmemset.c. */
IFUNC_IMPL (i, name, wmemset,
- IFUNC_IMPL_ADD (array, i, wmemset, 1,
- __wmemset_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, wmemset,
- CPU_FEATURE_USABLE (AVX2),
- __wmemset_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, wmemset,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wmemset_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, wmemset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, wmemset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_avx512_unaligned))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemset,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemset_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemset,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemset_avx2_unaligned_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wmemset, 1,
+ __wmemset_sse2_unaligned))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memcpy_chk.c. */
@@ -1049,25 +1056,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */
IFUNC_IMPL (i, name, __wmemset_chk,
- IFUNC_IMPL_ADD (array, i, __wmemset_chk, 1,
- __wmemset_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- CPU_FEATURE_USABLE (AVX2),
- __wmemset_chk_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wmemset_chk_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_chk_avx512_unaligned))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __wmemset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemset_chk_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemset_chk_avx2_unaligned_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __wmemset_chk, 1,
+ __wmemset_chk_sse2_unaligned))
#endif
return 0;
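
The X86_IFUNC_IMPL_ADD_V{2,3,4} wrappers keep this list consistent with
what actually got compiled: when the configured minimum ISA level is
above a candidate's level, its implementation file is not built, so the
corresponding list entry must vanish too. Roughly (a sketch of the
intent, not the exact definitions in <isa-level.h>):

    /* Keep the entry only while level-3 implementation files are still
       being built; otherwise expand to nothing.  */
    #if MINIMUM_X86_ISA_LEVEL <= 3
    # define X86_IFUNC_IMPL_ADD_V3(array, i, name, mask, impl) \
        IFUNC_IMPL_ADD (array, i, name, mask, impl)
    #else
    # define X86_IFUNC_IMPL_ADD_V3(array, i, name, mask, impl)
    #endif

The entries are also reordered from highest ISA level to lowest, which
matches the order in which the ifunc selectors try them.
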
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 64d179913c..ed514976aa 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -20,21 +20,7 @@
#include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
- attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
@@ -42,20 +28,38 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+ attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
+ attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ attribute_hidden;
+
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
return OPTIMIZE (erms);
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (avx512_unaligned_erms);
@@ -66,11 +70,11 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx512_no_vzeroupper);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (evex_unaligned_erms);
@@ -86,7 +90,8 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx2_unaligned_rtm);
}
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (avx2_unaligned_erms);
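
In the selector itself, X86_ISA_CPU_FEATURE_USABLE_P differs from plain
CPU_FEATURE_USABLE_P only in that it can become a compile-time constant:
once the build's minimum ISA level guarantees a feature, the test folds
to 1 and the compiler prunes the branches whose OPTIMIZE (...) symbols
were never built. A sketch of the assumed shape, with per-feature
constants such as AVX2_X86_ISA_LEVEL provided by <isa-level.h>:

    /* Statically true when the baseline implies the feature; otherwise
       fall back to the runtime cpuid-derived bit.  */
    #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, feature)        \
      (((feature##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)   \
       || CPU_FEATURE_USABLE_P (ptr, feature))

For a default build (minimum level 1) every check degenerates to the old
runtime-only behavior, so the selected implementation is unchanged.
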
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index 87c48e2387..3810c719c6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -18,22 +18,26 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, !))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
return OPTIMIZE (avx512_unaligned);
@@ -44,7 +48,8 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_unaligned_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2_unaligned);
}
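
X86_ISA_CPU_FEATURES_ARCH_P plays the same constant-folding game for
arch preference bits, with a twist: the trailing token argument (the `!`
above) is spliced into the runtime check, so a call site that previously
tested !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) can
still fold to true at ISA level 4, where no supported CPU sets that
preference. A hedged sketch; the exact handling of the token in
<isa-level.h> may differ:

    /* `not' is empty or `!'; the static part wins once the baseline
       ISA level decides the preference.  */
    #define X86_ISA_CPU_FEATURES_ARCH_P(ptr, arch, not)       \
      (((arch##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)      \
       || (not CPU_FEATURES_ARCH_P (ptr, arch)))
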
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
index c0bf2875d0..a9054a9122 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
+
# define USE_WITH_AVX2 1
# define VEC_SIZE 32
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
index c5be8f57ef..8cc9c16d73 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -17,8 +17,10 @@
. */
#include <sysdep.h>
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
-#if IS_IN (libc)
#include "asm-syntax.h"
#ifndef MEMSET
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
index 5241216a77..47623b8ee8 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
# define USE_WITH_AVX512 1
# define VEC_SIZE 64
@@ -30,8 +33,15 @@
# define WMEMSET_VDUP_TO_VEC0_LOW()
# define SECTION(p) p##.evex512
+
+#ifndef MEMSET_SYMBOL
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
+#endif
+#ifndef WMEMSET_SYMBOL
# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
+#endif
+
+
# define USE_LESS_VEC_MASK_STORE 1
# include "memset-vec-unaligned-erms.S"
#endif
diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
index 6370021506..ac4b2d2d50 100644
--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
# define USE_WITH_EVEX 1
# define VEC_SIZE 32
@@ -30,8 +33,15 @@
# define WMEMSET_VDUP_TO_VEC0_LOW()
# define SECTION(p) p##.evex
+
+#ifndef MEMSET_SYMBOL
# define MEMSET_SYMBOL(p,s) p##_evex_##s
+#endif
+#ifndef WMEMSET_SYMBOL
# define WMEMSET_SYMBOL(p,s) p##_evex_##s
+#endif
+
+
# define USE_LESS_VEC_MASK_STORE 1
# include "memset-vec-unaligned-erms.S"
#endif
diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index 3d92f6993a..44f9b8888b 100644
--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -17,22 +17,51 @@
License along with the GNU C Library; if not, see
. */
-#include <sysdep.h>
-#include <shlib-compat.h>
+#include <isa-level.h>
-#if IS_IN (libc)
-# define MEMSET_SYMBOL(p,s) p##_sse2_##s
-# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
+/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no separate V2
+   implementation, so this file must also build for ISA V2 builds.  */
+#if ISA_SHOULD_BUILD (2)
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)
+# include <sysdep.h>
+# define USE_WITH_SSE2 1
+
+# define VEC_SIZE 16
+# define MOV_SIZE 3
+# define RET_SIZE 1
+
+# define VEC(i) xmm##i
+# define VMOVU movups
+# define VMOVA movaps
+
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ movd d, %xmm0; \
+ movq r, %rax; \
+ punpcklbw %xmm0, %xmm0; \
+ punpcklwd %xmm0, %xmm0; \
+ pshufd $0, %xmm0, %xmm0
+
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ movd d, %xmm0; \
+ pshufd $0, %xmm0, %xmm0; \
+ movq r, %rax
+
+# define MEMSET_VDUP_TO_VEC0_HIGH()
+# define MEMSET_VDUP_TO_VEC0_LOW()
+
+# define WMEMSET_VDUP_TO_VEC0_HIGH()
+# define WMEMSET_VDUP_TO_VEC0_LOW()
+
+# define SECTION(p) p
+
+# ifndef MEMSET_SYMBOL
+# define MEMSET_SYMBOL(p,s) p##_sse2_##s
# endif
-# undef weak_alias
-# define weak_alias(original, alias)
-# undef strong_alias
-# define strong_alias(ignored1, ignored2)
-#endif
+# ifndef WMEMSET_SYMBOL
+# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
+# endif
-#include <memset.S>
+# include "memset-vec-unaligned-erms.S"
+
+#endif
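
ISA_SHOULD_BUILD (N) is the per-file counterpart of the list macros: an
implementation written for ISA level N is only assembled when the
configured baseline does not already exceed it. A simplified sketch of
the assumed gate, reusing the hypothetical ISA_DEFAULT_IMPL from the
isa-default-impl.h sketch above:

    /* Build when the file's level is still reachable from the baseline,
       or when the file was chosen as the static default (that escape is
       what lets memset.S, and rtld-memset.S below where IS_IN (libc) is
       false, pull the file in directly).  */
    #define ISA_SHOULD_BUILD(isa_build_level)                 \
      ((MINIMUM_X86_ISA_LEVEL <= (isa_build_level)            \
        && IS_IN (libc))                                      \
       || defined ISA_DEFAULT_IMPL)

The comment at the top of this file explains the one asymmetry: there is
no V2-specific memset, so the SSE2 file is tagged ISA_SHOULD_BUILD (2)
to keep serving level-2 builds.
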
diff --git a/sysdeps/x86_64/multiarch/rtld-memset.S b/sysdeps/x86_64/multiarch/rtld-memset.S
new file mode 100644
index 0000000000..d912bfa7cc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-memset.S
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "../memset.S"