1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-07 06:43:00 +03:00

<sys/platform/x86.h>: Remove the C preprocessor magic

In <sys/platform/x86.h>, define CPU features as an enum instead of using
C preprocessor magic, to make it easier to wrap this functionality
in other languages.  Move the C preprocessor magic to an internal header
for better GCC codegen when more than one feature is checked in a
single expression, as in x86-64 dl-hwcaps-subdirs.c.

1. Rename COMMON_CPUID_INDEX_XXX to CPUID_INDEX_XXX.
2. Move CPUID_INDEX_MAX to sysdeps/x86/include/cpu-features.h.
3. Remove struct cpu_features and __x86_get_cpu_features from
<sys/platform/x86.h>.
4. Add __x86_get_cpuid_feature_leaf to <sys/platform/x86.h> and put it
in libc.
5. Make __get_cpu_features() private to glibc.
6. Replace __x86_get_cpu_features(N) with __get_cpu_features().
7. Add _dl_x86_get_cpu_features to GLIBC_PRIVATE.
8. Use a single enum index for each CPU feature detection.
9. Pass the CPUID feature leaf to __x86_get_cpuid_feature_leaf.
10. Return zero struct cpuid_feature for the older glibc binary with a
smaller CPUID_INDEX_MAX [BZ #27104].
11. Inside glibc, use the C preprocessor magic so that cpu_features data
can be loaded just once leading to more compact code for glibc.

256 bits are used for each CPUID leaf.  Some leaves only contain a few
features.  We could add exceptions for such leaves, but that would increase
code size and make it harder to provide backward/forward compatibility
when new features are added to such leaves in the future.

When new leaves are added, _rtld_global_ro offsets will change, which
leads to a race condition during in-place updates.  We may avoid in-place
updates by:

1. Rename the old glibc.
2. Install the new glibc.
3. Remove the old glibc.

NB: A function, __x86_get_cpuid_feature_leaf, is used to avoid the copy
relocation issue with the IFUNC resolver, as shown in the IFUNC resolver tests.
This commit is contained in:
H.J. Lu
2020-12-25 07:30:46 -08:00
parent d7ee6bd8c2
commit ff6d62e9ed
24 changed files with 1165 additions and 850 deletions

View File

@@ -293,22 +293,22 @@ get_extended_indices (struct cpu_features *cpu_features)
__cpuid (0x80000000, eax, ebx, ecx, edx);
if (eax >= 0x80000001)
__cpuid (0x80000001,
cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.edx);
cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
if (eax >= 0x80000007)
__cpuid (0x80000007,
cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.edx);
cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
if (eax >= 0x80000008)
__cpuid (0x80000008,
cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.edx);
cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}
static void
@@ -320,10 +320,10 @@ get_common_indices (struct cpu_features *cpu_features,
{
unsigned int eax;
__cpuid (1, eax,
cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.edx);
cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.eax = eax;
cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
cpu_features->features[CPUID_INDEX_1].cpuid.edx);
cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
*family = (eax >> 8) & 0x0f;
*model = (eax >> 4) & 0x0f;
*extended_model = (eax >> 12) & 0xf0;
@@ -338,30 +338,30 @@ get_common_indices (struct cpu_features *cpu_features,
if (cpu_features->basic.max_cpuid >= 7)
{
__cpuid_count (7, 0,
cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.edx);
cpu_features->features[CPUID_INDEX_7].cpuid.eax,
cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
cpu_features->features[CPUID_INDEX_7].cpuid.edx);
__cpuid_count (7, 1,
cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.edx);
cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
}
if (cpu_features->basic.max_cpuid >= 0xd)
__cpuid_count (0xd, 1,
cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.edx);
cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);
if (cpu_features->basic.max_cpuid >= 0x19)
__cpuid_count (0x19, 0,
cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.eax,
cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.ebx,
cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.ecx,
cpu_features->features[COMMON_CPUID_INDEX_19].cpuid.edx);
cpu_features->features[CPUID_INDEX_19].cpuid.eax,
cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
cpu_features->features[CPUID_INDEX_19].cpuid.edx);
}
_Static_assert (((index_arch_Fast_Unaligned_Load
@@ -536,11 +536,11 @@ init_cpu_features (struct cpu_features *cpu_features)
update_usable (cpu_features);
ecx = cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx;
ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;
if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
{
/* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and
/* Since the FMA4 bit is in CPUID_INDEX_80000001 and
FMA4 requires AVX, determine if FMA4 is usable here. */
CPU_FEATURE_SET_USABLE (cpu_features, FMA4);
}