mirror of
https://sourceware.org/git/glibc.git
synced 2025-09-11 12:10:50 +03:00
x86: Optimizing memcpy for AMD Zen architecture.
Modify the shareable cache size '__x86_shared_cache_size', which is a factor in computing the non-temporal threshold parameter '__x86_shared_non_temporal_threshold', to optimize memcpy for AMD Zen architectures. In the existing implementation, the shareable cache is computed as 'L3 per thread, L2 per core'. Recomputing this shareable cache as 'L3 per CCX (Core Complex)' brings performance gains. According to the large bench variant results, this patch also addresses the regression problem on AMD Zen architectures. Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
This commit is contained in:
committed by
Florian Weimer
parent
641a124845
commit
59803e81f9
@@ -320,7 +320,7 @@ init_cacheinfo (void)
|
|||||||
threads = 1 << ((ecx >> 12) & 0x0f);
|
threads = 1 << ((ecx >> 12) & 0x0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (threads == 0)
|
if (threads == 0 || cpu_features->basic.family >= 0x17)
|
||||||
{
|
{
|
||||||
/* If APIC ID width is not available, use logical
|
/* If APIC ID width is not available, use logical
|
||||||
processor count. */
|
processor count. */
|
||||||
@@ -335,13 +335,30 @@ init_cacheinfo (void)
|
|||||||
if (threads > 0)
|
if (threads > 0)
|
||||||
shared /= threads;
|
shared /= threads;
|
||||||
|
|
||||||
/* Account for exclusive L2 and L3 caches. */
|
/* Get shared cache per ccx for Zen architectures. */
|
||||||
shared += core;
|
if (cpu_features->basic.family >= 0x17)
|
||||||
}
|
{
|
||||||
|
unsigned int eax;
|
||||||
|
|
||||||
|
/* Get the number of threads sharing the L3 cache in the CCX. */
|
||||||
|
__cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
|
||||||
|
|
||||||
|
unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
|
||||||
|
shared *= threads_per_ccx;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Account for exclusive L2 and L3 caches. */
|
||||||
|
shared += core;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cpu_features->data_cache_size != 0)
|
if (cpu_features->data_cache_size != 0)
|
||||||
data = cpu_features->data_cache_size;
|
{
|
||||||
|
if (data == 0 || cpu_features->basic.kind != arch_kind_amd)
|
||||||
|
data = cpu_features->data_cache_size;
|
||||||
|
}
|
||||||
|
|
||||||
if (data > 0)
|
if (data > 0)
|
||||||
{
|
{
|
||||||
@@ -354,7 +371,10 @@ init_cacheinfo (void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cpu_features->shared_cache_size != 0)
|
if (cpu_features->shared_cache_size != 0)
|
||||||
shared = cpu_features->shared_cache_size;
|
{
|
||||||
|
if (shared == 0 || cpu_features->basic.kind != arch_kind_amd)
|
||||||
|
shared = cpu_features->shared_cache_size;
|
||||||
|
}
|
||||||
|
|
||||||
if (shared > 0)
|
if (shared > 0)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user