mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-01 10:06:57 +03:00
x86: Make the divisor in setting non_temporal_threshold cpu specific
Different systems prefer different divisors. From benchmarks[1], the following divisors have been found so far: ICX: 2, SKX: 2, BWD: 8. For Intel, we are generalizing that BWD and older prefer 8 as a divisor, and SKL and newer prefer 2. This number can be further tuned as benchmarks are run. [1]: https://github.com/goldsteinn/memcpy-nt-benchmarks Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
@ -113,8 +113,11 @@ _dl_diagnostics_cpu (void)
|
||||
cpu_features->level3_cache_linesize);
|
||||
print_cpu_features_value ("level4_cache_size",
|
||||
cpu_features->level4_cache_size);
|
||||
_Static_assert (offsetof (struct cpu_features, level4_cache_size)
|
||||
+ sizeof (cpu_features->level4_cache_size)
|
||||
== sizeof (*cpu_features),
|
||||
"last cpu_features field has been printed");
|
||||
print_cpu_features_value ("cachesize_non_temporal_divisor",
|
||||
cpu_features->cachesize_non_temporal_divisor);
|
||||
_Static_assert (
|
||||
offsetof (struct cpu_features, cachesize_non_temporal_divisor)
|
||||
+ sizeof (cpu_features->cachesize_non_temporal_divisor)
|
||||
== sizeof (*cpu_features),
|
||||
"last cpu_features field has been printed");
|
||||
}
|
||||
|
Reference in New Issue
Block a user