1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-01 10:06:57 +03:00

x86: Make the divisor in setting non_temporal_threshold cpu specific

Different systems prefer different divisors.

From benchmarks[1] so far the following divisors have been found:
    ICX     : 2
    SKX     : 2
    BWD     : 8

For Intel, we are generalizing that BWD and older prefer 8 as a
divisor, and SKL and newer prefer 2. This number can be further tuned
as benchmarks are run.

[1]: https://github.com/goldsteinn/memcpy-nt-benchmarks
Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
Noah Goldstein
2023-06-07 13:18:03 -05:00
parent f193ea20ed
commit 180897c161
4 changed files with 51 additions and 26 deletions

View File

@ -113,8 +113,11 @@ _dl_diagnostics_cpu (void)
cpu_features->level3_cache_linesize);
print_cpu_features_value ("level4_cache_size",
cpu_features->level4_cache_size);
_Static_assert (offsetof (struct cpu_features, level4_cache_size)
+ sizeof (cpu_features->level4_cache_size)
== sizeof (*cpu_features),
"last cpu_features field has been printed");
print_cpu_features_value ("cachesize_non_temporal_divisor",
cpu_features->cachesize_non_temporal_divisor);
_Static_assert (
offsetof (struct cpu_features, cachesize_non_temporal_divisor)
+ sizeof (cpu_features->cachesize_non_temporal_divisor)
== sizeof (*cpu_features),
"last cpu_features field has been printed");
}