
malloc: Revert fastbins to old-style atomics

Commit 6923f6db1e ("malloc: Use current
(C11-style) atomics for fastbin access") caused a substantial
performance regression on POWER and Aarch64, and the old atomics,
while hard to prove correct, seem to work in practice.
commit 71effcea34
parent 562f43620d
Author: Florian Weimer
Date:   2019-01-18 22:38:32 +01:00

2 changed files with 84 additions and 96 deletions
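For context: the reverted code implemented each fastbin as a lock-free LIFO list manipulated with C11-style acquire/release CAS loops (through glibc's internal atomic_* wrappers). Below is a minimal standalone sketch of that push/pop pattern, written against standard <stdatomic.h> with an illustrative node type rather than glibc's malloc_chunk; the names here are not glibc's.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative node standing in for malloc_chunk; FD is the next
   pointer, as in a fastbin chunk.  */
struct node
{
  struct node *fd;
};

/* Push E onto the LIFO list at *ROOT.  The release CAS publishes the
   E->fd store to any thread that acquire-loads *ROOT.  */
static void
push (struct node *_Atomic *root, struct node *e)
{
  struct node *head = atomic_load_explicit (root, memory_order_relaxed);
  do
    e->fd = head;
  while (!atomic_compare_exchange_weak_explicit (root, &head, e,
                                                 memory_order_release,
                                                 memory_order_relaxed));
}

/* Pop the head of the list.  Safe only with a single concurrent
   consumer (glibc ensured this via the arena lock); with several
   consumers the HEAD->fd read would be vulnerable to ABA.  */
static struct node *
pop (struct node *_Atomic *root)
{
  struct node *head = atomic_load_explicit (root, memory_order_acquire);
  struct node *tail;
  do
    {
      if (head == NULL)
        return NULL;
      tail = head->fd;
    }
  while (!atomic_compare_exchange_weak_explicit (root, &head, tail,
                                                 memory_order_acquire,
                                                 memory_order_acquire));
  return head;
}

int
main (void)
{
  static struct node *_Atomic bin;
  struct node a, b;
  push (&bin, &a);
  push (&bin, &b);
  /* LIFO order: B comes back first.  Prints "1 1".  */
  printf ("%d %d\n", pop (&bin) == &b, pop (&bin) == &a);
  return 0;
}

The release CAS in push is what makes the e->fd store visible to the pop side and to malloc_consolidate; this commit replaces those acquire/release operations with glibc's older catomic_* macros and plain loads, as the diff below shows.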

--- a/ChangeLog
+++ b/ChangeLog

@@ -1,3 +1,17 @@
+2019-01-18  Florian Weimer  <fweimer@redhat.com>
+
+	malloc: Revert commit 6923f6db1e688dedcf3a6556da76e0bf24a41872
+	("malloc: Use current (C11-style) atomics for fastbin access").
+	This commit introduces a substantial performance regression on
+	POWER and Aarch64.
+	* malloc/malloc.c (fastbin_push_entry, fastbin_pop_entry): Remove.
+	(REMOVE_FB): Define.
+	(_int_malloc): Use it and reindent.
+	(_int_free): Use CAS loop with
+	catomic_compare_and_exchange_val_rel.
+	(malloc_consolidate): Use atomic_exchange_acq.
+
 2019-01-18  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* signal/Makefile (LDFLAGS-tst-minsigstksz-1): New.  Set to

--- a/malloc/malloc.c
+++ b/malloc/malloc.c

@@ -1316,78 +1316,6 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 #define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
 
-/* Add an item to the atomic fastbin list at *ROOT.  Returns the old
-   value at *ROOT.  Note that properties of the old chunk are only
-   stable if the caller has acquired the arena lock.  Without the
-   lock, it can be deallocated at any time.  */
-static inline struct malloc_chunk *
-fastbin_push_entry (struct malloc_chunk **root, struct malloc_chunk *e)
-{
-  struct malloc_chunk *head;
-  if (SINGLE_THREAD_P)
-    {
-      /* Check that the top of the bin is not the record we are going
-         to add (i.e., double free).  */
-      head = *root;
-      if (head == e)
-        malloc_printerr ("double free or corruption (fasttop)");
-      e->fd = head;
-      *root = e;
-    }
-  else
-    do
-      {
-        /* Synchronize with the release MO CAS below.  We do not need
-           synchronization locally, but fastbin_pop_entry and
-           (especially) malloc_consolidate read the entire list after
-           synchronizing on the head, so we need to make sure that the
-           writes to the next (fd) pointers have happened.  */
-        head = atomic_load_acquire (root);
-        /* Check that the top of the bin is not the record we are
-           going to add (i.e., double free).  */
-        if (head == e)
-          malloc_printerr ("double free or corruption (fasttop)");
-        e->fd = head;
-      }
-    /* Synchronizes with the acquire MO CAS in  */
-    while (!atomic_compare_exchange_weak_release (root, &head, e));
-  return head;
-}
-
-/* Remove an item from the atomic fastbin list at *ROOT.  The caller
-   must have acquired the arena lock.  */
-static inline struct malloc_chunk *
-fastbin_pop_entry (struct malloc_chunk **root)
-{
-  struct malloc_chunk *head;
-  if (SINGLE_THREAD_P)
-    {
-      head = *root;
-      if (head != NULL)
-        *root = head->fd;
-    }
-  else
-    {
-      /* Synchronizes with the release MO store in fastbin_push_entry.
-         Synchronization is needed because we read the next list
-         pointer.  */
-      head = atomic_load_acquire (root);
-      struct malloc_chunk *tail;
-      do
-        {
-          if (head == NULL)
-            return NULL;
-          tail = head->fd;
-        }
-      /* Synchronizes with the release MO store in fastbin_push_entry.
-         We do not have an ABA issue here because the caller has
-         acquired the arena lock, which ensures that there is only one
-         thread which removes elements from this list.  */
-      while (!atomic_compare_exchange_weak_acquire (root, &head, tail));
-    }
-  return head;
-}
-
 #pragma GCC poison mchunk_size
 #pragma GCC poison mchunk_prev_size
@@ -3648,36 +3576,63 @@ _int_malloc (mstate av, size_t bytes)
      can try it without checking, which saves some time on this fast path.
    */
 
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
   if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
     {
       idx = fastbin_index (nb);
       mfastbinptr *fb = &fastbin (av, idx);
-      victim = fastbin_pop_entry (fb);
+      mchunkptr pp;
+      victim = *fb;
+
       if (victim != NULL)
 	{
-	  size_t victim_idx = fastbin_index (chunksize (victim));
-	  if (victim_idx != idx)
-	    malloc_printerr ("malloc(): memory corruption (fast)");
-	  check_remalloced_chunk (av, victim, nb);
-#if USE_TCACHE
-	  /* While we're here, if we see other chunks of the same size,
-	     stash them in the tcache.  */
-	  size_t tc_idx = csize2tidx (nb);
-	  if (tcache && tc_idx < mp_.tcache_bins)
+	  if (SINGLE_THREAD_P)
+	    *fb = victim->fd;
+	  else
+	    REMOVE_FB (fb, pp, victim);
+	  if (__glibc_likely (victim != NULL))
 	    {
-	      /* While bin not empty and tcache not full, copy chunks.  */
-	      while (tcache->counts[tc_idx] < mp_.tcache_count)
+	      size_t victim_idx = fastbin_index (chunksize (victim));
+	      if (__builtin_expect (victim_idx != idx, 0))
+		malloc_printerr ("malloc(): memory corruption (fast)");
+	      check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+	      /* While we're here, if we see other chunks of the same size,
+		 stash them in the tcache.  */
+	      size_t tc_idx = csize2tidx (nb);
+	      if (tcache && tc_idx < mp_.tcache_bins)
 		{
-		  mchunkptr tc_victim = fastbin_pop_entry (fb);
-		  if (tc_victim == NULL)
-		    break;
-		  tcache_put (tc_victim, tc_idx);
+		  mchunkptr tc_victim;
+
+		  /* While bin not empty and tcache not full, copy chunks.  */
+		  while (tcache->counts[tc_idx] < mp_.tcache_count
+			 && (tc_victim = *fb) != NULL)
+		    {
+		      if (SINGLE_THREAD_P)
+			*fb = tc_victim->fd;
+		      else
+			{
+			  REMOVE_FB (fb, pp, tc_victim);
+			  if (__glibc_unlikely (tc_victim == NULL))
+			    break;
+			}
+		      tcache_put (tc_victim, tc_idx);
+		    }
 		}
-	    }
 #endif
-	  void *p = chunk2mem (victim);
-	  alloc_perturb (p, bytes);
-	  return p;
+	      void *p = chunk2mem (victim);
+	      alloc_perturb (p, bytes);
+	      return p;
+	    }
 	}
     }
@@ -4309,7 +4264,28 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     fb = &fastbin (av, idx);
 
     /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
-    mchunkptr old = fastbin_push_entry (fb, p);
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+	/* Check that the top of the bin is not the record we are going to
+	   add (i.e., double free).  */
+	if (__builtin_expect (old == p, 0))
+	  malloc_printerr ("double free or corruption (fasttop)");
+	p->fd = old;
+	*fb = p;
+      }
+    else
+      do
+	{
+	  /* Check that the top of the bin is not the record we are going to
+	     add (i.e., double free).  */
+	  if (__builtin_expect (old == p, 0))
+	    malloc_printerr ("double free or corruption (fasttop)");
+	  p->fd = old2 = old;
+	}
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+	     != old2);
 
     /* Check that size of fastbin chunk at the top is the same as
        size of the chunk that we are adding.  We can dereference OLD
@@ -4500,9 +4476,7 @@ static void malloc_consolidate(mstate av)
   maxfb = &fastbin (av, NFASTBINS - 1);
   fb = &fastbin (av, 0);
   do {
-    /* Synchronizes with the release MO store in
-       fastbin_push_entry.  */
-    p = atomic_exchange_acquire (fb, NULL);
+    p = atomic_exchange_acq (fb, NULL);
     if (p != 0) {
       do {
 	{