mirror of https://sourceware.org/git/glibc.git
malloc: Revert fastbins to old-style atomics
Commit 6923f6db1e ("malloc: Use current (C11-style) atomics for fastbin access") caused a substantial performance regression on POWER and Aarch64, and the old atomics, while hard to prove correct, seem to work in practice.
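
For context, both the reverted code and the restored code implement the same lock-free LIFO discipline on the fastbin head; they differ only in which atomic primitives express the CAS loop. Below is a minimal standalone sketch, not glibc code: it uses C11 <stdatomic.h> rather than glibc's internal catomic_* macros, omits the real malloc_chunk layout, and the names struct chunk, push, pop, and bin are illustrative only. It shows the orderings involved: a release CAS publishes the fd link on push, and an acquire load pairs with it before the list is walked.

/* Minimal fastbin-style LIFO sketch with C11 atomics (illustrative only).  */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct chunk
{
  struct chunk *fd;		/* next chunk in the singly linked list */
};

/* Push E onto *ROOT.  The release CAS makes the write to E->fd visible
   before the new head can be observed.  */
static void
push (struct chunk *_Atomic *root, struct chunk *e)
{
  struct chunk *old = atomic_load_explicit (root, memory_order_relaxed);
  do
    {
      if (old == e)		/* top of bin already is E: double free */
	{
	  fputs ("double free or corruption (fasttop)\n", stderr);
	  abort ();
	}
      e->fd = old;
    }
  while (!atomic_compare_exchange_weak_explicit (root, &old, e,
						 memory_order_release,
						 memory_order_relaxed));
}

/* Pop one chunk.  Safe only if a single thread pops (glibc relies on the
   arena lock for this); concurrent poppers would reintroduce ABA issues.
   The acquire ordering pairs with the release CAS in push so HEAD->fd is
   visible.  */
static struct chunk *
pop (struct chunk *_Atomic *root)
{
  struct chunk *head = atomic_load_explicit (root, memory_order_acquire);
  while (head != NULL
	 && !atomic_compare_exchange_weak_explicit (root, &head, head->fd,
						    memory_order_acquire,
						    memory_order_acquire))
    ;
  return head;
}

int
main (void)
{
  static struct chunk a, b;
  struct chunk *_Atomic bin = NULL;

  push (&bin, &a);
  push (&bin, &b);
  printf ("popped %p then %p\n", (void *) pop (&bin), (void *) pop (&bin));
  return 0;
}

The reverted code expressed these loops through atomic_load_acquire and atomic_compare_exchange_weak_release; the diff below restores the older catomic_compare_and_exchange_val_acq/_rel macros instead.
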
ChangeLog | 14
@@ -1,3 +1,17 @@
+2019-01-18  Florian Weimer  <fweimer@redhat.com>
+
+	malloc: Revert commit 6923f6db1e688dedcf3a6556da76e0bf24a41872
+	("malloc: Use current (C11-style) atomics for fastbin access").
+	This commit introduces a substantial performance regression on
+	POWER and Aarch64.
+	* malloc/malloc.c (fastbin_push_entry, fastbin_pop_entry): Remove.
+	(REMOVE_FB): Define.
+	(_int_malloc): Use it and reindent.
+	(_int_free): Use CAS loop with
+	catomic_compare_and_exchange_val_rel.
+	(malloc_consolidate): Use atomic_exchange_acq.
+
+
 2019-01-18  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* signal/Makefile (LDFLAGS-tst-minsigstksz-1): New.  Set to
malloc/malloc.c | 166
@@ -1316,78 +1316,6 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 #define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
 
-/* Add an item to the atomic fastbin list at *ROOT.  Returns the old
-   value at *ROOT.  Note that properties of the old chunk are only
-   stable if the caller has acquired the arena lock.  With out the
-   lock, it can be deallocated at any time.  */
-static inline struct malloc_chunk *
-fastbin_push_entry (struct malloc_chunk **root, struct malloc_chunk *e)
-{
-  struct malloc_chunk *head;
-  if (SINGLE_THREAD_P)
-    {
-      /* Check that the top of the bin is not the record we are going
-         to add (i.e., double free).  */
-      head = *root;
-      if (head == e)
-        malloc_printerr ("double free or corruption (fasttop)");
-      e->fd = head;
-      *root = e;
-    }
-  else
-    do
-      {
-        /* Synchronize with the release release MO CAS below.  We do
-           not need synchronization locally, but fastbin_pop_entry and
-           (especially) malloc_consolidate read the entire list after
-           synchronizing on the head, so we need to make sure that the
-           writes to the next (fd) pointers have happened.  */
-        head = atomic_load_acquire (root);
-        /* Check that the top of the bin is not the record we are
-           going to add (i.e., double free).  */
-        if (head == e)
-          malloc_printerr ("double free or corruption (fasttop)");
-        e->fd = head;
-      }
-    /* Synchronizes with the acquire MO CAS in  */
-    while (!atomic_compare_exchange_weak_release (root, &head, e));
-  return head;
-}
-
-/* Remove an item from the atomic fastbin list at *ROOT.  The caller
-   must have acquired the arena lock.  */
-static inline struct malloc_chunk *
-fastbin_pop_entry (struct malloc_chunk **root)
-{
-  struct malloc_chunk *head;
-  if (SINGLE_THREAD_P)
-    {
-      head = *root;
-      if (head != NULL)
-        *root = head->fd;
-    }
-  else
-    {
-      /* Synchromizes with the release MO store in fastbin_push_entry.
-         Synchronization is needed because we read the next list
-         pointer.  */
-      head = atomic_load_acquire (root);
-      struct malloc_chunk *tail;
-      do
-        {
-          if (head == NULL)
-            return NULL;
-          tail = head->fd;
-        }
-      /* Synchronizes with the release MO store in fastbin_push_entry.
-         We do not have an ABA issue here because the caller has
-         acquired the arena lock, which ensures that there is only one
-         thread which removes elements from this list.  */
-      while (!atomic_compare_exchange_weak_acquire (root, &head, tail));
-    }
-  return head;
-}
-
 #pragma GCC poison mchunk_size
 #pragma GCC poison mchunk_prev_size
 
@@ -3648,36 +3576,63 @@ _int_malloc (mstate av, size_t bytes)
      can try it without checking, which saves some time on this fast path.
    */
 
+#define REMOVE_FB(fb, victim, pp)			\
+  do							\
+    {							\
+      victim = pp;					\
+      if (victim == NULL)				\
+	break;						\
+    }							\
+  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
+	 != victim);					\
+
   if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
     {
       idx = fastbin_index (nb);
       mfastbinptr *fb = &fastbin (av, idx);
-      victim = fastbin_pop_entry (fb);
+      mchunkptr pp;
+      victim = *fb;
+
       if (victim != NULL)
         {
-          size_t victim_idx = fastbin_index (chunksize (victim));
-          if (victim_idx != idx)
-            malloc_printerr ("malloc(): memory corruption (fast)");
-          check_remalloced_chunk (av, victim, nb);
-#if USE_TCACHE
-          /* While we're here, if we see other chunks of the same size,
-             stash them in the tcache.  */
-          size_t tc_idx = csize2tidx (nb);
-          if (tcache && tc_idx < mp_.tcache_bins)
+          if (SINGLE_THREAD_P)
+            *fb = victim->fd;
+          else
+            REMOVE_FB (fb, pp, victim);
+          if (__glibc_likely (victim != NULL))
             {
-              /* While bin not empty and tcache not full, copy chunks.  */
-              while (tcache->counts[tc_idx] < mp_.tcache_count)
+              size_t victim_idx = fastbin_index (chunksize (victim));
+              if (__builtin_expect (victim_idx != idx, 0))
+                malloc_printerr ("malloc(): memory corruption (fast)");
+              check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+              /* While we're here, if we see other chunks of the same size,
+                 stash them in the tcache.  */
+              size_t tc_idx = csize2tidx (nb);
+              if (tcache && tc_idx < mp_.tcache_bins)
                 {
-                  mchunkptr tc_victim = fastbin_pop_entry (fb);
-                  if (tc_victim == NULL)
-                    break;
-                  tcache_put (tc_victim, tc_idx);
+                  mchunkptr tc_victim;
+
+                  /* While bin not empty and tcache not full, copy chunks.  */
+                  while (tcache->counts[tc_idx] < mp_.tcache_count
+                         && (tc_victim = *fb) != NULL)
+                    {
+                      if (SINGLE_THREAD_P)
+                        *fb = tc_victim->fd;
+                      else
+                        {
+                          REMOVE_FB (fb, pp, tc_victim);
+                          if (__glibc_unlikely (tc_victim == NULL))
+                            break;
+                        }
+                      tcache_put (tc_victim, tc_idx);
+                    }
                 }
-            }
 #endif
-          void *p = chunk2mem (victim);
-          alloc_perturb (p, bytes);
-          return p;
+              void *p = chunk2mem (victim);
+              alloc_perturb (p, bytes);
+              return p;
+            }
         }
     }
 
@@ -4309,7 +4264,28 @@ _int_free (mstate av, mchunkptr p, int have_lock)
     fb = &fastbin (av, idx);
 
     /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
-    mchunkptr old = fastbin_push_entry (fb, p);
+    mchunkptr old = *fb, old2;
+
+    if (SINGLE_THREAD_P)
+      {
+        /* Check that the top of the bin is not the record we are going to
+           add (i.e., double free).  */
+        if (__builtin_expect (old == p, 0))
+          malloc_printerr ("double free or corruption (fasttop)");
+        p->fd = old;
+        *fb = p;
+      }
+    else
+      do
+        {
+          /* Check that the top of the bin is not the record we are going to
+             add (i.e., double free).  */
+          if (__builtin_expect (old == p, 0))
+            malloc_printerr ("double free or corruption (fasttop)");
+          p->fd = old2 = old;
+        }
+      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+             != old2);
 
     /* Check that size of fastbin chunk at the top is the same as
        size of the chunk that we are adding.  We can dereference OLD
@@ -4500,9 +4476,7 @@ static void malloc_consolidate(mstate av)
   maxfb = &fastbin (av, NFASTBINS - 1);
   fb = &fastbin (av, 0);
   do {
-    /* Synchronizes with the release MO store in
-       fastbin_push_entry.  */
-    p = atomic_exchange_acquire (fb, NULL);
+    p = atomic_exchange_acq (fb, NULL);
     if (p != 0) {
       do {
         {