mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
Restore LF_BACKOFF
Moved InnoDB UT_RELAX_CPU() to the server. Restored the cross-platform LF_BACKOFF implementation based on UT_RELAX_CPU().
This commit is contained in:
@@ -90,37 +90,7 @@ C_MODE_END
|
|||||||
ret= 0; /* avoid compiler warning */ \
|
ret= 0; /* avoid compiler warning */ \
|
||||||
ret= IL_COMP_EXCHG ## S (a, ret, ret);
|
ret= IL_COMP_EXCHG ## S (a, ret, ret);
|
||||||
#endif
|
#endif
|
||||||
/*
|
|
||||||
my_yield_processor (equivalent of x86 PAUSE instruction) should be used
|
|
||||||
to improve performance on hyperthreaded CPUs. Intel recommends to use it in
|
|
||||||
spin loops also on non-HT machines to reduce power consumption (see e.g
|
|
||||||
http://softwarecommunity.intel.com/articles/eng/2004.htm)
|
|
||||||
|
|
||||||
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
|
|
||||||
and YieldProcessor shows that much better performance is achieved by calling
|
|
||||||
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
|
|
||||||
loop count in the range 200-300 brought best results.
|
|
||||||
*/
|
|
||||||
#ifndef YIELD_LOOPS
|
|
||||||
#define YIELD_LOOPS 200
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static __inline int my_yield_processor()
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<YIELD_LOOPS; i++)
|
|
||||||
{
|
|
||||||
#if (_MSC_VER <= 1310)
|
|
||||||
/* On older compilers YieldProcessor is not available, use inline assembly*/
|
|
||||||
__asm { rep nop }
|
|
||||||
#else
|
|
||||||
YieldProcessor();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LF_BACKOFF my_yield_processor()
|
|
||||||
#else /* cleanup */
|
#else /* cleanup */
|
||||||
|
|
||||||
#undef IL_EXCHG_ADD32
|
#undef IL_EXCHG_ADD32
|
||||||
|
@@ -17,6 +17,7 @@
|
|||||||
#define INCLUDE_LF_INCLUDED
|
#define INCLUDE_LF_INCLUDED
|
||||||
|
|
||||||
#include <my_atomic.h>
|
#include <my_atomic.h>
|
||||||
|
#include <my_cpu.h>
|
||||||
|
|
||||||
C_MODE_START
|
C_MODE_START
|
||||||
|
|
||||||
|
@@ -346,15 +346,6 @@ make_atomic_store(ptr)
|
|||||||
#undef make_atomic_fas_body
|
#undef make_atomic_fas_body
|
||||||
#undef intptr
|
#undef intptr
|
||||||
|
|
||||||
/*
|
|
||||||
the macro below defines (as an expression) the code that
|
|
||||||
will be run in spin-loops. Intel manuals recommend having PAUSE there.
|
|
||||||
It is expected to be defined in include/atomic/ *.h files
|
|
||||||
*/
|
|
||||||
#ifndef LF_BACKOFF
|
|
||||||
#define LF_BACKOFF (1)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MY_ATOMIC_OK 0
|
#define MY_ATOMIC_OK 0
|
||||||
#define MY_ATOMIC_NOT_1CPU 1
|
#define MY_ATOMIC_NOT_1CPU 1
|
||||||
extern int my_atomic_initialize();
|
extern int my_atomic_initialize();
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
#ifndef MY_CPU_INCLUDED
|
||||||
|
#define MY_CPU_INCLUDED
|
||||||
/* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
|
/* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
@@ -42,3 +44,58 @@
|
|||||||
#define HMT_medium_high()
|
#define HMT_medium_high()
|
||||||
#define HMT_high()
|
#define HMT_high()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static inline void MY_RELAX_CPU(void)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_PAUSE_INSTRUCTION
|
||||||
|
/*
|
||||||
|
According to the gcc info page, asm volatile means that the
|
||||||
|
instruction has important side-effects and must not be removed.
|
||||||
|
Also asm volatile may trigger a memory barrier (spilling all registers
|
||||||
|
to memory).
|
||||||
|
*/
|
||||||
|
#ifdef __SUNPRO_CC
|
||||||
|
asm ("pause" );
|
||||||
|
#else
|
||||||
|
__asm__ __volatile__ ("pause");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
|
||||||
|
__asm__ __volatile__ ("rep; nop");
|
||||||
|
#elif defined _WIN32
|
||||||
|
/*
|
||||||
|
In the Win32 API, the x86 PAUSE instruction is executed by calling
|
||||||
|
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
|
||||||
|
independent way by using YieldProcessor.
|
||||||
|
*/
|
||||||
|
YieldProcessor();
|
||||||
|
#elif defined(_ARCH_PWR8)
|
||||||
|
__ppc_get_timebase();
|
||||||
|
#else
|
||||||
|
int32 var, oldval = 0;
|
||||||
|
my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
|
||||||
|
MY_MEMORY_ORDER_RELAXED);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
|
||||||
|
recommends to use it in spin loops also on non-HT machines to reduce power
|
||||||
|
consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
|
||||||
|
|
||||||
|
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
|
||||||
|
and YieldProcessor shows that much better performance is achieved by calling
|
||||||
|
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
|
||||||
|
loop count in the range 200-300 brought best results.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
  Back off before retrying: call MY_RELAX_CPU() 200 times in a row.

  @return always 1, so that the call can be the right-hand operand of &&
          in a retry-loop condition
*/
static inline int LF_BACKOFF(void)
{
  int rounds_left= 200;
  while (rounds_left-- > 0)
    MY_RELAX_CPU();
  return 1;
}
|
||||||
|
#endif
|
||||||
|
@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
|
|||||||
{
|
{
|
||||||
anext_node(last)= tmp.node;
|
anext_node(last)= tmp.node;
|
||||||
} while (!my_atomic_casptr((void **)(char *)&allocator->top,
|
} while (!my_atomic_casptr((void **)(char *)&allocator->top,
|
||||||
(void **)&tmp.ptr, first) && LF_BACKOFF);
|
(void **)&tmp.ptr, first) && LF_BACKOFF());
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
|
|||||||
{
|
{
|
||||||
node= allocator->top;
|
node= allocator->top;
|
||||||
lf_pin(pins, 0, node);
|
lf_pin(pins, 0, node);
|
||||||
} while (node != allocator->top && LF_BACKOFF);
|
} while (node != allocator->top && LF_BACKOFF());
|
||||||
if (!node)
|
if (!node)
|
||||||
{
|
{
|
||||||
node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
|
node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
|
||||||
|
@@ -102,7 +102,7 @@ retry:
|
|||||||
do { /* PTR() isn't necessary below, head is a dummy node */
|
do { /* PTR() isn't necessary below, head is a dummy node */
|
||||||
cursor->curr= (LF_SLIST *)(*cursor->prev);
|
cursor->curr= (LF_SLIST *)(*cursor->prev);
|
||||||
lf_pin(pins, 1, cursor->curr);
|
lf_pin(pins, 1, cursor->curr);
|
||||||
} while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
|
} while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
@@ -117,7 +117,7 @@ retry:
|
|||||||
link= cursor->curr->link;
|
link= cursor->curr->link;
|
||||||
cursor->next= PTR(link);
|
cursor->next= PTR(link);
|
||||||
lf_pin(pins, 0, cursor->next);
|
lf_pin(pins, 0, cursor->next);
|
||||||
} while (link != cursor->curr->link && LF_BACKOFF);
|
} while (link != cursor->curr->link && LF_BACKOFF());
|
||||||
|
|
||||||
if (!DELETED(link))
|
if (!DELETED(link))
|
||||||
{
|
{
|
||||||
@@ -145,7 +145,7 @@ retry:
|
|||||||
and remove this deleted node
|
and remove this deleted node
|
||||||
*/
|
*/
|
||||||
if (my_atomic_casptr((void **) cursor->prev,
|
if (my_atomic_casptr((void **) cursor->prev,
|
||||||
(void **) &cursor->curr, cursor->next) && LF_BACKOFF)
|
(void **) &cursor->curr, cursor->next) && LF_BACKOFF())
|
||||||
lf_alloc_free(pins, cursor->curr);
|
lf_alloc_free(pins, cursor->curr);
|
||||||
else
|
else
|
||||||
goto retry;
|
goto retry;
|
||||||
|
@@ -617,7 +617,7 @@ retry:
|
|||||||
{
|
{
|
||||||
rc= *shared_ptr;
|
rc= *shared_ptr;
|
||||||
lf_pin(arg->thd->pins, 0, rc);
|
lf_pin(arg->thd->pins, 0, rc);
|
||||||
} while (rc != *shared_ptr && LF_BACKOFF);
|
} while (rc != *shared_ptr && LF_BACKOFF());
|
||||||
|
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
{
|
{
|
||||||
|
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
|
|||||||
#include "univ.i"
|
#include "univ.i"
|
||||||
|
|
||||||
#include "ut0ut.h"
|
#include "ut0ut.h"
|
||||||
|
#include "my_cpu.h"
|
||||||
|
|
||||||
/** Execute a given function exactly once in a multi-threaded environment
|
/** Execute a given function exactly once in a multi-threaded environment
|
||||||
or wait for the function to be executed by another thread.
|
or wait for the function to be executed by another thread.
|
||||||
@@ -110,7 +111,7 @@ public:
|
|||||||
ut_error;
|
ut_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
UT_RELAX_CPU();
|
MY_RELAX_CPU();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
|
|||||||
/** Time stamp */
|
/** Time stamp */
|
||||||
typedef time_t ib_time_t;
|
typedef time_t ib_time_t;
|
||||||
|
|
||||||
/*
  UT_RELAX_CPU(): one "relax" step for spin and delay loops.
  The definition is picked at compile time from the branches below.
*/
#if defined(HAVE_PAUSE_INSTRUCTION)
/*
  The gcc info page states that "asm volatile" marks an instruction as
  having important side-effects, so it must not be removed. It may also
  trigger a memory barrier (spilling all registers to memory).
*/
# if defined(__SUNPRO_CC)
#  define UT_RELAX_CPU() asm ("pause" )
# else
#  define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
# endif /* __SUNPRO_CC */

#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
#elif defined(_WIN32)
/*
  On Win32 the x86 PAUSE instruction is reached through the YieldProcessor
  macro from WinNT.h, which keeps this branch independent of the CPU
  architecture.
*/
# define UT_RELAX_CPU() YieldProcessor()
#elif defined(__powerpc__) && defined(__GLIBC__)
# include <sys/platform/ppc.h>
# define UT_RELAX_CPU() __ppc_get_timebase()
#else
/* Generic fallback: a compare-and-swap on a local volatile variable */
# define UT_RELAX_CPU() do { \
     volatile int32 volatile_var; \
     int32 oldval= 0; \
     my_atomic_cas32(&volatile_var, &oldval, 1); \
   } while (0)
#endif
|
|
||||||
|
|
||||||
#if defined (__GNUC__)
|
#if defined (__GNUC__)
|
||||||
# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
|
# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
|
||||||
#elif defined (_MSC_VER)
|
#elif defined (_MSC_VER)
|
||||||
|
@@ -293,7 +293,7 @@ ut_delay(
|
|||||||
UT_LOW_PRIORITY_CPU();
|
UT_LOW_PRIORITY_CPU();
|
||||||
|
|
||||||
for (i = 0; i < delay * 50; i++) {
|
for (i = 0; i < delay * 50; i++) {
|
||||||
UT_RELAX_CPU();
|
MY_RELAX_CPU();
|
||||||
UT_COMPILER_BARRIER();
|
UT_COMPILER_BARRIER();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user