diff --git a/config.h.cmake b/config.h.cmake index b8a77899c4d..765d75dfb23 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -187,8 +187,6 @@ #cmakedefine HAVE_LINUX_FALLOC_H 1 #cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1 #cmakedefine HAVE_PREAD 1 -#cmakedefine HAVE_PAUSE_INSTRUCTION 1 -#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1 #cmakedefine HAVE_RDTSCLL 1 #cmakedefine HAVE_READ_REAL_TIME 1 #cmakedefine HAVE_PTHREAD_ATTR_CREATE 1 diff --git a/configure.cmake b/configure.cmake index 67ed6503e3e..e75810f8150 100644 --- a/configure.cmake +++ b/configure.cmake @@ -758,32 +758,6 @@ IF(NOT C_HAS_inline) ENDIF() ENDIF() -IF(NOT CMAKE_CROSSCOMPILING AND NOT MSVC) - STRING(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} processor) - IF(processor MATCHES "86" OR processor MATCHES "amd64" OR processor MATCHES "x64") - #Check for x86 PAUSE instruction - # We have to actually try running the test program, because of a bug - # in Solaris on x86_64, where it wrongly reports that PAUSE is not - # supported when trying to run an application. See - # http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684 - CHECK_C_SOURCE_RUNS(" - int main() - { - __asm__ __volatile__ (\"pause\"); - return 0; - }" HAVE_PAUSE_INSTRUCTION) - ENDIF() - IF (NOT HAVE_PAUSE_INSTRUCTION) - CHECK_C_SOURCE_COMPILES(" - int main() - { - __asm__ __volatile__ (\"rep; nop\"); - return 0; - } - " HAVE_FAKE_PAUSE_INSTRUCTION) - ENDIF() -ENDIF() - CHECK_SYMBOL_EXISTS(tcgetattr "termios.h" HAVE_TCGETATTR 1) # diff --git a/include/my_cpu.h b/include/my_cpu.h index b5665fc108c..0e37eafe60e 100644 --- a/include/my_cpu.h +++ b/include/my_cpu.h @@ -46,10 +46,20 @@ #define HMT_high() #endif +#if defined __i386__ || defined __x86_64__ || defined _WIN32 +# define HAVE_PAUSE_INSTRUCTION /* added in Intel Pentium 4 */ +#endif static inline void MY_RELAX_CPU(void) { -#ifdef HAVE_PAUSE_INSTRUCTION +#ifdef _WIN32 + /* + In the Win32 API, the x86 PAUSE instruction is executed by calling + the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- + independent way by using YieldProcessor. + */ + YieldProcessor(); +#elif defined HAVE_PAUSE_INSTRUCTION /* According to the gcc info page, asm volatile means that the instruction has important side-effects and must not be removed. @@ -61,16 +71,6 @@ static inline void MY_RELAX_CPU(void) #else __asm__ __volatile__ ("pause"); #endif - -#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION) - __asm__ __volatile__ ("rep; nop"); -#elif defined _WIN32 - /* - In the Win32 API, the x86 PAUSE instruction is executed by calling - the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- - independent way by using YieldProcessor. - */ - YieldProcessor(); #elif defined(_ARCH_PWR8) __ppc_get_timebase(); #else @@ -81,6 +81,20 @@ static inline void MY_RELAX_CPU(void) } +#ifdef HAVE_PAUSE_INSTRUCTION +# ifdef __cplusplus +extern "C" { +# endif +extern unsigned my_cpu_relax_multiplier; +void my_cpu_init(void); +# ifdef __cplusplus +} +# endif +#else +# define my_cpu_relax_multiplier 200 +# define my_cpu_init() /* nothing */ +#endif + /* LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel recommends to use it in spin loops also on non-HT machines to reduce power @@ -94,9 +108,23 @@ static inline void MY_RELAX_CPU(void) static inline int LF_BACKOFF(void) { - int i; - for (i= 0; i < 200; i++) + unsigned i= my_cpu_relax_multiplier; + while (i--) MY_RELAX_CPU(); return 1; } + +/** + Run a delay loop while waiting for a shared resource to be released. + @param delay originally, roughly microseconds on 100 MHz Intel Pentium +*/ +static inline void ut_delay(unsigned delay) +{ + unsigned i= my_cpu_relax_multiplier / 4 * delay; + HMT_low(); + while (i--) + MY_RELAX_CPU(); + HMT_medium(); +} + #endif diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 6990d1350e3..438d6b428e0 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -44,7 +44,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c my_getncpus.c my_safehash.c my_chmod.c my_rnd.c my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c my_rdtsc.c my_context.c psi_noop.c - my_atomic_writes.c my_likely.c + my_atomic_writes.c my_cpu.c my_likely.c file_logger.c my_dlerror.c) IF (WIN32) diff --git a/mysys/my_cpu.c b/mysys/my_cpu.c new file mode 100644 index 00000000000..cd13624df0f --- /dev/null +++ b/mysys/my_cpu.c @@ -0,0 +1,81 @@ +/* Copyright (c) 2019, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include +#include + +#ifdef HAVE_PAUSE_INSTRUCTION +/** How many times to invoke PAUSE in a loop */ +unsigned my_cpu_relax_multiplier = 200; + +# include + +# ifdef _MSC_VER +# include +# else +# include +# endif + +#define PAUSE4 MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU(); MY_RELAX_CPU() +#define PAUSE16 PAUSE4; PAUSE4; PAUSE4; PAUSE4 + +/** + Initialize my_cpu_relax_multiplier. + + Determine the duration of a PAUSE instruction by running an + unrolled loop of 16 PAUSE instructions twice, and taking the + faster of the two runs. In this way, even if the execution is + interrupted by the operating system, it should be extremely + unlikely that both loops get interrupted. + + On the Intel Skylake microarchitecture, the PAUSE instruction takes + around 140 clock cycles, while on earlier microarchitectures it could + be 10 clock cycles or less. Scale the PAUSE loop counter accordingly. + + On a pre-Skylake Intel Xeon CPU E5-2630 v4 @ 2.20GHz running an AMD64 + executable, the numbers would be between 172 and 220 when all the code + is inlined as follows: + + rdtsc,mov,shl,or, 16*pause, + rdtsc,mov,shl,or, 16*pause, + rdtsc. + + That would yield 11 to 14 cycles per PAUSE instruction even if we + (wrongly) ignore the overhead of the other instructions. + + On a Skylake mobile processor Intel Core i7-6500U CPU @ 2.50GHz, the + numbers would range from 1896 to 2410 (or 1976 if taking the minimum + of two runs), yielding 118 to 151 (or 123) cycles per PAUSE instruction. + + Let us define a threshold at roughly 30 cycles per PAUSE instruction, + and use a shorter delay if the PAUSE instruction takes longer than + that. In some AMD processors, the PAUSE instruction could take 40 or + 50 cycles. Let us use a shorter delay multiplier for them as well. + + The 1/10 scaling factor (200/20) was derived experimentally by + Mikhail Sinyavin from Intel. +*/ +void my_cpu_init(void) +{ + uint64_t t0, t1, t2; + t0= __rdtsc(); + PAUSE16; + t1= __rdtsc(); + PAUSE16; + t2= __rdtsc(); + if (t2 - t1 > 30 * 16 && t1 - t0 > 30 * 16) + my_cpu_relax_multiplier= 20; +} +#endif diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 4b31b78fc21..f98f51c73d0 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -5113,6 +5113,7 @@ static int init_server_components() We need to call each of these following functions to ensure that all things are initialized so that unireg_abort() doesn't fail */ + my_cpu_init(); mdl_init(); if (tdc_init() || hostname_cache_init()) unireg_abort(1); diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h index 1b8ec8d0fe4..3e99eb79416 100644 --- a/storage/innobase/include/ib0mutex.h +++ b/storage/innobase/include/ib0mutex.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,8 +29,7 @@ Created 2013-03-26 Sunny Bains. #ifndef ib0mutex_h #define ib0mutex_h -#include "ut0ut.h" -#include "ut0rnd.h" +#include "my_cpu.h" #include "os0event.h" #include "sync0arr.h" diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index b54e41ea614..51e00c6f0fe 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -54,14 +54,6 @@ Created 1/20/1994 Heikki Tuuri /** Time stamp */ typedef time_t ib_time_t; -#if defined (__GNUC__) -# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory") -#elif defined (_MSC_VER) -# define UT_COMPILER_BARRIER() _ReadWriteBarrier() -#else -# define UT_COMPILER_BARRIER() -#endif - /*********************************************************************//** Delays execution for at most max_wait_us microseconds or returns earlier if cond becomes true. @@ -270,14 +262,7 @@ void ut_sprintf_timestamp( /*=================*/ char* buf); /*!< in: buffer where to sprintf */ -/*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -void -ut_delay( -/*=====*/ - ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */ + /*************************************************************//** Prints the contents of a memory buffer in hex and ascii. */ void diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 7ee015f8f38..42054f309c7 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -283,27 +283,6 @@ ut_sprintf_timestamp( #endif } -/*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -void -ut_delay( -/*=====*/ - ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */ -{ - ulint i; - - HMT_low(); - - for (i = 0; i < delay * 50; i++) { - MY_RELAX_CPU(); - UT_COMPILER_BARRIER(); - } - - HMT_medium(); -} - /*************************************************************//** Prints the contents of a memory buffer in hex and ascii. */ void