1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Use ARMv8 CRC instructions where available.

ARMv8 introduced special CPU instructions for calculating CRC-32C. Use
them, when available, for speed.

As with the similar Intel CRC instructions, several factors affect
whether the instructions can be used. The compiler intrinsics for them must
be supported by the compiler, and the instructions must be supported by the
target architecture. If the compilation target architecture does not
support the instructions, but adding "-march=armv8-a+crc" makes them
available, then we compile the code with a runtime check to determine if
the host we're running on supports them or not.

For the runtime check, use the glibc getauxval() function. Unfortunately,
that's not very portable, but I couldn't find any more portable way to do
it. If getauxval() is not available, the CRC instructions will still be
used if the target architecture supports them without any additional
compiler flags, but the runtime check will not be available.

Original patch by Yuqi Gu, heavily modified by me. Reviewed by Andres
Freund, Thomas Munro.

Discussion: https://www.postgresql.org/message-id/HE1PR0801MB1323D171938EABC04FFE7FA9E3110%40HE1PR0801MB1323.eurprd08.prod.outlook.com
This commit is contained in:
Heikki Linnakangas
2018-04-04 12:22:45 +03:00
parent 638a199fa9
commit f044d71e33
11 changed files with 456 additions and 41 deletions

View File

@ -260,6 +260,7 @@ CXX = @CXX@
CFLAGS = @CFLAGS@
CFLAGS_VECTOR = @CFLAGS_VECTOR@
CFLAGS_SSE42 = @CFLAGS_SSE42@
CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@
CXXFLAGS = @CXXFLAGS@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@

View File

@ -239,6 +239,9 @@
/* Define to 1 if you have the `getaddrinfo' function. */
#undef HAVE_GETADDRINFO
/* Define to 1 if you have the `getauxval' function. */
#undef HAVE_GETAUXVAL
/* Define to 1 if you have the `gethostbyname_r' function. */
#undef HAVE_GETHOSTBYNAME_R
@ -842,6 +845,12 @@
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
#undef TM_IN_SYS_TIME
/* Define to 1 to use ARMv8 CRC Extension. */
#undef USE_ARMV8_CRC32C
/* Define to 1 to use ARMv8 CRC Extension with a runtime check. */
#undef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK
/* Define to 1 to build with assertion checks. (--enable-cassert) */
#undef USE_ASSERT_CHECKING

View File

@ -42,26 +42,42 @@ typedef uint32 pg_crc32c;
#define EQ_CRC32C(c1, c2) ((c1) == (c2))
#if defined(USE_SSE42_CRC32C)
/* Use SSE4.2 instructions. */
/* Use Intel SSE4.2 instructions. */
#define COMP_CRC32C(crc, data, len) \
((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
#elif defined(USE_ARMV8_CRC32C)
/* Use ARMv8 CRC Extension instructions. */
#define COMP_CRC32C(crc, data, len) \
((crc) = pg_comp_crc32c_armv8((crc), (data), (len)))
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
/*
* Use SSE4.2 instructions, but perform a runtime check first to check that
* they are available.
* Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first
* to check that they are available.
*/
#define COMP_CRC32C(crc, data, len) \
((crc) = pg_comp_crc32c((crc), (data), (len)))
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
#endif
#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK
extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
#endif
#else
/*
* Use slicing-by-8 algorithm.

View File

@ -65,6 +65,10 @@ thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
# pg_crc32c_armv8.o and its _srv.o version need CFLAGS_ARMV8_CRC32C
pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
#
# Server versions of object files
#

View File

@ -0,0 +1,72 @@
/*-------------------------------------------------------------------------
*
* pg_crc32c_armv8.c
* Compute CRC-32C checksum using ARMv8 CRC Extension instructions
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/port/pg_crc32c_armv8.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "port/pg_crc32c.h"
#include <arm_acle.h>
/*
 * pg_comp_crc32c_armv8
 *		Fold 'len' bytes starting at 'data' into the running CRC-32C value
 *		'crc' using the ARMv8 CRC Extension intrinsics, and return the
 *		updated value.
 *
 * The input is consumed in three phases: a short prologue that advances the
 * read pointer to an 8-byte boundary, a main loop that handles eight bytes
 * per instruction, and an epilogue for the remaining 0-7 bytes.  An empty
 * input (len == 0) returns 'crc' unchanged.
 */
pg_crc32c
pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len)
{
	const unsigned char *p = data;
	const unsigned char *pend = p + len;

	/*
	 * ARMv8 doesn't require alignment, but aligned memory access is
	 * significantly faster. Process leading bytes so that the loop below
	 * starts with a pointer aligned to eight bytes.
	 *
	 * Each step must verify that the full width it is about to load still
	 * lies inside the buffer.  Merely checking "p < pend" is not enough: with
	 * a short input at an unaligned address, the 2- and 4-byte loads would
	 * otherwise read past 'pend' and mix unrelated bytes into the checksum.
	 */
	if (!PointerIsAligned(p, uint16) &&
		p + 1 <= pend)
	{
		crc = __crc32cb(crc, *p);
		p += 1;
	}
	if (!PointerIsAligned(p, uint32) &&
		p + 2 <= pend)
	{
		crc = __crc32ch(crc, *(uint16 *) p);
		p += 2;
	}
	if (!PointerIsAligned(p, uint64) &&
		p + 4 <= pend)
	{
		crc = __crc32cw(crc, *(uint32 *) p);
		p += 4;
	}

	/* Process eight bytes at a time, as far as we can. */
	while (p + 8 <= pend)
	{
		crc = __crc32cd(crc, *(uint64 *) p);
		p += 8;
	}

	/* Process remaining 0-7 bytes. */
	if (p + 4 <= pend)
	{
		crc = __crc32cw(crc, *(uint32 *) p);
		p += 4;
	}
	if (p + 2 <= pend)
	{
		crc = __crc32ch(crc, *(uint16 *) p);
		p += 2;
	}
	if (p < pend)
	{
		crc = __crc32cb(crc, *p);
	}

	return crc;
}

View File

@ -0,0 +1,55 @@
/*-------------------------------------------------------------------------
*
* pg_crc32c_armv8_choose.c
* Choose between ARMv8 and software CRC-32C implementation.
*
* On first call, checks if the CPU we're running on supports the ARMv8
* CRC Extension. If it does, use the special instructions for CRC-32C
* computation. Otherwise, fall back to the pure software implementation
* (slicing-by-8).
*
* XXX: The glibc-specific getauxval() function, with the HWCAP_CRC32
* flag, is used to determine if the CRC Extension is available on the
* current platform. Is there a more portable way to determine that?
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/port/pg_crc32c_armv8_choose.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include <sys/auxv.h>
#include <asm/hwcap.h>
#include "port/pg_crc32c.h"
/*
 * Report whether the CPU we are running on advertises the CRC32 capability,
 * by testing the HWCAP_CRC32 bit of the AT_HWCAP value returned by
 * getauxval().
 */
static bool
pg_crc32c_armv8_available(void)
{
	return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
}
/*
 * Initial target of the pg_comp_crc32c function pointer.
 *
 * The first CRC computation lands here.  We pick the ARMv8 implementation if
 * the runtime check says the instructions are available, or the slicing-by-8
 * software implementation otherwise, store that choice in pg_comp_crc32c so
 * that later calls go straight to it, and then carry out the requested
 * computation through the updated pointer.
 */
static pg_crc32c
pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
{
	pg_comp_crc32c = pg_crc32c_armv8_available() ?
		pg_comp_crc32c_armv8 : pg_comp_crc32c_sb8;

	return pg_comp_crc32c(crc, data, len);
}
/*
 * Function pointer through which CRC-32C computations are dispatched.  It
 * starts out pointing at pg_comp_crc32c_choose, which replaces it with the
 * selected implementation the first time it is called.
 */
pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;

View File

@ -1,18 +1,19 @@
/*-------------------------------------------------------------------------
*
* pg_crc32c_choose.c
* Choose which CRC-32C implementation to use, at runtime.
* pg_crc32c_sse42_choose.c
* Choose between Intel SSE 4.2 and software CRC-32C implementation.
*
* Try to use the special CRC instructions introduced in Intel SSE 4.2,
* if available on the platform we're running on, but fall back to the
* slicing-by-8 implementation otherwise.
* On first call, checks if the CPU we're running on supports Intel SSE
* 4.2. If it does, use the special SSE instructions for CRC-32C
* computation. Otherwise, fall back to the pure software implementation
* (slicing-by-8).
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/port/pg_crc32c_choose.c
* src/port/pg_crc32c_sse42_choose.c
*
*-------------------------------------------------------------------------
*/

View File

@ -101,7 +101,7 @@ sub mkvcbuild
if ($vsVersion >= '9.00')
{
push(@pgportfiles, 'pg_crc32c_choose.c');
push(@pgportfiles, 'pg_crc32c_sse42_choose.c');
push(@pgportfiles, 'pg_crc32c_sse42.c');
push(@pgportfiles, 'pg_crc32c_sb8.c');
}