
Use native CRC instructions on 64-bit LoongArch

As with the Intel and Arm CRC instructions, compiler intrinsics for
them must be supported by the compiler. In contrast with Intel and
Arm, no runtime check is needed. Aligned memory access is faster, so
use the Arm coding as a model.

YANG Xudong

Discussion: https://postgr.es/m/b522a0c5-e3b2-99cc-6387-58134fb88cbe%40ymatrix.cn
John Naylor
2023-08-10 11:36:15 +07:00
parent fa2e874946
commit 4d14ccd6af
8 changed files with 240 additions and 17 deletions
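Build-time detection only: since the CRCC instructions are part of the base 64-bit LoongArch ISA, the configure/meson check just verifies that the compiler accepts the intrinsics and then defines USE_LOONGARCH_CRC32C; no runtime dispatch is generated. A minimal illustrative probe of that kind (a sketch, not the exact test added by this commit):

int
main(void)
{
	unsigned int crc = 0;

	/* If these builtins compile and link, the CRCC intrinsics are usable. */
	crc = __builtin_loongarch_crcc_w_b_w(0, crc);
	crc = __builtin_loongarch_crcc_w_h_w(0, crc);
	crc = __builtin_loongarch_crcc_w_w_w(0, crc);
	crc = __builtin_loongarch_crcc_w_d_w(0, crc);

	/* Use the result so the calls are not optimized away. */
	return crc == 0;
}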

src/include/pg_config.h.in

@@ -714,6 +714,9 @@
/* Define to 1 to build with LLVM based JIT support. (--with-llvm) */
#undef USE_LLVM
/* Define to 1 to use LoongArch CRCC instructions. */
#undef USE_LOONGARCH_CRC32C
/* Define to 1 to build with LZ4 support. (--with-lz4) */
#undef USE_LZ4

src/include/port/pg_crc32c.h

@@ -58,6 +58,15 @@ extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
#elif defined(USE_LOONGARCH_CRC32C)
/* Use LoongArch CRCC instructions. */
#define COMP_CRC32C(crc, data, len) \
((crc) = pg_comp_crc32c_loongarch((crc), (data), (len)))
#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len);
#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
/*
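For reference, callers never invoke pg_comp_crc32c_loongarch directly; they go through the INIT_CRC32C / COMP_CRC32C / FIN_CRC32C macros declared in this header, so the LoongArch path is picked up transparently at build time. An illustrative caller (checksum_buffer is a hypothetical name, not part of this patch):

#include "c.h"
#include "port/pg_crc32c.h"

static pg_crc32c
checksum_buffer(const void *buf, size_t len)
{
	pg_crc32c	crc;

	INIT_CRC32C(crc);			/* seeds crc with 0xFFFFFFFF */
	COMP_CRC32C(crc, buf, len); /* expands to pg_comp_crc32c_loongarch here */
	FIN_CRC32C(crc);			/* final bit inversion */

	return crc;
}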

src/port/meson.build

@@ -92,6 +92,9 @@ replace_funcs_pos = [
['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
# loongarch
['pg_crc32c_loongarch', 'USE_LOONGARCH_CRC32C'],
# generic fallback
['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
]

src/port/pg_crc32c_loongarch.c (new file)

@@ -0,0 +1,73 @@
/*-------------------------------------------------------------------------
*
* pg_crc32c_loongarch.c
* Compute CRC-32C checksum using LoongArch CRCC instructions
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/port/pg_crc32c_loongarch.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "port/pg_crc32c.h"
pg_crc32c
pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len)
{
const unsigned char *p = data;
const unsigned char *pend = p + len;
/*
* LoongArch doesn't require alignment, but aligned memory access is
* significantly faster. Process leading bytes so that the loop below
* starts with a pointer aligned to eight bytes.
*/
if (!PointerIsAligned(p, uint16) &&
p + 1 <= pend)
{
crc = __builtin_loongarch_crcc_w_b_w(*p, crc);
p += 1;
}
if (!PointerIsAligned(p, uint32) &&
p + 2 <= pend)
{
crc = __builtin_loongarch_crcc_w_h_w(*(uint16 *) p, crc);
p += 2;
}
if (!PointerIsAligned(p, uint64) &&
p + 4 <= pend)
{
crc = __builtin_loongarch_crcc_w_w_w(*(uint32 *) p, crc);
p += 4;
}
/* Process eight bytes at a time, as far as we can. */
while (p + 8 <= pend)
{
crc = __builtin_loongarch_crcc_w_d_w(*(uint64 *) p, crc);
p += 8;
}
/* Process remaining 0-7 bytes. */
if (p + 4 <= pend)
{
crc = __builtin_loongarch_crcc_w_w_w(*(uint32 *) p, crc);
p += 4;
}
if (p + 2 <= pend)
{
crc = __builtin_loongarch_crcc_w_h_w(*(uint16 *) p, crc);
p += 2;
}
if (p < pend)
{
crc = __builtin_loongarch_crcc_w_b_w(*p, crc);
}
return crc;
}
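As a quick sanity check on LoongArch hardware, the new function can be exercised directly against the well-known CRC-32C check value: the CRC-32C of the ASCII string "123456789" is 0xE3069283. A standalone sketch (not part of the commit; it reproduces by hand the seeding and final inversion that INIT_CRC32C and FIN_CRC32C perform):

#include <stdio.h>
#include <string.h>

#include "c.h"
#include "port/pg_crc32c.h"

int
main(void)
{
	const char *msg = "123456789";
	pg_crc32c	crc = 0xFFFFFFFF;	/* same seed as INIT_CRC32C */

	crc = pg_comp_crc32c_loongarch(crc, msg, strlen(msg));
	crc ^= 0xFFFFFFFF;				/* same final step as FIN_CRC32C */

	printf("crc = 0x%08X (expected 0xE3069283)\n", crc);
	return (crc == 0xE3069283) ? 0 : 1;
}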