mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-24 01:29:19 +03:00 
			
		
		
		
	Use ARMv8 CRC instructions where available.
ARMv8 introduced special CPU instructions for calculating CRC-32C. Use them, when available, for speed. As with the similar Intel CRC instructions, several factors affect whether the instructions can be used. The compiler intrinsics for them must be supported by the compiler, and the instructions must be supported by the target architecture. If the compilation target architecture does not support the instructions, but adding "-march=armv8-a+crc" makes them available, then we compile the code with a runtime check to determine if the host we're running on supports them or not. For the runtime check, use the glibc getauxval() function. Unfortunately, that's not very portable, but I couldn't find any more portable way to do it. If getauxval() is not available, the CRC instructions will still be used if the target architecture supports them without any additional compiler flags, but the runtime check will not be available. Original patch by Yuqi Gu, heavily modified by me. Reviewed by Andres Freund, Thomas Munro. Discussion: https://www.postgresql.org/message-id/HE1PR0801MB1323D171938EABC04FFE7FA9E3110%40HE1PR0801MB1323.eurprd08.prod.outlook.com
This commit is contained in:
		| @@ -667,3 +667,37 @@ if test x"$Ac_cachevar" = x"yes"; then | |||||||
| fi | fi | ||||||
| undefine([Ac_cachevar])dnl | undefine([Ac_cachevar])dnl | ||||||
| ])# PGAC_SSE42_CRC32_INTRINSICS | ])# PGAC_SSE42_CRC32_INTRINSICS | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # PGAC_ARMV8_CRC32C_INTRINSICS | ||||||
|  | # ----------------------- | ||||||
|  | # Check if the compiler supports the CRC32C instructions using the __crc32cb, | ||||||
|  | # __crc32ch, __crc32cw, and __crc32cd intrinsic functions. These instructions | ||||||
|  | # were first introduced in ARMv8 in the optional CRC Extension, and became | ||||||
|  | # mandatory in ARMv8.1. | ||||||
|  | # | ||||||
|  | # An optional compiler flag can be passed as argument (e.g. | ||||||
|  | # -march=armv8-a+crc). If the intrinsics are supported, sets | ||||||
|  | # pgac_armv8_crc32c_intrinsics, and CFLAGS_ARMV8_CRC32C. | ||||||
|  | # | ||||||
|  | # The probe is a link test, not just a compile test: a small program calling | ||||||
|  | # all four intrinsics is built with the flag argument appended to CFLAGS. | ||||||
|  | # CFLAGS itself is saved beforehand and restored afterwards, so the flag does | ||||||
|  | # not leak into later checks. The result is cached in a variable whose name | ||||||
|  | # is derived from the flag argument, so probing with and without the flag | ||||||
|  | # uses separate cache entries. | ||||||
|  | # | ||||||
|  | # If the probe fails, neither pgac_armv8_crc32c_intrinsics nor | ||||||
|  | # CFLAGS_ARMV8_CRC32C is modified, which lets the caller retry with a | ||||||
|  | # different flag. | ||||||
|  | AC_DEFUN([PGAC_ARMV8_CRC32C_INTRINSICS], | ||||||
|  | [define([Ac_cachevar], [AS_TR_SH([pgac_cv_armv8_crc32c_intrinsics_$1])])dnl | ||||||
|  | AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar], | ||||||
|  | [pgac_save_CFLAGS=$CFLAGS | ||||||
|  | CFLAGS="$pgac_save_CFLAGS $1" | ||||||
|  | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>], | ||||||
|  |   [unsigned int crc = 0; | ||||||
|  |    crc = __crc32cb(crc, 0); | ||||||
|  |    crc = __crc32ch(crc, 0); | ||||||
|  |    crc = __crc32cw(crc, 0); | ||||||
|  |    crc = __crc32cd(crc, 0); | ||||||
|  |    /* return computed value, to prevent the above being optimized away */ | ||||||
|  |    return crc == 0;])], | ||||||
|  |   [Ac_cachevar=yes], | ||||||
|  |   [Ac_cachevar=no]) | ||||||
|  | CFLAGS="$pgac_save_CFLAGS"]) | ||||||
|  | if test x"$Ac_cachevar" = x"yes"; then | ||||||
|  |   CFLAGS_ARMV8_CRC32C="$1" | ||||||
|  |   pgac_armv8_crc32c_intrinsics=yes | ||||||
|  | fi | ||||||
|  | undefine([Ac_cachevar])dnl | ||||||
|  | ])# PGAC_ARMV8_CRC32C_INTRINSICS | ||||||
|   | |||||||
							
								
								
									
										188
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										188
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @@ -646,6 +646,7 @@ MSGMERGE | |||||||
| MSGFMT_FLAGS | MSGFMT_FLAGS | ||||||
| MSGFMT | MSGFMT | ||||||
| PG_CRC32C_OBJS | PG_CRC32C_OBJS | ||||||
|  | CFLAGS_ARMV8_CRC32C | ||||||
| CFLAGS_SSE42 | CFLAGS_SSE42 | ||||||
| have_win32_dbghelp | have_win32_dbghelp | ||||||
| HAVE_IPV6 | HAVE_IPV6 | ||||||
| @@ -17254,30 +17255,177 @@ if ac_fn_c_try_compile "$LINENO"; then : | |||||||
| fi | fi | ||||||
| rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext | rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext | ||||||
|  |  | ||||||
|  | # Check for ARMv8 CRC Extension intrinsics to do CRC calculations. | ||||||
|  | # | ||||||
|  | # First check if __crc32c* intrinsics can be used with the default compiler | ||||||
|  | # flags. If not, check if adding -march=armv8-a+crc flag helps. | ||||||
|  | # CFLAGS_ARMV8_CRC32C is set if the extra flag is required. | ||||||
|  | { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=" >&5 | ||||||
|  | $as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=... " >&6; } | ||||||
|  | if ${pgac_cv_armv8_crc32c_intrinsics_+:} false; then : | ||||||
|  |   $as_echo_n "(cached) " >&6 | ||||||
|  | else | ||||||
|  |   pgac_save_CFLAGS=$CFLAGS | ||||||
|  | CFLAGS="$pgac_save_CFLAGS " | ||||||
|  | cat confdefs.h - <<_ACEOF >conftest.$ac_ext | ||||||
|  | /* end confdefs.h.  */ | ||||||
|  | #include <arm_acle.h> | ||||||
|  | int | ||||||
|  | main () | ||||||
|  | { | ||||||
|  | unsigned int crc = 0; | ||||||
|  |    crc = __crc32cb(crc, 0); | ||||||
|  |    crc = __crc32ch(crc, 0); | ||||||
|  |    crc = __crc32cw(crc, 0); | ||||||
|  |    crc = __crc32cd(crc, 0); | ||||||
|  |    /* return computed value, to prevent the above being optimized away */ | ||||||
|  |    return crc == 0; | ||||||
|  |   ; | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
|  | _ACEOF | ||||||
|  | if ac_fn_c_try_link "$LINENO"; then : | ||||||
|  |   pgac_cv_armv8_crc32c_intrinsics_=yes | ||||||
|  | else | ||||||
|  |   pgac_cv_armv8_crc32c_intrinsics_=no | ||||||
|  | fi | ||||||
|  | rm -f core conftest.err conftest.$ac_objext \ | ||||||
|  |     conftest$ac_exeext conftest.$ac_ext | ||||||
|  | CFLAGS="$pgac_save_CFLAGS" | ||||||
|  | fi | ||||||
|  | { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_armv8_crc32c_intrinsics_" >&5 | ||||||
|  | $as_echo "$pgac_cv_armv8_crc32c_intrinsics_" >&6; } | ||||||
|  | if test x"$pgac_cv_armv8_crc32c_intrinsics_" = x"yes"; then | ||||||
|  |   CFLAGS_ARMV8_CRC32C="" | ||||||
|  |   pgac_armv8_crc32c_intrinsics=yes | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | if test x"$pgac_armv8_crc32c_intrinsics" != x"yes"; then | ||||||
|  |   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc" >&5 | ||||||
|  | $as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc... " >&6; } | ||||||
|  | if ${pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc+:} false; then : | ||||||
|  |   $as_echo_n "(cached) " >&6 | ||||||
|  | else | ||||||
|  |   pgac_save_CFLAGS=$CFLAGS | ||||||
|  | CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc" | ||||||
|  | cat confdefs.h - <<_ACEOF >conftest.$ac_ext | ||||||
|  | /* end confdefs.h.  */ | ||||||
|  | #include <arm_acle.h> | ||||||
|  | int | ||||||
|  | main () | ||||||
|  | { | ||||||
|  | unsigned int crc = 0; | ||||||
|  |    crc = __crc32cb(crc, 0); | ||||||
|  |    crc = __crc32ch(crc, 0); | ||||||
|  |    crc = __crc32cw(crc, 0); | ||||||
|  |    crc = __crc32cd(crc, 0); | ||||||
|  |    /* return computed value, to prevent the above being optimized away */ | ||||||
|  |    return crc == 0; | ||||||
|  |   ; | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
|  | _ACEOF | ||||||
|  | if ac_fn_c_try_link "$LINENO"; then : | ||||||
|  |   pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc=yes | ||||||
|  | else | ||||||
|  |   pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc=no | ||||||
|  | fi | ||||||
|  | rm -f core conftest.err conftest.$ac_objext \ | ||||||
|  |     conftest$ac_exeext conftest.$ac_ext | ||||||
|  | CFLAGS="$pgac_save_CFLAGS" | ||||||
|  | fi | ||||||
|  | { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc" >&5 | ||||||
|  | $as_echo "$pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc" >&6; } | ||||||
|  | if test x"$pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc" = x"yes"; then | ||||||
|  |   CFLAGS_ARMV8_CRC32C="-march=armv8-a+crc" | ||||||
|  |   pgac_armv8_crc32c_intrinsics=yes | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | fi | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # In order to detect at runtime, if the ARM CRC Extension is available, | ||||||
|  | # we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have | ||||||
|  | # everything we need for that. | ||||||
|  | for ac_func in getauxval | ||||||
|  | do : | ||||||
|  |   ac_fn_c_check_func "$LINENO" "getauxval" "ac_cv_func_getauxval" | ||||||
|  | if test "x$ac_cv_func_getauxval" = xyes; then : | ||||||
|  |   cat >>confdefs.h <<_ACEOF | ||||||
|  | #define HAVE_GETAUXVAL 1 | ||||||
|  | _ACEOF | ||||||
|  |  | ||||||
|  | fi | ||||||
|  | done | ||||||
|  |  | ||||||
|  | cat confdefs.h - <<_ACEOF >conftest.$ac_ext | ||||||
|  | /* end confdefs.h.  */ | ||||||
|  |  | ||||||
|  | #include <sys/auxv.h> | ||||||
|  | #include <asm/hwcap.h> | ||||||
|  |  | ||||||
|  | int | ||||||
|  | main () | ||||||
|  | { | ||||||
|  |  | ||||||
|  | #ifndef AT_HWCAP | ||||||
|  | #error AT_HWCAP not defined | ||||||
|  | #endif | ||||||
|  | #ifndef HWCAP_CRC32 | ||||||
|  | #error HWCAP_CRC32 not defined | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   ; | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
|  | _ACEOF | ||||||
|  | if ac_fn_c_try_compile "$LINENO"; then : | ||||||
|  |   HAVE_HWCAP_CRC32=1 | ||||||
|  | fi | ||||||
|  | rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext | ||||||
|  |  | ||||||
| # Select CRC-32C implementation. | # Select CRC-32C implementation. | ||||||
| # | # | ||||||
| # If we are targeting a processor that has SSE 4.2 instructions, we can use the | # If we are targeting a processor that has Intel SSE 4.2 instructions, we can | ||||||
| # special CRC instructions for calculating CRC-32C. If we're not targeting such | # use the special CRC instructions for calculating CRC-32C. If we're not | ||||||
| # a processor, but we can nevertheless produce code that uses the SSE | # targeting such a processor, but we can nevertheless produce code that uses | ||||||
| # intrinsics, perhaps with some extra CFLAGS, compile both implementations and | # the SSE intrinsics, perhaps with some extra CFLAGS, compile both | ||||||
| # select which one to use at runtime, depending on whether SSE 4.2 is supported | # implementations and select which one to use at runtime, depending on whether | ||||||
| # by the processor we're running on. | # SSE 4.2 is supported by the processor we're running on. | ||||||
|  | # | ||||||
|  | # Similarly, if we are targeting an ARM processor that has the CRC | ||||||
|  | # instructions that are part of the ARMv8 CRC Extension, use them. And if | ||||||
|  | # we're not targeting such a processor, but can nevertheless produce code that | ||||||
|  | # uses the CRC instructions, compile both, and select at runtime. | ||||||
| # | # | ||||||
| # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 | # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 | ||||||
| # in the template or configure command line. | # in the template or configure command line. | ||||||
| if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then | if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x""; then | ||||||
|  |   # Use Intel SSE 4.2 if available. | ||||||
|   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then |   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then | ||||||
|     USE_SSE42_CRC32C=1 |     USE_SSE42_CRC32C=1 | ||||||
|   else |   else | ||||||
|     # the CPUID instruction is needed for the runtime check. |     # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for | ||||||
|  |     # the runtime check. | ||||||
|     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then |     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then | ||||||
|       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 |       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 | ||||||
|     else |     else | ||||||
|       # fall back to slicing-by-8 algorithm which doesn't require any special |       # Use ARM CRC Extension if available. | ||||||
|       # CPU support. |       if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARMV8_CRC32C" = x""; then | ||||||
|  |         USE_ARMV8_CRC32C=1 | ||||||
|  |       else | ||||||
|  |         # ARM CRC Extension, with runtime check? The getauxval() function and | ||||||
|  |         # HWCAP_CRC32 are needed for the runtime check. | ||||||
|  |         if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then | ||||||
|  |           USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 | ||||||
|  |         else | ||||||
|  |           # fall back to slicing-by-8 algorithm, which doesn't require any | ||||||
|  |           # special CPU support. | ||||||
|           USE_SLICING_BY_8_CRC32C=1 |           USE_SLICING_BY_8_CRC32C=1 | ||||||
| 	fi | 	fi | ||||||
|       fi |       fi | ||||||
|  |     fi | ||||||
|  |   fi | ||||||
| fi | fi | ||||||
|  |  | ||||||
| # Set PG_CRC32C_OBJS appropriately depending on the selected implementation. | # Set PG_CRC32C_OBJS appropriately depending on the selected implementation. | ||||||
| @@ -17295,10 +17443,26 @@ else | |||||||
|  |  | ||||||
| $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h | $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h | ||||||
|  |  | ||||||
|     PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o" |     PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" | ||||||
|     { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 |     { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 | ||||||
| $as_echo "SSE 4.2 with runtime check" >&6; } | $as_echo "SSE 4.2 with runtime check" >&6; } | ||||||
|   else |   else | ||||||
|  |     if test x"$USE_ARMV8_CRC32C" = x"1"; then | ||||||
|  |  | ||||||
|  | $as_echo "#define USE_ARMV8_CRC32C 1" >>confdefs.h | ||||||
|  |  | ||||||
|  |       PG_CRC32C_OBJS="pg_crc32c_armv8.o" | ||||||
|  |       { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions" >&5 | ||||||
|  | $as_echo "ARMv8 CRC instructions" >&6; } | ||||||
|  |     else | ||||||
|  |       if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then | ||||||
|  |  | ||||||
|  | $as_echo "#define USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h | ||||||
|  |  | ||||||
|  |         PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" | ||||||
|  |         { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions with runtime check" >&5 | ||||||
|  | $as_echo "ARMv8 CRC instructions with runtime check" >&6; } | ||||||
|  |       else | ||||||
|  |  | ||||||
| $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h | $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h | ||||||
|  |  | ||||||
| @@ -17306,6 +17470,8 @@ $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h | |||||||
|         { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 |         { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 | ||||||
| $as_echo "slicing-by-8" >&6; } | $as_echo "slicing-by-8" >&6; } | ||||||
|       fi |       fi | ||||||
|  |     fi | ||||||
|  |   fi | ||||||
| fi | fi | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										79
									
								
								configure.in
									
									
									
									
									
								
							
							
						
						
									
										79
									
								
								configure.in
									
									
									
									
									
								
							| @@ -2003,30 +2003,75 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [ | |||||||
| #endif | #endif | ||||||
| ])], [SSE4_2_TARGETED=1]) | ])], [SSE4_2_TARGETED=1]) | ||||||
|  |  | ||||||
|  | # Check for ARMv8 CRC Extension intrinsics to do CRC calculations. | ||||||
|  | # | ||||||
|  | # First check if __crc32c* intrinsics can be used with the default compiler | ||||||
|  | # flags. If not, check if adding -march=armv8-a+crc flag helps. | ||||||
|  | # CFLAGS_ARMV8_CRC32C is set if the extra flag is required. | ||||||
|  | PGAC_ARMV8_CRC32C_INTRINSICS([]) | ||||||
|  | if test x"$pgac_armv8_crc32c_intrinsics" != x"yes"; then | ||||||
|  |   PGAC_ARMV8_CRC32C_INTRINSICS([-march=armv8-a+crc]) | ||||||
|  | fi | ||||||
|  | AC_SUBST(CFLAGS_ARMV8_CRC32C) | ||||||
|  |  | ||||||
|  | # In order to detect at runtime, if the ARM CRC Extension is available, | ||||||
|  | # we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have | ||||||
|  | # everything we need for that. | ||||||
|  | AC_CHECK_FUNCS([getauxval]) | ||||||
|  | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ | ||||||
|  | #include <sys/auxv.h> | ||||||
|  | #include <asm/hwcap.h> | ||||||
|  | ], [ | ||||||
|  | #ifndef AT_HWCAP | ||||||
|  | #error AT_HWCAP not defined | ||||||
|  | #endif | ||||||
|  | #ifndef HWCAP_CRC32 | ||||||
|  | #error HWCAP_CRC32 not defined | ||||||
|  | #endif | ||||||
|  | ])], [HAVE_HWCAP_CRC32=1]) | ||||||
|  |  | ||||||
| # Select CRC-32C implementation. | # Select CRC-32C implementation. | ||||||
| # | # | ||||||
| # If we are targeting a processor that has SSE 4.2 instructions, we can use the | # If we are targeting a processor that has Intel SSE 4.2 instructions, we can | ||||||
| # special CRC instructions for calculating CRC-32C. If we're not targeting such | # use the special CRC instructions for calculating CRC-32C. If we're not | ||||||
| # a processor, but we can nevertheless produce code that uses the SSE | # targeting such a processor, but we can nevertheless produce code that uses | ||||||
| # intrinsics, perhaps with some extra CFLAGS, compile both implementations and | # the SSE intrinsics, perhaps with some extra CFLAGS, compile both | ||||||
| # select which one to use at runtime, depending on whether SSE 4.2 is supported | # implementations and select which one to use at runtime, depending on whether | ||||||
| # by the processor we're running on. | # SSE 4.2 is supported by the processor we're running on. | ||||||
|  | # | ||||||
|  | # Similarly, if we are targeting an ARM processor that has the CRC | ||||||
|  | # instructions that are part of the ARMv8 CRC Extension, use them. And if | ||||||
|  | # we're not targeting such a processor, but can nevertheless produce code that | ||||||
|  | # uses the CRC instructions, compile both, and select at runtime. | ||||||
| # | # | ||||||
| # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 | # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 | ||||||
| # in the template or configure command line. | # in the template or configure command line. | ||||||
| if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then | if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x""; then | ||||||
|  |   # Use Intel SSE 4.2 if available. | ||||||
|   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then |   if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then | ||||||
|     USE_SSE42_CRC32C=1 |     USE_SSE42_CRC32C=1 | ||||||
|   else |   else | ||||||
|     # the CPUID instruction is needed for the runtime check. |     # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for | ||||||
|  |     # the runtime check. | ||||||
|     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then |     if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then | ||||||
|       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 |       USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 | ||||||
|     else |     else | ||||||
|       # fall back to slicing-by-8 algorithm which doesn't require any special |       # Use ARM CRC Extension if available. | ||||||
|       # CPU support. |       if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARMV8_CRC32C" = x""; then | ||||||
|  |         USE_ARMV8_CRC32C=1 | ||||||
|  |       else | ||||||
|  |         # ARM CRC Extension, with runtime check? The getauxval() function and | ||||||
|  |         # HWCAP_CRC32 are needed for the runtime check. | ||||||
|  |         if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then | ||||||
|  |           USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 | ||||||
|  |         else | ||||||
|  |           # fall back to slicing-by-8 algorithm, which doesn't require any | ||||||
|  |           # special CPU support. | ||||||
|           USE_SLICING_BY_8_CRC32C=1 |           USE_SLICING_BY_8_CRC32C=1 | ||||||
| 	fi | 	fi | ||||||
|       fi |       fi | ||||||
|  |     fi | ||||||
|  |   fi | ||||||
| fi | fi | ||||||
|  |  | ||||||
| # Set PG_CRC32C_OBJS appropriately depending on the selected implementation. | # Set PG_CRC32C_OBJS appropriately depending on the selected implementation. | ||||||
| @@ -2038,13 +2083,25 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then | |||||||
| else | else | ||||||
|   if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then |   if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then | ||||||
|     AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) |     AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) | ||||||
|     PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o" |     PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" | ||||||
|     AC_MSG_RESULT(SSE 4.2 with runtime check) |     AC_MSG_RESULT(SSE 4.2 with runtime check) | ||||||
|  |   else | ||||||
|  |     if test x"$USE_ARMV8_CRC32C" = x"1"; then | ||||||
|  |       AC_DEFINE(USE_ARMV8_CRC32C, 1, [Define to 1 to use ARMv8 CRC Extension.]) | ||||||
|  |       PG_CRC32C_OBJS="pg_crc32c_armv8.o" | ||||||
|  |       AC_MSG_RESULT(ARMv8 CRC instructions) | ||||||
|  |     else | ||||||
|  |       if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then | ||||||
|  |         AC_DEFINE(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARMv8 CRC Extension with a runtime check.]) | ||||||
|  |         PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" | ||||||
|  |         AC_MSG_RESULT(ARMv8 CRC instructions with runtime check) | ||||||
|       else |       else | ||||||
|         AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).]) |         AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).]) | ||||||
|         PG_CRC32C_OBJS="pg_crc32c_sb8.o" |         PG_CRC32C_OBJS="pg_crc32c_sb8.o" | ||||||
|         AC_MSG_RESULT(slicing-by-8) |         AC_MSG_RESULT(slicing-by-8) | ||||||
|       fi |       fi | ||||||
|  |     fi | ||||||
|  |   fi | ||||||
| fi | fi | ||||||
| AC_SUBST(PG_CRC32C_OBJS) | AC_SUBST(PG_CRC32C_OBJS) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -260,6 +260,7 @@ CXX = @CXX@ | |||||||
| CFLAGS = @CFLAGS@ | CFLAGS = @CFLAGS@ | ||||||
| CFLAGS_VECTOR = @CFLAGS_VECTOR@ | CFLAGS_VECTOR = @CFLAGS_VECTOR@ | ||||||
| CFLAGS_SSE42 = @CFLAGS_SSE42@ | CFLAGS_SSE42 = @CFLAGS_SSE42@ | ||||||
|  | CFLAGS_ARMV8_CRC32C = @CFLAGS_ARMV8_CRC32C@ | ||||||
| CXXFLAGS = @CXXFLAGS@ | CXXFLAGS = @CXXFLAGS@ | ||||||
|  |  | ||||||
| LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ | LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ | ||||||
|   | |||||||
| @@ -239,6 +239,9 @@ | |||||||
| /* Define to 1 if you have the `getaddrinfo' function. */ | /* Define to 1 if you have the `getaddrinfo' function. */ | ||||||
| #undef HAVE_GETADDRINFO | #undef HAVE_GETADDRINFO | ||||||
|  |  | ||||||
|  | /* Define to 1 if you have the `getauxval' function. */ | ||||||
|  | #undef HAVE_GETAUXVAL | ||||||
|  |  | ||||||
| /* Define to 1 if you have the `gethostbyname_r' function. */ | /* Define to 1 if you have the `gethostbyname_r' function. */ | ||||||
| #undef HAVE_GETHOSTBYNAME_R | #undef HAVE_GETHOSTBYNAME_R | ||||||
|  |  | ||||||
| @@ -842,6 +845,12 @@ | |||||||
| /* Define to 1 if your <sys/time.h> declares `struct tm'. */ | /* Define to 1 if your <sys/time.h> declares `struct tm'. */ | ||||||
| #undef TM_IN_SYS_TIME | #undef TM_IN_SYS_TIME | ||||||
|  |  | ||||||
|  | /* Define to 1 to use ARMv8 CRC Extension. */ | ||||||
|  | #undef USE_ARMV8_CRC32C | ||||||
|  |  | ||||||
|  | /* Define to 1 to use ARMv8 CRC Extension with a runtime check. */ | ||||||
|  | #undef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK | ||||||
|  |  | ||||||
| /* Define to 1 to build with assertion checks. (--enable-cassert) */ | /* Define to 1 to build with assertion checks. (--enable-cassert) */ | ||||||
| #undef USE_ASSERT_CHECKING | #undef USE_ASSERT_CHECKING | ||||||
|  |  | ||||||
|   | |||||||
| @@ -42,26 +42,42 @@ typedef uint32 pg_crc32c; | |||||||
| #define EQ_CRC32C(c1, c2) ((c1) == (c2)) | #define EQ_CRC32C(c1, c2) ((c1) == (c2)) | ||||||
|  |  | ||||||
| #if defined(USE_SSE42_CRC32C) | #if defined(USE_SSE42_CRC32C) | ||||||
| /* Use SSE4.2 instructions. */ | /* Use Intel SSE4.2 instructions. */ | ||||||
| #define COMP_CRC32C(crc, data, len) \ | #define COMP_CRC32C(crc, data, len) \ | ||||||
| 	((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) | 	((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) | ||||||
| #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) | #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) | ||||||
|  |  | ||||||
| extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); | extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); | ||||||
|  |  | ||||||
| #elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) | #elif defined(USE_ARMV8_CRC32C) | ||||||
|  | /* Use ARMv8 CRC Extension instructions. */ | ||||||
|  |  | ||||||
|  | #define COMP_CRC32C(crc, data, len)							\ | ||||||
|  | 	((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) | ||||||
|  | #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) | ||||||
|  |  | ||||||
|  | extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); | ||||||
|  |  | ||||||
|  | #elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Use SSE4.2 instructions, but perform a runtime check first to check that |  * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first | ||||||
|  * they are available. |  * to check that they are available. | ||||||
|  */ |  */ | ||||||
| #define COMP_CRC32C(crc, data, len) \ | #define COMP_CRC32C(crc, data, len) \ | ||||||
| 	((crc) = pg_comp_crc32c((crc), (data), (len))) | 	((crc) = pg_comp_crc32c((crc), (data), (len))) | ||||||
| #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) | #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) | ||||||
|  |  | ||||||
| extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); |  | ||||||
| extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); | extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); | ||||||
| extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); | extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); | ||||||
|  |  | ||||||
|  | #ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK | ||||||
|  | extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); | ||||||
|  | #endif | ||||||
|  | #ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK | ||||||
|  | extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #else | #else | ||||||
| /* | /* | ||||||
|  * Use slicing-by-8 algorithm. |  * Use slicing-by-8 algorithm. | ||||||
|   | |||||||
| @@ -65,6 +65,10 @@ thread.o: CFLAGS+=$(PTHREAD_CFLAGS) | |||||||
| pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42) | pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42) | ||||||
| pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42) | pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42) | ||||||
|  |  | ||||||
|  | # pg_crc32c_armv8.o and its _srv.o version need CFLAGS_ARMV8_CRC32C | ||||||
|  | pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) | ||||||
|  | pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C) | ||||||
|  |  | ||||||
| # | # | ||||||
| # Server versions of object files | # Server versions of object files | ||||||
| # | # | ||||||
|   | |||||||
							
								
								
									
										72
									
								
								src/port/pg_crc32c_armv8.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/port/pg_crc32c_armv8.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * pg_crc32c_armv8.c | ||||||
|  |  *	  Compute CRC-32C checksum using ARMv8 CRC Extension instructions | ||||||
|  |  * | ||||||
|  |  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group | ||||||
|  |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  |  * | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  src/port/pg_crc32c_armv8.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  | #include "c.h" | ||||||
|  |  | ||||||
|  | #include "port/pg_crc32c.h" | ||||||
|  |  | ||||||
|  | #include <arm_acle.h> | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * pg_comp_crc32c_armv8 | ||||||
|  |  *		Feed 'len' bytes starting at 'data' into the running CRC-32C value | ||||||
|  |  *		'crc' using the ARMv8 CRC Extension intrinsics, and return the | ||||||
|  |  *		updated value. | ||||||
|  |  * | ||||||
|  |  * Only accumulation happens here; no final inversion is applied to the | ||||||
|  |  * returned value. | ||||||
|  |  */ | ||||||
|  | pg_crc32c | ||||||
|  | pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len) | ||||||
|  | { | ||||||
|  | 	const unsigned char *p = data; | ||||||
|  | 	const unsigned char *pend = p + len; | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	 * ARMv8 doesn't require alignment, but aligned memory access is | ||||||
|  | 	 * significantly faster. Process leading bytes so that the loop below | ||||||
|  | 	 * starts with a pointer aligned to eight bytes. | ||||||
|  | 	 * | ||||||
|  | 	 * Each step consumes 1, 2 or 4 bytes, so it must verify that the whole | ||||||
|  | 	 * unit lies inside the buffer, not merely that some input is left. | ||||||
|  | 	 * Otherwise a short input starting at an unaligned address would be read | ||||||
|  | 	 * past its end and the extra bytes would be folded into the CRC. If the | ||||||
|  | 	 * input is too short to reach alignment, the tail code below picks up | ||||||
|  | 	 * whatever is left. | ||||||
|  | 	 */ | ||||||
|  | 	if (!PointerIsAligned(p, uint16) && pend - p >= 1) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32cb(crc, *p); | ||||||
|  | 		p += 1; | ||||||
|  | 	} | ||||||
|  | 	if (!PointerIsAligned(p, uint32) && pend - p >= 2) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32ch(crc, *(const uint16 *) p); | ||||||
|  | 		p += 2; | ||||||
|  | 	} | ||||||
|  | 	if (!PointerIsAligned(p, uint64) && pend - p >= 4) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32cw(crc, *(const uint32 *) p); | ||||||
|  | 		p += 4; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Process eight bytes at a time, as far as we can. */ | ||||||
|  | 	while (pend - p >= 8) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32cd(crc, *(const uint64 *) p); | ||||||
|  | 		p += 8; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* Process remaining 0-7 bytes. */ | ||||||
|  | 	if (pend - p >= 4) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32cw(crc, *(const uint32 *) p); | ||||||
|  | 		p += 4; | ||||||
|  | 	} | ||||||
|  | 	if (pend - p >= 2) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32ch(crc, *(const uint16 *) p); | ||||||
|  | 		p += 2; | ||||||
|  | 	} | ||||||
|  | 	if (pend - p >= 1) | ||||||
|  | 	{ | ||||||
|  | 		crc = __crc32cb(crc, *p); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return crc; | ||||||
|  | } | ||||||
							
								
								
									
										55
									
								
								src/port/pg_crc32c_armv8_choose.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								src/port/pg_crc32c_armv8_choose.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | |||||||
|  | /*------------------------------------------------------------------------- | ||||||
|  |  * | ||||||
|  |  * pg_crc32c_armv8_choose.c | ||||||
|  |  *	  Choose between ARMv8 and software CRC-32C implementation. | ||||||
|  |  * | ||||||
|  |  * On first call, checks if the CPU we're running on supports the ARMv8 | ||||||
|  |  * CRC Extension. If it does, use the special instructions for CRC-32C | ||||||
|  |  * computation. Otherwise, fall back to the pure software implementation | ||||||
|  |  * (slicing-by-8). | ||||||
|  |  * | ||||||
|  |  * XXX: The glibc-specific getauxval() function, with the HWCAP_CRC32 | ||||||
|  |  * flag, is used to determine if the CRC Extension is available on the | ||||||
|  |  * current platform. Is there a more portable way to determine that? | ||||||
|  |  * | ||||||
|  |  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group | ||||||
|  |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  |  * | ||||||
|  |  * | ||||||
|  |  * IDENTIFICATION | ||||||
|  |  *	  src/port/pg_crc32c_armv8_choose.c | ||||||
|  |  * | ||||||
|  |  *------------------------------------------------------------------------- | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #include "c.h" | ||||||
|  |  | ||||||
|  | #include <sys/auxv.h> | ||||||
|  | #include <asm/hwcap.h> | ||||||
|  |  | ||||||
|  | #include "port/pg_crc32c.h" | ||||||
|  |  | ||||||
/*
 * pg_crc32c_armv8_available
 *		Ask the kernel, via the ELF auxiliary vector, whether the CPU we are
 *		running on implements the CRC instructions.
 *
 * The capability bit is exposed under different names depending on the
 * architecture: HWCAP_CRC32 in the AT_HWCAP word, or HWCAP2_CRC32 in the
 * AT_HWCAP2 word.  Use whichever one the system headers provide.  If neither
 * is defined we cannot tell, so report "not available"; the caller then uses
 * the software implementation, which is always correct.
 *
 * Returns true if the CRC instructions can be used, false otherwise.
 */
static bool
pg_crc32c_armv8_available(void)
{
#if defined(HWCAP_CRC32)
	return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
#elif defined(HWCAP2_CRC32)
	return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0;
#else
	return false;
#endif
}
|  |  | ||||||
|  | /* | ||||||
|  |  * This gets called on the first call. It replaces the function pointer | ||||||
|  |  * so that subsequent calls are routed directly to the chosen implementation. | ||||||
|  |  */ | ||||||
|  | static pg_crc32c | ||||||
|  | pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) | ||||||
|  | { | ||||||
|  | 	if (pg_crc32c_armv8_available()) | ||||||
|  | 		pg_comp_crc32c = pg_comp_crc32c_armv8; | ||||||
|  | 	else | ||||||
|  | 		pg_comp_crc32c = pg_comp_crc32c_sb8; | ||||||
|  |  | ||||||
|  | 	return pg_comp_crc32c(crc, data, len); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; | ||||||
| @@ -1,18 +1,19 @@ | |||||||
| /*-------------------------------------------------------------------------
 | /*-------------------------------------------------------------------------
 | ||||||
|  * |  * | ||||||
|  * pg_crc32c_choose.c |  * pg_crc32c_sse42_choose.c | ||||||
|  *	  Choose which CRC-32C implementation to use, at runtime. |  *	  Choose between Intel SSE 4.2 and software CRC-32C implementation. | ||||||
|  * |  * | ||||||
|  * Try to the special CRC instructions introduced in Intel SSE 4.2, |  * On first call, checks if the CPU we're running on supports Intel SSE | ||||||
|  * if available on the platform we're running on, but fall back to the |  * 4.2. If it does, use the special SSE instructions for CRC-32C | ||||||
|  * slicing-by-8 implementation otherwise. |  * computation. Otherwise, fall back to the pure software implementation | ||||||
|  |  * (slicing-by-8). | ||||||
|  * |  * | ||||||
|  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group |  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group | ||||||
|  * Portions Copyright (c) 1994, Regents of the University of California |  * Portions Copyright (c) 1994, Regents of the University of California | ||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * IDENTIFICATION |  * IDENTIFICATION | ||||||
|  *	  src/port/pg_crc32c_choose.c |  *	  src/port/pg_crc32c_sse42_choose.c | ||||||
|  * |  * | ||||||
|  *------------------------------------------------------------------------- |  *------------------------------------------------------------------------- | ||||||
|  */ |  */ | ||||||
| @@ -101,7 +101,7 @@ sub mkvcbuild | |||||||
|  |  | ||||||
| 	if ($vsVersion >= '9.00') | 	if ($vsVersion >= '9.00') | ||||||
| 	{ | 	{ | ||||||
| 		push(@pgportfiles, 'pg_crc32c_choose.c'); | 		push(@pgportfiles, 'pg_crc32c_sse42_choose.c'); | ||||||
| 		push(@pgportfiles, 'pg_crc32c_sse42.c'); | 		push(@pgportfiles, 'pg_crc32c_sse42.c'); | ||||||
| 		push(@pgportfiles, 'pg_crc32c_sb8.c'); | 		push(@pgportfiles, 'pg_crc32c_sb8.c'); | ||||||
| 	} | 	} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user