diff --git a/mysys/crc32/crc32_arm64.c b/mysys/crc32/crc32_arm64.c index c39f7b5f6f6..1f2f023e6ca 100644 --- a/mysys/crc32/crc32_arm64.c +++ b/mysys/crc32/crc32_arm64.c @@ -4,31 +4,38 @@ #include typedef unsigned (*my_crc32_t)(unsigned, const void *, size_t); +unsigned crc32_aarch64(unsigned, const void *, size_t); #ifdef HAVE_ARMV8_CRC -#ifdef _WIN32 -#include +# ifdef HAVE_ARMV8_CRYPTO +static unsigned crc32c_aarch64_pmull(unsigned, const void *, size_t); +# endif + +# ifdef _WIN32 +# include +# ifdef __clang__ +# include +# include +# endif int crc32_aarch64_available(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); } -const char *crc32c_aarch64_available(void) +unsigned crc32c_aarch64(unsigned, const void *, size_t); + +my_crc32_t crc32c_aarch64_available(void) { if (crc32_aarch64_available() == 0) return NULL; /* TODO : pmull seems supported, but does not compile*/ - return "Using ARMv8 crc32 instructions"; + return crc32c_aarch64; } -#endif /* _WIN32 */ -#ifdef HAVE_ARMV8_CRYPTO -static unsigned crc32c_aarch64_pmull(unsigned, const void *, size_t); -# endif - -# ifdef __APPLE__ -# include +# else /* _WIN32 */ +# ifdef __APPLE__ +# include int crc32_aarch64_available(void) { @@ -41,16 +48,16 @@ int crc32_aarch64_available(void) my_crc32_t crc32c_aarch64_available(void) { -# ifdef HAVE_ARMV8_CRYPTO +# ifdef HAVE_ARMV8_CRYPTO if (crc32_aarch64_available()) return crc32c_aarch64_pmull; -# endif +# endif return NULL; } -# else /* __APPLE__ */ -# include -# ifdef __FreeBSD__ +# else /* __APPLE__ */ +# include +# ifdef __FreeBSD__ static unsigned long getauxval(unsigned int key) { unsigned long val; @@ -58,17 +65,17 @@ static unsigned long getauxval(unsigned int key) return 0ul; return val; } -# else -# include -# endif +# else +# include +# endif -# ifndef HWCAP_CRC32 -# define HWCAP_CRC32 (1 << 7) -# endif +# ifndef HWCAP_CRC32 +# define HWCAP_CRC32 (1 << 7) +# endif -# ifndef HWCAP_PMULL -# define HWCAP_PMULL (1 << 4) -# endif +# ifndef HWCAP_PMULL +# define HWCAP_PMULL (1 << 4) +# endif /* ARM made crc32 default from ARMv8.1 but optional in ARMv8A * Runtime check API. @@ -78,9 +85,9 @@ int crc32_aarch64_available(void) unsigned long auxv= getauxval(AT_HWCAP); return (auxv & HWCAP_CRC32) != 0; } -# endif /* __APPLE__ */ +# endif /* __APPLE__ */ -# ifndef __APPLE__ +# ifndef __APPLE__ static unsigned crc32c_aarch64(unsigned, const void *, size_t); my_crc32_t crc32c_aarch64_available(void) @@ -88,14 +95,15 @@ my_crc32_t crc32c_aarch64_available(void) unsigned long auxv= getauxval(AT_HWCAP); if (!(auxv & HWCAP_CRC32)) return NULL; -# ifdef HAVE_ARMV8_CRYPTO +# ifdef HAVE_ARMV8_CRYPTO /* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030). */ if (auxv & HWCAP_PMULL) return crc32c_aarch64_pmull; -# endif +# endif return crc32c_aarch64; } -# endif /* __APPLE__ */ +# endif /* __APPLE__ */ +# endif /* _WIN32 */ const char *crc32c_aarch64_impl(my_crc32_t c) { @@ -370,7 +378,7 @@ static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len) /* There are multiple approaches to calculate crc. Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes Approach-2: Process 8 bytes and remaining workload using 1 bytes -Apporach-3: Process 64 bytes at once by issuing 8 crc call and remaining +Approach-3: Process 64 bytes at once by issuing 8 crc call and remaining using 8/1 combination. Based on micro-benchmark testing we found that Approach-2 works best especially