From dec3f8ca69e5eb19a4be7a175d3834874c4d880b Mon Sep 17 00:00:00 2001 From: mysqlonarm <61234003+mysqlonarm@users.noreply.github.com> Date: Mon, 1 Jun 2020 14:04:06 +0530 Subject: [PATCH] MDEV-22641: Provide SIMD optimized wrapper for zlib crc32() (#1558) The existing implementation used my_checksum() (from mysys) for calculating the table checksum and the binlog checksum. That implementation was optimized for PowerPC only; it lacked SIMD implementations for x86 (using CLMUL) and ARM (using ACLE) and used zlib crc32() on those platforms instead. mariabackup had its own copy of the crc32 implementation, which was hardware-optimized only for x86 and lacked hardware-based implementations for PowerPC and ARM. This patch unifies all such calls behind a single interface, my_checksum(). The unification also enables hardware-optimized implementations on all supported architectures, viz. x86, ARM and PowerPC. The default always falls back to zlib crc32(). Thanks to Daniel Black for reviewing, fixing and testing the PowerPC changes. Thanks to Marko and Daniel for early code feedback. 
--- CMakeLists.txt | 1 - cmake/crc32.cmake | 36 ---------- extra/crc32-vpmsum/CMakeLists.txt | 9 --- extra/crc32_armv8_neon/CMakeLists.txt | 8 --- extra/mariabackup/CMakeLists.txt | 5 +- extra/mariabackup/crc/CMakeLists.txt | 33 --------- extra/mariabackup/crc/config.h.cmake | 21 ------ extra/mariabackup/crc/crc-intel-pclmul.h | 25 ------- extra/mariabackup/crc/crc_glue.c | 72 ------------------- extra/mariabackup/crc/crc_glue.h | 31 -------- extra/mariabackup/xbstream.cc | 3 +- extra/mariabackup/xbstream_read.cc | 4 +- extra/mariabackup/xbstream_write.cc | 3 +- extra/mariabackup/xtrabackup.cc | 3 +- include/my_sys.h | 15 +++- mysys/CMakeLists.txt | 55 +++++++++++++- mysys/checksum.c | 56 +++++++++------ .../crc32}/clang_workaround.h | 0 .../crc32/crc32_arm64.c | 45 ++++++++++-- .../vec_crc32.c => mysys/crc32/crc32_ppc64.c | 3 +- .../crc32/crc32_x86.c | 36 +++++++++- .../crc32/pcc_crc32_constants.h | 0 .../crc32/pcc_crc32c_constants.h | 0 mysys/my_init.c | 4 +- storage/innobase/ut/ut0crc32.cc | 8 +-- storage/maria/ma_loghandler.c | 2 +- storage/maria/ma_pagecrc.c | 2 +- storage/rocksdb/rdb_converter.cc | 8 +-- storage/rocksdb/rdb_datadic.cc | 9 +-- 29 files changed, 200 insertions(+), 297 deletions(-) delete mode 100644 cmake/crc32.cmake delete mode 100644 extra/crc32-vpmsum/CMakeLists.txt delete mode 100644 extra/crc32_armv8_neon/CMakeLists.txt delete mode 100644 extra/mariabackup/crc/CMakeLists.txt delete mode 100644 extra/mariabackup/crc/config.h.cmake delete mode 100644 extra/mariabackup/crc/crc-intel-pclmul.h delete mode 100644 extra/mariabackup/crc/crc_glue.c delete mode 100644 extra/mariabackup/crc/crc_glue.h rename {extra/crc32-vpmsum => mysys/crc32}/clang_workaround.h (100%) rename extra/crc32_armv8_neon/crc32_armv8.c => mysys/crc32/crc32_arm64.c (88%) rename extra/crc32-vpmsum/vec_crc32.c => mysys/crc32/crc32_ppc64.c (99%) rename extra/mariabackup/crc/crc-intel-pclmul.c => mysys/crc32/crc32_x86.c (94%) rename extra/crc32-vpmsum/crc32ieee_constants.h => 
mysys/crc32/pcc_crc32_constants.h (100%) rename extra/crc32-vpmsum/crc32c_constants.h => mysys/crc32/pcc_crc32c_constants.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d85d565e52..b170ecabef5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,7 +170,6 @@ INCLUDE(systemd) INCLUDE(mysql_add_executable) INCLUDE(symlinks) INCLUDE(compile_flags) -INCLUDE(crc32) INCLUDE(pmem) # Handle options diff --git a/cmake/crc32.cmake b/cmake/crc32.cmake deleted file mode 100644 index 78d57dec3fb..00000000000 --- a/cmake/crc32.cmake +++ /dev/null @@ -1,36 +0,0 @@ -IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") - IF(CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) - include(CheckCXXSourceCompiles) - - CHECK_CXX_SOURCE_COMPILES(" - #define CRC32CX(crc, value) __asm__(\"crc32cx %w[c], %w[c], %x[v]\":[c]\"+r\"(crc):[v]\"r\"(value)) - asm(\".arch_extension crc\"); - unsigned int foo(unsigned int ret) { - CRC32CX(ret, 0); - return ret; - } - int main() { foo(0); }" HAVE_ARMV8_CRC) - - CHECK_CXX_SOURCE_COMPILES(" - asm(\".arch_extension crypto\"); - unsigned int foo(unsigned int ret) { - __asm__(\"pmull v2.1q, v2.1d, v1.1d\"); - return ret; - } - int main() { foo(0); }" HAVE_ARMV8_CRYPTO) - - CHECK_C_COMPILER_FLAG(-march=armv8-a+crc+crypto HAVE_ARMV8_CRC_CRYPTO_INTRINSICS) - IF(HAVE_ARMV8_CRC_CRYPTO_INTRINSICS) - SET(ARMV8_CRC_COMPILE_FLAGS "${ARMV8_CRC_COMPILE_FLAGS} -march=armv8-a+crc+crypto") - ENDIF() - - SET(CRC32_LIBRARY crc32_armv8_neon) - ADD_SUBDIRECTORY(extra/crc32_armv8_neon) - ENDIF() -ENDIF() - -IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64") - SET(HAVE_CRC32_VPMSUM 1) - SET(CRC32_LIBRARY crc32-vpmsum) - ADD_SUBDIRECTORY(extra/crc32-vpmsum) -ENDIF() diff --git a/extra/crc32-vpmsum/CMakeLists.txt b/extra/crc32-vpmsum/CMakeLists.txt deleted file mode 100644 index b4adebdadf5..00000000000 --- a/extra/crc32-vpmsum/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -ADD_CONVENIENCE_LIBRARY(${CRC32_LIBRARY} $ $) -ADD_LIBRARY(crc32c OBJECT 
vec_crc32.c) -ADD_LIBRARY(crc32ieee OBJECT vec_crc32.c) - -GET_PROPERTY(CFLAGS_CRC32_VPMSUM TARGET ${CRC32_LIBRARY} PROPERTY COMPILE_FLAGS) -SET_TARGET_PROPERTIES(crc32c crc32ieee PROPERTIES COMPILE_FLAGS "${CFLAGS_CRC32_VPMSUM} -maltivec -mvsx -mpower8-vector -mcrypto -mpower8-vector") -SET_TARGET_PROPERTIES(crc32ieee PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=crc32ieee_vpmsum;CRC32_CONSTANTS_HEADER=\"crc32ieee_constants.h\"") -SET_TARGET_PROPERTIES(crc32c PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=crc32c_vpmsum;CRC32_CONSTANTS_HEADER=\"crc32c_constants.h\"") - diff --git a/extra/crc32_armv8_neon/CMakeLists.txt b/extra/crc32_armv8_neon/CMakeLists.txt deleted file mode 100644 index ba1d34d7c2e..00000000000 --- a/extra/crc32_armv8_neon/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include) - -ADD_CONVENIENCE_LIBRARY(${CRC32_LIBRARY} $) -ADD_LIBRARY(common_crc32c_armv8 OBJECT crc32_armv8.c) - -SET_TARGET_PROPERTIES(common_crc32c_armv8 PROPERTIES COMPILE_FLAGS "${ARMV8_CRC_COMPILE_FLAGS}") - diff --git a/extra/mariabackup/CMakeLists.txt b/extra/mariabackup/CMakeLists.txt index ad36d2fa6a6..561f2ab0500 100644 --- a/extra/mariabackup/CMakeLists.txt +++ b/extra/mariabackup/CMakeLists.txt @@ -33,7 +33,6 @@ INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/sql ${CMAKE_CURRENT_SOURCE_DIR}/quicklz ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/crc ) IF(NOT HAVE_SYSTEM_REGEX) @@ -84,9 +83,8 @@ MYSQL_ADD_EXECUTABLE(mariadb-backup # Export all symbols on Unix, for better crash callstacks SET_TARGET_PROPERTIES(mariadb-backup PROPERTIES ENABLE_EXPORTS TRUE) -ADD_SUBDIRECTORY(crc) -TARGET_LINK_LIBRARIES(mariadb-backup sql sql_builtins crc) +TARGET_LINK_LIBRARIES(mariadb-backup sql sql_builtins) IF(NOT HAVE_SYSTEM_REGEX) TARGET_LINK_LIBRARIES(mariadb-backup pcre2-posix) ENDIF() @@ -109,7 +107,6 @@ MYSQL_ADD_EXECUTABLE(mbstream TARGET_LINK_LIBRARIES(mbstream mysys - crc ) 
ADD_DEPENDENCIES(mbstream GenError) diff --git a/extra/mariabackup/crc/CMakeLists.txt b/extra/mariabackup/crc/CMakeLists.txt deleted file mode 100644 index c057e59a7b9..00000000000 --- a/extra/mariabackup/crc/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2017 Percona LLC and/or its affiliates. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -PROJECT(crc C) - -IF(NOT CMAKE_CROSSCOMPILING AND NOT MSVC) - STRING(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} processor) - IF(processor MATCHES "86" OR processor MATCHES "amd64" OR processor MATCHES "x64") - # Check for PCLMUL instruction - CHECK_C_SOURCE_RUNS(" - int main() - { - asm volatile (\"pclmulqdq \\$0x00, %%xmm1, %%xmm0\":::\"cc\"); - return 0; - }" HAVE_CLMUL_INSTRUCTION) - ENDIF() -ENDIF() -IF(HAVE_CLMUL_INSTRUCTION) - ADD_DEFINITIONS(-DHAVE_CLMUL_INSTRUCTION) -ENDIF() -ADD_LIBRARY(crc STATIC crc_glue.c crc-intel-pclmul.c) diff --git a/extra/mariabackup/crc/config.h.cmake b/extra/mariabackup/crc/config.h.cmake deleted file mode 100644 index beca62d1efb..00000000000 --- a/extra/mariabackup/crc/config.h.cmake +++ /dev/null @@ -1,21 +0,0 @@ -/****************************************************** -Copyright (c) 2017 Percona LLC and/or its affiliates. - -Zlib compatible CRC-32 implementation. 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*******************************************************/ - -#cmakedefine HAVE_CLMUL_INSTRUCTION 1 diff --git a/extra/mariabackup/crc/crc-intel-pclmul.h b/extra/mariabackup/crc/crc-intel-pclmul.h deleted file mode 100644 index c95c59601ae..00000000000 --- a/extra/mariabackup/crc/crc-intel-pclmul.h +++ /dev/null @@ -1,25 +0,0 @@ -/****************************************************** -Copyright (c) 2017 Percona LLC and/or its affiliates. - -CRC32 using Intel's PCLMUL instruction. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*******************************************************/ - -#include -#include - -void -crc32_intel_pclmul(uint32_t *pcrc, const uint8_t *inbuf, size_t inlen); diff --git a/extra/mariabackup/crc/crc_glue.c b/extra/mariabackup/crc/crc_glue.c deleted file mode 100644 index bc14d0bb9df..00000000000 --- a/extra/mariabackup/crc/crc_glue.c +++ /dev/null @@ -1,72 +0,0 @@ -/****************************************************** -Copyright (c) 2017 Percona LLC and/or its affiliates. - -Zlib compatible CRC-32 implementation. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*******************************************************/ -#include "my_config.h" -#include "crc_glue.h" -#include "crc-intel-pclmul.h" -#include -#include -#include - -#if defined(__GNUC__) && defined(__x86_64__) -static int pclmul_enabled = 0; -#endif - -#if defined(__GNUC__) && defined(__x86_64__) -static -uint32_t -cpuid(uint32_t* ecx, uint32_t* edx) -{ - uint32_t level; - - asm("cpuid" : "=a" (level) : "a" (0) : "ebx", "ecx", "edx"); - - if (level < 1) { - return level; - } - - asm("cpuid" : "=c" (*ecx), "=d" (*edx) - : "a" (1) - : "ebx"); - - return level; -} -#endif - -void crc_init() { -#if defined(__GNUC__) && defined(__x86_64__) - uint32_t ecx, edx; - - if (cpuid(&ecx, &edx) > 0) { - pclmul_enabled = ((ecx >> 19) & 1) && ((ecx >> 1) & 1); - } -#endif -} - -unsigned long crc32_iso3309(unsigned long crc, const unsigned char *buf, unsigned int len) -{ -#if __GNUC__ >= 4 && defined(__x86_64__) && defined(HAVE_CLMUL_INSTRUCTION) - if (pclmul_enabled) { - uint32_t crc_accum = (uint32_t) ~crc; - crc32_intel_pclmul(&crc_accum, buf, len); - return ~crc_accum; - } -#endif - return crc32(crc, buf, len); -} diff --git a/extra/mariabackup/crc/crc_glue.h b/extra/mariabackup/crc/crc_glue.h deleted file mode 100644 index 598330263b3..00000000000 --- a/extra/mariabackup/crc/crc_glue.h +++ /dev/null @@ -1,31 +0,0 @@ -/****************************************************** -Copyright (c) 2017 Percona LLC and/or its affiliates. - -Zlib compatible CRC-32 implementation. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*******************************************************/ - - -#ifdef __cplusplus -extern "C" { -#endif - -void crc_init(); -unsigned long crc32_iso3309(unsigned long crc, const unsigned char *buf, unsigned int len); - -#ifdef __cplusplus -} -#endif diff --git a/extra/mariabackup/xbstream.cc b/extra/mariabackup/xbstream.cc index ba55141cdd9..761b8e69890 100644 --- a/extra/mariabackup/xbstream.cc +++ b/extra/mariabackup/xbstream.cc @@ -26,7 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA #include "common.h" #include "xbstream.h" #include "datasink.h" -#include "crc_glue.h" #define XBSTREAM_VERSION "1.0" #define XBSTREAM_BUFFER_SIZE (10 * 1024 * 1024UL) @@ -98,7 +97,7 @@ main(int argc, char **argv) { MY_INIT(argv[0]); - crc_init(); + my_checksum_init(); if (get_options(&argc, &argv)) { goto err; diff --git a/extra/mariabackup/xbstream_read.cc b/extra/mariabackup/xbstream_read.cc index 3880dd50ed5..84bb279aba0 100644 --- a/extra/mariabackup/xbstream_read.cc +++ b/extra/mariabackup/xbstream_read.cc @@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA #include #include "common.h" #include "xbstream.h" -#include "crc_glue.h" /* Allocate 1 MB for the payload buffer initially */ #define INIT_BUFFER_LEN (1024 * 1024) @@ -71,8 +70,7 @@ xb_rstream_result_t xb_stream_validate_checksum(xb_rstream_chunk_t *chunk) { ulong checksum; - - checksum = crc32_iso3309(0, (unsigned char *)chunk->data, (uint)chunk->length); + checksum = my_checksum(0, chunk->data, 
chunk->length); if (checksum != chunk->checksum) { msg("xb_stream_read_chunk(): invalid checksum at offset " "0x%llx: expected 0x%lx, read 0x%lx.", diff --git a/extra/mariabackup/xbstream_write.cc b/extra/mariabackup/xbstream_write.cc index b6fd9c294a5..2c9ffde6c42 100644 --- a/extra/mariabackup/xbstream_write.cc +++ b/extra/mariabackup/xbstream_write.cc @@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA #include #include "common.h" #include "xbstream.h" -#include "crc_glue.h" /* Group writes smaller than this into a single chunk */ #define XB_STREAM_MIN_CHUNK_SIZE (10 * 1024 * 1024) @@ -216,7 +215,7 @@ xb_stream_write_chunk(xb_wstream_file_t *file, const void *buf, size_t len) int8store(ptr, len); /* Payload length */ ptr += 8; - checksum = crc32_iso3309(0, (const uchar *)buf, (uint)len); /* checksum */ + checksum = my_checksum(0, buf, len); pthread_mutex_lock(&stream->mutex); diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 1eba84e381f..d50832405a3 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -101,7 +101,6 @@ Street, Fifth Floor, Boston, MA 02110-1335 USA #include "encryption_plugin.h" #include #include -#include #include #include #include @@ -4034,7 +4033,7 @@ fail: trx_pool_init(); ut_crc32_init(); - crc_init(); + my_checksum_init(); recv_sys.create(); #ifdef WITH_INNODB_DISALLOW_WRITES diff --git a/include/my_sys.h b/include/my_sys.h index 639429e9c26..0807d5b6701 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -921,8 +921,18 @@ extern int my_compress_buffer(uchar *dest, size_t *destLen, extern int packfrm(const uchar *, size_t, uchar **, size_t *); extern int unpackfrm(uchar **, size_t *, const uchar *); -extern ha_checksum my_checksum(ha_checksum crc, const uchar *mem, - size_t count); +void my_checksum_init(void); +#ifdef HAVE_CRC32_VPMSUM +extern my_checksum(ha_checksum, const void *, size_t); +#else +typedef ha_checksum 
(*my_crc32_t)(ha_checksum, const void *, size_t); +extern my_crc32_t my_checksum; +#endif + +#if defined(__GNUC__) && defined(HAVE_ARMV8_CRC) +int crc32_aarch64_available(void); +#endif + #ifdef DBUG_ASSERT_EXISTS extern void my_debug_put_break_here(void); #else @@ -930,7 +940,6 @@ extern void my_debug_put_break_here(void); #endif extern void my_sleep(ulong m_seconds); -extern ulong crc32(ulong crc, const uchar *buf, uint len); extern uint my_set_max_open_files(uint files); void my_free_open_file_info(void); diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index ef159748092..ecb4166802f 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -58,6 +58,59 @@ IF (WIN32) my_win_popen.cc) ENDIF() +IF(NOT MSVC AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + #Check for PCLMUL instruction (x86) + CHECK_C_SOURCE_COMPILES(" + int main() + { + asm volatile (\"pclmulqdq \\$0x00, %%xmm1, %%xmm0\":::\"cc\"); + return 0; + }" HAVE_CLMUL_INSTRUCTION) + + IF(HAVE_CLMUL_INSTRUCTION) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_x86.c) + ENDIF() +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + IF(CMAKE_COMPILER_IS_GNUCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) + include(CheckCXXSourceCompiles) + + CHECK_CXX_SOURCE_COMPILES(" + #define CRC32CX(crc, value) __asm__(\"crc32cx %w[c], %w[c], %x[v]\":[c]\"+r\"(crc):[v]\"r\"(value)) + asm(\".arch_extension crc\"); + unsigned int foo(unsigned int ret) { + CRC32CX(ret, 0); + return ret; + } + #include + int main() { foo(0); getauxval(AT_HWCAP); }" HAVE_ARMV8_CRC) + + CHECK_CXX_SOURCE_COMPILES(" + asm(\".arch_extension crypto\"); + unsigned int foo(unsigned int ret) { + __asm__(\"pmull v2.1q, v2.1d, v1.1d\"); + return ret; + } + int main() { foo(0); }" HAVE_ARMV8_CRYPTO) + + CHECK_C_COMPILER_FLAG(-march=armv8-a+crc+crypto HAVE_ARMV8_CRC_CRYPTO_INTRINSICS) + IF(HAVE_ARMV8_CRC_CRYPTO_INTRINSICS) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_arm64.c) + SET_SOURCE_FILES_PROPERTIES(crc32/crc32_arm64.c 
PROPERTIES + COMPILE_FLAGS "-march=armv8-a+crc+crypto") + ENDIF() + ENDIF() +ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64") + SET(HAVE_CRC32_VPMSUM 1) + SET(MYSYS_SOURCES ${MYSYS_SOURCES} $ $) + + ADD_LIBRARY(crc32c OBJECT crc32/crc32_ppc64.c) + ADD_LIBRARY(crc32ieee OBJECT crc32/crc32_ppc64.c) + + SET_TARGET_PROPERTIES(crc32c crc32ieee PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -maltivec -mvsx -mpower8-vector -mcrypto -mpower8-vector") + SET_TARGET_PROPERTIES(crc32ieee PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=my_checksum;CRC32_CONSTANTS_HEADER=\"pcc_crc32_constants.h\"") + SET_TARGET_PROPERTIES(crc32c PROPERTIES COMPILE_DEFINITIONS "CRC32_FUNCTION=crc32c_vpmsum;CRC32_CONSTANTS_HEADER=\"pcc_crc32c_constants.h\"") +ENDIF() + IF(UNIX) SET (MYSYS_SOURCES ${MYSYS_SOURCES} my_addr_resolve.c my_setuser.c) ENDIF() @@ -73,7 +126,7 @@ ENDIF() ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES}) MAYBE_DISABLE_IPO(mysys) TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} - ${LIBNSL} ${LIBM} ${LIBRT} ${LIBDL} ${LIBSOCKET} ${LIBEXECINFO} ${CRC32_LIBRARY}) + ${LIBNSL} ${LIBM} ${LIBRT} ${LIBDL} ${LIBSOCKET} ${LIBEXECINFO}) DTRACE_INSTRUMENT(mysys) IF(HAVE_BFD_H) diff --git a/mysys/checksum.c b/mysys/checksum.c index 91e681d0db4..e86c2726722 100644 --- a/mysys/checksum.c +++ b/mysys/checksum.c @@ -18,25 +18,41 @@ #include #include -/* - Calculate a long checksum for a memoryblock. - - SYNOPSIS - my_checksum() - crc start value for crc - pos pointer to memory block - length length of the block -*/ - -ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length) +/* TODO: remove this once zlib adds inherent support for hardware accelerated +crc32 for all architectures. 
*/ +static unsigned int my_crc32_zlib(unsigned int crc, const void *data, + size_t len) { -#ifdef HAVE_CRC32_VPMSUM - extern unsigned int crc32ieee_vpmsum(unsigned int crc, const unsigned char *p, - unsigned long len); - crc= (ha_checksum) crc32ieee_vpmsum((uint) crc, pos, (uint) length); -#else - crc= (ha_checksum) crc32((uint)crc, pos, (uint) length); -#endif - DBUG_PRINT("info", ("crc: %lu", (ulong) crc)); - return crc; + return (unsigned int) crc32(crc, data, (unsigned int) len); } + +#if !defined(HAVE_CRC32_VPMSUM) +my_crc32_t my_checksum= my_crc32_zlib; +#endif + +#if __GNUC__ >= 4 && defined(__x86_64__) + +extern int crc32_pclmul_enabled(); +extern unsigned int crc32_pclmul(unsigned int, const void *, size_t); + +/*----------------------------- x86_64 ---------------------------------*/ +void my_checksum_init(void) +{ + if (crc32_pclmul_enabled()) + my_checksum= crc32_pclmul; +} +#elif defined(__GNUC__) && defined(HAVE_ARMV8_CRC) +/*----------------------------- aarch64 --------------------------------*/ + +extern unsigned int crc32_aarch64(unsigned int, const void *, size_t); + +/* Ideally all ARM 64 bit processor should support crc32 but if some model +doesn't support better to find it out through auxillary vector. 
*/ +void my_checksum_init(void) +{ + if (crc32_aarch64_available()) + my_checksum= crc32_aarch64; +} +#else +void my_checksum_init(void) {} +#endif diff --git a/extra/crc32-vpmsum/clang_workaround.h b/mysys/crc32/clang_workaround.h similarity index 100% rename from extra/crc32-vpmsum/clang_workaround.h rename to mysys/crc32/clang_workaround.h diff --git a/extra/crc32_armv8_neon/crc32_armv8.c b/mysys/crc32/crc32_arm64.c similarity index 88% rename from extra/crc32_armv8_neon/crc32_armv8.c rename to mysys/crc32/crc32_arm64.c index 20f341552e2..09ac7a12a66 100644 --- a/extra/crc32_armv8_neon/crc32_armv8.c +++ b/mysys/crc32/crc32_arm64.c @@ -1,8 +1,8 @@ #include #include +#include - -#if defined(__GNUC__) && defined(__linux__) && defined(HAVE_ARMV8_CRC) +#if defined(__GNUC__) && defined(HAVE_ARMV8_CRC) #include #include @@ -11,12 +11,13 @@ #define HWCAP_CRC32 (1 << 7) #endif -unsigned int crc32c_aarch64_available(void) +/* ARM made crc32 default from ARMv8.1 but optional in ARMv8A +so the runtime check. */ +int crc32_aarch64_available(void) { - unsigned long auxv = getauxval(AT_HWCAP); - return (auxv & HWCAP_CRC32) != 0; + unsigned long auxv= getauxval(AT_HWCAP); + return (auxv & HWCAP_CRC32) != 0; } - #endif #ifndef HAVE_ARMV8_CRC_CRYPTO_INTRINSICS @@ -299,3 +300,35 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len) return (~crc); } + +/* There are multiple approaches to calculate crc. +Approach-1: Process 8 bytes then 4 bytes then 2 bytes and then 1 bytes +Approach-2: Process 8 bytes and remaining workload using 1 bytes +Apporach-3: Process 64 bytes at once by issuing 8 crc call and remaining + using 8/1 combination. + +Based on micro-benchmark testing we found that Approach-2 works best especially +given small chunk of variable data. 
*/ +unsigned int crc32_aarch64(unsigned int crc, const void *buf, size_t len) +{ + const uint8_t *buf1= buf; + const uint64_t *buf8= (const uint64_t *) (((uintptr_t) buf + 7) & ~7); + + crc= ~crc; + + /* if start pointer is not 8 bytes aligned */ + while ((buf1 != (const uint8_t *) buf8) && len) + { + crc= __crc32b(crc, *buf1++); + len--; + } + + for (; len >= 8; len-= 8) + crc= __crc32d(crc, *buf8++); + + buf1= (const uint8_t *) buf8; + while (len--) + crc= __crc32b(crc, *buf1++); + + return ~crc; +} diff --git a/extra/crc32-vpmsum/vec_crc32.c b/mysys/crc32/crc32_ppc64.c similarity index 99% rename from extra/crc32-vpmsum/vec_crc32.c rename to mysys/crc32/crc32_ppc64.c index bb2204b247c..2e8b9fc1b12 100644 --- a/extra/crc32-vpmsum/vec_crc32.c +++ b/mysys/crc32/crc32_ppc64.c @@ -151,6 +151,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { 0xffffffffffffffffUL}; #ifdef REFLECT + __vector unsigned char vsht_splat; const __vector unsigned long long vmask_32bit = (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, (__vector unsigned char)vones, 4); @@ -598,7 +599,7 @@ __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { #ifdef REFLECT /* shift left one bit */ - __vector unsigned char vsht_splat = vec_splat_u8 (1); + vsht_splat = vec_splat_u8 (1); v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0, vsht_splat); #endif diff --git a/extra/mariabackup/crc/crc-intel-pclmul.c b/mysys/crc32/crc32_x86.c similarity index 94% rename from extra/mariabackup/crc/crc-intel-pclmul.c rename to mysys/crc32/crc32_x86.c index 032802c1823..3f176a6c145 100644 --- a/extra/mariabackup/crc/crc-intel-pclmul.c +++ b/mysys/crc32/crc32_x86.c @@ -39,6 +39,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA * */ +#include + #include #include #include @@ -55,7 +57,7 @@ typedef uint8_t byte; # define _gcry_bswap32 __builtin_bswap32 -#if __GNUC__ >= 4 && defined(__x86_64__) && 
defined(HAVE_CLMUL_INSTRUCTION) +#if __GNUC__ >= 4 && defined(__x86_64__) #if defined(_GCRY_GCC_VERSION) && _GCRY_GCC_VERSION >= 40400 /* 4.4 */ /* Prevent compiler from issuing SSE instructions between asm blocks. */ @@ -508,4 +510,36 @@ crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) #endif } +#ifdef __GNUC__ +int crc32_pclmul_enabled(void) +{ + int eax, ecx; + /* We assume that the CPUID instruction and its parameter 1 are available. + We do not support any precursors of the Intel 80486. */ + asm("cpuid" : "=a"(eax), "=c"(ecx) : "0"(1) : "ebx", "edx"); + return !(~ecx & (1 << 19 | 1 << 1)); +} +#elif 0 /* defined _MSC_VER */ /* FIXME: implement the pclmul interface */ +#include +int crc32_pclmul_enabled(void) +{ + /* We assume that the CPUID instruction and its parameter 1 are available. + We do not support any precursors of the Intel 80486. */ + int regs[4]; + __cpuid(regs, 1); + return !(~regs[2] & (1 << 19 | 1 << 1)); +} +#else +int crc32_pclmul_enabled(void) +{ + return 0; +} +#endif + +unsigned int crc32_pclmul(unsigned int crc32, const void *buf, size_t len) +{ + crc32= ~crc32; + crc32_intel_pclmul(&crc32, buf, len); + return ~crc32; +} #endif diff --git a/extra/crc32-vpmsum/crc32ieee_constants.h b/mysys/crc32/pcc_crc32_constants.h similarity index 100% rename from extra/crc32-vpmsum/crc32ieee_constants.h rename to mysys/crc32/pcc_crc32_constants.h diff --git a/extra/crc32-vpmsum/crc32c_constants.h b/mysys/crc32/pcc_crc32c_constants.h similarity index 100% rename from extra/crc32-vpmsum/crc32c_constants.h rename to mysys/crc32/pcc_crc32c_constants.h diff --git a/mysys/my_init.c b/mysys/my_init.c index 4ae0cb9966c..cd9875017f0 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -59,7 +59,6 @@ static ulong atoi_octal(const char *str) MYSQL_FILE *mysql_stdin= NULL; static MYSQL_FILE instrumented_stdin; - /** Initialize my_sys functions, resources and variables @@ -101,6 +100,9 @@ my_bool my_init(void) /* Initialize our mutex handling */ 
my_mutex_init(); + /* Initialize CPU architecture specific hardware based crc32 optimization */ + my_checksum_init(); + if (my_thread_global_init()) return 1; diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc index 3275448293f..3c40a2aac7d 100644 --- a/storage/innobase/ut/ut0crc32.cc +++ b/storage/innobase/ut/ut0crc32.cc @@ -97,11 +97,9 @@ unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned lo ut_crc32_func_t ut_crc32_low= crc32c_vpmsum; const char* ut_crc32_implementation = "Using POWER8 crc32 instructions"; #else -# if defined(__GNUC__) && defined(__linux__) && defined(HAVE_ARMV8_CRC) +# if defined(__GNUC__) && defined(HAVE_ARMV8_CRC) extern "C" { uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len); -/* For runtime check */ -unsigned int crc32c_aarch64_available(void); }; # elif defined(_MSC_VER) # define TRY_SSE4_2 @@ -343,8 +341,8 @@ allocations, would not hurt if called twice, but would be pointless. 
*/ void ut_crc32_init() { #ifndef HAVE_CRC32_VPMSUM -# if defined(__GNUC__) && defined(__linux__) && defined(HAVE_ARMV8_CRC) - if (crc32c_aarch64_available()) +# if defined(__GNUC__) && defined(HAVE_ARMV8_CRC) + if (crc32_aarch64_available()) { ut_crc32_low= crc32c_aarch64; ut_crc32_implementation= "Using ARMv8 crc32 instructions"; diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 89e7e7d1551..0ec0e58f8a8 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -1909,7 +1909,7 @@ static void translog_put_sector_protection(uchar *page, static uint32 translog_crc(uchar *area, uint length) { DBUG_ENTER("translog_crc"); - DBUG_RETURN(crc32(0L, (unsigned char*) area, length)); + DBUG_RETURN(my_checksum(0L, area, length)); } diff --git a/storage/maria/ma_pagecrc.c b/storage/maria/ma_pagecrc.c index f682397d996..4e1389b1163 100644 --- a/storage/maria/ma_pagecrc.c +++ b/storage/maria/ma_pagecrc.c @@ -28,7 +28,7 @@ static uint32 maria_page_crc(uint32 start, uchar *data, uint length) { - uint32 crc= crc32(start, data, length); + uint32 crc= my_checksum(start, data, length); /* we need this assert to get following comparison working */ compile_time_assert(MARIA_NO_CRC_BITMAP_PAGE == diff --git a/storage/rocksdb/rdb_converter.cc b/storage/rocksdb/rdb_converter.cc index e799d67f813..65f0b81cc7f 100644 --- a/storage/rocksdb/rdb_converter.cc +++ b/storage/rocksdb/rdb_converter.cc @@ -646,9 +646,9 @@ int Rdb_converter::verify_row_debug_checksum( rdb_netbuf_to_uint32((const uchar *)reader->read(RDB_CHECKSUM_SIZE)); const uint32_t computed_key_chksum = - my_core::crc32(0, rdb_slice_to_uchar_ptr(key), key->size()); + my_core::my_checksum(0, rdb_slice_to_uchar_ptr(key), key->size()); const uint32_t computed_val_chksum = - my_core::crc32(0, rdb_slice_to_uchar_ptr(value), + my_core::my_checksum(0, rdb_slice_to_uchar_ptr(value), value->size() - RDB_CHECKSUM_CHUNK_SIZE); DBUG_EXECUTE_IF("myrocks_simulate_bad_pk_checksum1", 
stored_key_chksum++;); @@ -816,10 +816,10 @@ int Rdb_converter::encode_value_slice( } if (store_row_debug_checksums) { - const uint32_t key_crc32 = my_core::crc32( + const uint32_t key_crc32 = my_core::my_checksum( 0, rdb_slice_to_uchar_ptr(&pk_packed_slice), pk_packed_slice.size()); const uint32_t val_crc32 = - my_core::crc32(0, rdb_mysql_str_to_uchar_str(&m_storage_record), + my_core::my_checksum(0, rdb_mysql_str_to_uchar_str(&m_storage_record), m_storage_record.length()); uchar key_crc_buf[RDB_CHECKSUM_SIZE]; uchar val_crc_buf[RDB_CHECKSUM_SIZE]; diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index a7b44ff85ab..719830af283 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -1432,9 +1432,10 @@ uint Rdb_key_def::pack_record(const TABLE *const tbl, uchar *const pack_buffer, // ha_rocksdb::convert_record_to_storage_format // if (should_store_row_debug_checksums) { - const uint32_t key_crc32 = crc32(0, packed_tuple, tuple - packed_tuple); + const uint32_t key_crc32 = + my_checksum(0, packed_tuple, tuple - packed_tuple); const uint32_t val_crc32 = - crc32(0, unpack_info->ptr(), unpack_info->get_current_pos()); + my_checksum(0, unpack_info->ptr(), unpack_info->get_current_pos()); unpack_info->write_uint8(RDB_CHECKSUM_DATA_TAG); unpack_info->write_uint32(key_crc32); @@ -1690,9 +1691,9 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf, (const uchar *)unp_reader.read(RDB_CHECKSUM_SIZE)); const uint32_t computed_key_chksum = - crc32(0, (const uchar *)packed_key->data(), packed_key->size()); + my_checksum(0, packed_key->data(), packed_key->size()); const uint32_t computed_val_chksum = - crc32(0, (const uchar *)unpack_info->data(), + my_checksum(0, unpack_info->data(), unpack_info->size() - RDB_CHECKSUM_CHUNK_SIZE); DBUG_EXECUTE_IF("myrocks_simulate_bad_key_checksum1",