From 5b3ad94c7b070be1b1e5ab186c5d4d017e1fe8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 21 Jan 2022 19:24:00 +0200 Subject: [PATCH] MDEV-27208: Extend CRC32() and implement CRC32C() We used to define a native unary function CRC32() that computes the CRC-32 of a string using the ISO 3309 polynomial that is being used by zlib and many others. Often, a CRC is computed in pieces. To facilitate this, we introduce a 2-ary variant of the function that inputs a previous CRC as the first argument: CRC32('MariaDB')=CRC32(CRC32('Maria'),'DB'). InnoDB and MyRocks use a different polynomial, which was implemented in SSE4.2 instructions that were introduced in the Intel Nehalem microarchitecture. This is commonly called CRC-32C (Castagnoli). We introduce a native function that uses the Castagnoli polynomial: CRC32C('MariaDB')=CRC32C(CRC32C('Maria'),'DB'). This allows SELECT...INTO DUMPFILE to be used for the creation of files with valid checksums, such as a logically empty InnoDB redo log file ib_logfile0 corresponding to a particular log sequence number. 
--- mysql-test/main/func_math.result | 117 +++++++++++++++++++++++++++---- mysql-test/main/func_math.test | 49 ++++++++++++- mysys/crc32ieee.cc | 4 +- sql/item_create.cc | 68 ++++++++++++++++-- sql/item_strfunc.cc | 26 +++++-- sql/item_strfunc.h | 22 ++++-- 6 files changed, 254 insertions(+), 32 deletions(-) diff --git a/mysql-test/main/func_math.result b/mysql-test/main/func_math.result index ec2521bcfd7..0c6fcb08239 100644 --- a/mysql-test/main/func_math.result +++ b/mysql-test/main/func_math.result @@ -1840,24 +1840,115 @@ CRC32(99999999999999999999999999999999) SELECT CRC32(-99999999999999999999999999999999); CRC32(-99999999999999999999999999999999) 1052326872 +SELECT CRC32C(NULL), CRC32C(''), CRC32C('MariaDB'), CRC32C('mariadb'); +CRC32C(NULL) CRC32C('') CRC32C('MariaDB') CRC32C('mariadb') +NULL 0 809606978 1378644259 +SELECT CRC32(NULL,1),CRC32C(NULL,1), CRC32(1,''), CRC32C(1,''); +CRC32(NULL,1) CRC32C(NULL,1) CRC32(1,'') CRC32C(1,'') +NULL NULL 1 1 +SELECT CRC32(42,''),CRC32C(42,''),CRC32('42',''),CRC32C('42',''); +CRC32(42,'') CRC32C(42,'') CRC32('42','') CRC32C('42','') +42 42 42 42 +SELECT CRC32(42,NULL),CRC32C(42,NULL); +CRC32(42,NULL) CRC32C(42,NULL) +NULL NULL +SELECT CRC32 ('5c',''),CRC32 ('5c',0),CRC32 ('5c', '0'),CRC32 ('5c',NULL); +CRC32 ('5c','') CRC32 ('5c',0) CRC32 ('5c', '0') CRC32 ('5c',NULL) +5 2226203566 2226203566 NULL +Warnings: +Warning 1292 Truncated incorrect INTEGER value: '5c' +Warning 1292 Truncated incorrect INTEGER value: '5c' +Warning 1292 Truncated incorrect INTEGER value: '5c' +Warning 1292 Truncated incorrect INTEGER value: '5c' +SELECT CRC32C('5c',''),CRC32C('5c',0),CRC32C('5c', '0'),CRC32C('5c',NULL); +CRC32C('5c','') CRC32C('5c',0) CRC32C('5c', '0') CRC32C('5c',NULL) +5 1466896124 1466896124 NULL +Warnings: +Warning 1292 Truncated incorrect INTEGER value: '5c' +Warning 1292 Truncated incorrect INTEGER value: '5c' +Warning 1292 Truncated incorrect INTEGER value: '5c' +Warning 1292 Truncated incorrect INTEGER value: '5c' 
+SELECT CRC32('MariaDB',NULL),CRC32C('MariaDB',NULL); +CRC32('MariaDB',NULL) CRC32C('MariaDB',NULL) +NULL NULL +Warnings: +Warning 1292 Truncated incorrect INTEGER value: 'MariaDB' +Warning 1292 Truncated incorrect INTEGER value: 'MariaDB' +SELECT CRC32(CRC32('MySQL'),''),CRC32(CRC32('My'),'SQL'),CRC32(0,'MySQL'); +CRC32(CRC32('MySQL'),'') CRC32(CRC32('My'),'SQL') CRC32(0,'MySQL') +3259397556 3259397556 3259397556 +SELECT CRC32C(CRC32C('MariaDB'),''),CRC32C(CRC32C('Maria'),'DB'),CRC32C(0,'MariaDB'); +CRC32C(CRC32C('MariaDB'),'') CRC32C(CRC32C('Maria'),'DB') CRC32C(0,'MariaDB') +809606978 809606978 809606978 +select crc32(0,'My','SQL'); +ERROR 42000: Incorrect parameter count in the call to native function 'crc32' +select crc32c(0,'Maria','DB'); +ERROR 42000: Incorrect parameter count in the call to native function 'crc32c' +select crc32(); +ERROR 42000: Incorrect parameter count in the call to native function 'crc32' +select crc32c(); +ERROR 42000: Incorrect parameter count in the call to native function 'crc32c' +select crc32('' as empty); +ERROR 42000: Incorrect parameters in the call to native function 'crc32' +select crc32c('' as empty); +ERROR 42000: Incorrect parameters in the call to native function 'crc32c' +select crc32(0, '' as empty); +ERROR 42000: Incorrect parameters in the call to native function 'crc32' +select crc32c(0, '' as empty); +ERROR 42000: Incorrect parameters in the call to native function 'crc32c' +select crc32(0 as zero, ''); +ERROR 42000: Incorrect parameters in the call to native function 'crc32' +select crc32c(0 as zero, ''); +ERROR 42000: Incorrect parameters in the call to native function 'crc32c' +CREATE TEMPORARY TABLE t +(a CHAR(2), i INT UNSIGNED, c INT UNSIGNED AS (CRC32C(i,a))); +INSERT INTO t (a,i) VALUES ('DB',CRC32C('Maria')); +SELECT * FROM t; +a i c +DB 1253907744 809606978 +DROP TEMPORARY TABLE t; +select crc32(4294967296,''), hex(char(4294967296)); +crc32(4294967296,'') hex(char(4294967296)) +0 00 +select 
crc32(1e100,''), hex(char(1e100)); +crc32(1e100,'') hex(char(1e100)) +4294967295 FFFFFFFF +select crc32(10.11,''), hex(char(10.11)); +crc32(10.11,'') hex(char(10.11)) +10 0A +select crc32(-1,''), hex(char(-1)); +crc32(-1,'') hex(char(-1)) +4294967295 FFFFFFFF +select crc32('',''), hex(char('')); +crc32('','') hex(char('')) +0 00 +Warnings: +Warning 1292 Truncated incorrect INTEGER value: '' +Warning 1292 Truncated incorrect INTEGER value: '' +select crc32(429496729656755555555555555555555555555555555555555555555555555555555555555555555555555,'a') as x; +x +3310005809 +Warnings: +Warning 1916 Got overflow when converting '' to DECIMAL. Value truncated +Warning 1916 Got overflow when converting '99999999999999999999999999999999999999999999999999999999999999999' to INT. Value truncated DROP TABLE IF EXISTS t; Warnings: Note 1051 Unknown table 'test.t' CREATE TABLE t(a INT, b VARCHAR(2)); INSERT INTO t VALUES (1,'a'), (2,'qw'), (1,'t'), (3,'t'); -SELECT crc32(SUM(a)) FROM t; -crc32(SUM(a)) -1790921346 -SELECT crc32(AVG(a)) FROM t GROUP BY b; -crc32(AVG(a)) -768278432 -2875100430 -2875100430 -SELECT crc32(MAX(b)) FROM t GROUP BY a; -crc32(MAX(b)) -2238339752 -3114057431 -2238339752 +SELECT crc32(SUM(a)),crc32c(SUM(a)) FROM t; +crc32(SUM(a)) crc32c(SUM(a)) +1790921346 3058990603 +SELECT crc32(AVG(a)),crc32c(AVG(a)) FROM t GROUP BY b; +crc32(AVG(a)) crc32c(AVG(a)) +768278432 1816172052 +2875100430 1492934094 +2875100430 1492934094 +SELECT crc32(MAX(b)),crc32c(MAX(b)) FROM t GROUP BY a; +crc32(MAX(b)) crc32c(MAX(b)) +2238339752 3833565251 +3114057431 4173859780 +2238339752 3833565251 SELECT a, b, crc32(a) FROM t GROUP BY a,b HAVING crc32(MAX(a))=450215437; a b crc32(a) 2 qw 450215437 diff --git a/mysql-test/main/func_math.test b/mysql-test/main/func_math.test index 572e0fd0f6b..df83d13789c 100644 --- a/mysql-test/main/func_math.test +++ b/mysql-test/main/func_math.test @@ -849,15 +849,58 @@ SELECT CRC32('01234567'), CRC32('012345678'); SELECT CRC32('~!@$%^*'), 
CRC32('-0.0001'); SELECT CRC32(99999999999999999999999999999999); SELECT CRC32(-99999999999999999999999999999999); +SELECT CRC32C(NULL), CRC32C(''), CRC32C('MariaDB'), CRC32C('mariadb'); +SELECT CRC32(NULL,1),CRC32C(NULL,1), CRC32(1,''), CRC32C(1,''); +SELECT CRC32(42,''),CRC32C(42,''),CRC32('42',''),CRC32C('42',''); +SELECT CRC32(42,NULL),CRC32C(42,NULL); +SELECT CRC32 ('5c',''),CRC32 ('5c',0),CRC32 ('5c', '0'),CRC32 ('5c',NULL); +SELECT CRC32C('5c',''),CRC32C('5c',0),CRC32C('5c', '0'),CRC32C('5c',NULL); +SELECT CRC32('MariaDB',NULL),CRC32C('MariaDB',NULL); +SELECT CRC32(CRC32('MySQL'),''),CRC32(CRC32('My'),'SQL'),CRC32(0,'MySQL'); +SELECT CRC32C(CRC32C('MariaDB'),''),CRC32C(CRC32C('Maria'),'DB'),CRC32C(0,'MariaDB'); + +--error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT +select crc32(0,'My','SQL'); +--error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT +select crc32c(0,'Maria','DB'); +--error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT +select crc32(); +--error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT +select crc32c(); +--error ER_WRONG_PARAMETERS_TO_NATIVE_FCT +select crc32('' as empty); +--error ER_WRONG_PARAMETERS_TO_NATIVE_FCT +select crc32c('' as empty); +--error ER_WRONG_PARAMETERS_TO_NATIVE_FCT +select crc32(0, '' as empty); +--error ER_WRONG_PARAMETERS_TO_NATIVE_FCT +select crc32c(0, '' as empty); +--error ER_WRONG_PARAMETERS_TO_NATIVE_FCT +select crc32(0 as zero, ''); +--error ER_WRONG_PARAMETERS_TO_NATIVE_FCT +select crc32c(0 as zero, ''); + +CREATE TEMPORARY TABLE t +(a CHAR(2), i INT UNSIGNED, c INT UNSIGNED AS (CRC32C(i,a))); +INSERT INTO t (a,i) VALUES ('DB',CRC32C('Maria')); +SELECT * FROM t; +DROP TEMPORARY TABLE t; + +select crc32(4294967296,''), hex(char(4294967296)); +select crc32(1e100,''), hex(char(1e100)); +select crc32(10.11,''), hex(char(10.11)); +select crc32(-1,''), hex(char(-1)); +select crc32('',''), hex(char('')); +select crc32(429496729656755555555555555555555555555555555555555555555555555555555555555555555555555,'a') as x; # Test cases for using the function in aggregate 
functions, group-by, having # and order-by clauses DROP TABLE IF EXISTS t; CREATE TABLE t(a INT, b VARCHAR(2)); INSERT INTO t VALUES (1,'a'), (2,'qw'), (1,'t'), (3,'t'); -SELECT crc32(SUM(a)) FROM t; -SELECT crc32(AVG(a)) FROM t GROUP BY b; -SELECT crc32(MAX(b)) FROM t GROUP BY a; +SELECT crc32(SUM(a)),crc32c(SUM(a)) FROM t; +SELECT crc32(AVG(a)),crc32c(AVG(a)) FROM t GROUP BY b; +SELECT crc32(MAX(b)),crc32c(MAX(b)) FROM t GROUP BY a; SELECT a, b, crc32(a) FROM t GROUP BY a,b HAVING crc32(MAX(a))=450215437; SELECT a,b,concat(a,b),crc32(concat(a,b)) FROM t ORDER BY crc32(concat(a,b)); DROP TABLE t; diff --git a/mysys/crc32ieee.cc b/mysys/crc32ieee.cc index bbafa1230f8..14e8017de4b 100644 --- a/mysys/crc32ieee.cc +++ b/mysys/crc32ieee.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2020, 2021, MariaDB +/* Copyright (c) 2020, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,7 +55,7 @@ static const my_crc32_t my_checksum_func= init_crc32(); # error "my_checksum() is defined in mysys/crc32/crc32_ppc64.c" #endif extern "C" -unsigned int my_checksum(unsigned int crc, const void *data, size_t len) +uint32 my_checksum(uint32 crc, const void *data, size_t len) { return my_checksum_func(crc, data, len); } diff --git a/sql/item_create.cc b/sql/item_create.cc index 1aa7d02e76b..20b86cfc1dc 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. - Copyright (c) 2008, 2021, MariaDB Corporation. + Copyright (c) 2008, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -512,10 +512,10 @@ protected: }; -class Create_func_crc32 : public Create_func_arg1 +class Create_func_crc32 : public Create_native_func { public: - virtual Item *create_1_arg(THD *thd, Item *arg1); + Item *create_native(THD *thd, LEX_CSTRING *, List *item_list) override; static Create_func_crc32 s_singleton; @@ -525,6 +525,19 @@ protected: }; +class Create_func_crc32c : public Create_native_func +{ +public: + Item *create_native(THD *thd, LEX_CSTRING *, List *item_list) override; + + static Create_func_crc32c s_singleton; + +protected: + Create_func_crc32c() {} + virtual ~Create_func_crc32c() {} +}; + + class Create_func_datediff : public Create_func_arg2 { public: @@ -3118,11 +3131,55 @@ Create_func_cot::create_1_arg(THD *thd, Item *arg1) Create_func_crc32 Create_func_crc32::s_singleton; Item* -Create_func_crc32::create_1_arg(THD *thd, Item *arg1) +Create_func_crc32::create_native(THD *thd, LEX_CSTRING *name, + List *item_list) { - return new (thd->mem_root) Item_func_crc32(thd, arg1); + int argc= item_list ? item_list->elements : 0; + + if (unlikely(argc != 1 && argc != 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(), *arg2= argc < 2 ? nullptr : item_list->pop(); + + /* This was checked in Create_native_func::create_func() */ + DBUG_ASSERT(!arg1->is_explicit_name()); + DBUG_ASSERT(!arg2 || !arg2->is_explicit_name()); + + return arg2 + ? new (thd->mem_root) Item_func_crc32(thd, false, arg1, arg2) + : new (thd->mem_root) Item_func_crc32(thd, false, arg1); } + +Create_func_crc32c Create_func_crc32c::s_singleton; + +Item* +Create_func_crc32c::create_native(THD *thd, LEX_CSTRING *name, + List *item_list) +{ + int argc= item_list ? 
item_list->elements : 0; + + if (unlikely(argc != 1 && argc != 2)) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return nullptr; + } + + Item *arg1= item_list->pop(), *arg2= argc < 2 ? nullptr : item_list->pop(); + + /* This was checked in Create_native_func::create_func() */ + DBUG_ASSERT(!arg1->is_explicit_name()); + DBUG_ASSERT(!arg2 || !arg2->is_explicit_name()); + + return arg2 + ? new (thd->mem_root) Item_func_crc32(thd, true, arg1, arg2) + : new (thd->mem_root) Item_func_crc32(thd, true, arg1); +} + + Create_func_datediff Create_func_datediff::s_singleton; Item* @@ -5555,6 +5612,7 @@ Native_func_registry func_array[] = { { STRING_WITH_LEN("COS") }, BUILDER(Create_func_cos)}, { { STRING_WITH_LEN("COT") }, BUILDER(Create_func_cot)}, { { STRING_WITH_LEN("CRC32") }, BUILDER(Create_func_crc32)}, + { { STRING_WITH_LEN("CRC32C") }, BUILDER(Create_func_crc32c)}, { { STRING_WITH_LEN("DATEDIFF") }, BUILDER(Create_func_datediff)}, { { STRING_WITH_LEN("DAYNAME") }, BUILDER(Create_func_dayname)}, { { STRING_WITH_LEN("DAYOFMONTH") }, BUILDER(Create_func_dayofmonth)}, diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 00413f95e32..962c0e291b5 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2017, Oracle and/or its affiliates. - Copyright (c) 2009, 2021, MariaDB Corporation. + Copyright (c) 2009, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4385,14 +4385,32 @@ longlong Item_func_uncompressed_length::val_int() longlong Item_func_crc32::val_int() { DBUG_ASSERT(fixed()); - String *res=args[0]->val_str(&value); + DBUG_ASSERT(arg_count == 1 || arg_count == 2); + String *res; + longlong crc; + if (arg_count > 1) + { + crc= args[0]->val_int(); + null_value= args[0]->null_value; + if (null_value) + return 0; + res= args[1]->val_str(&value); + } + else + { + crc= 0; + null_value= 0; + res= args[0]->val_str(&value); + } + if (!res) { null_value=1; return 0; /* purecov: inspected */ } - null_value=0; - return (longlong) my_checksum(0L, (uchar*)res->ptr(), res->length()); + + return static_cast + (ulonglong{crc_func(uint32_t(crc), res->ptr(), res->length())}); } #ifdef HAVE_COMPRESS diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index ba61206d8d9..106742ddd4e 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -3,7 +3,7 @@ /* Copyright (c) 2000, 2011, Oracle and/or its affiliates. - Copyright (c) 2009, 2019, MariaDB + Copyright (c) 2009, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1945,15 +1945,27 @@ public: class Item_func_crc32 :public Item_long_func { bool check_arguments() const override - { return args[0]->check_type_can_return_str(func_name_cstring()); } + { + return args[0]->check_type_can_return_str(func_name_cstring()) && + (arg_count == 1 || + args[1]->check_type_can_return_int(func_name_cstring())); + } String value; + uint32 (*const crc_func)(uint32, const void*, size_t); public: - Item_func_crc32(THD *thd, Item *a): Item_long_func(thd, a) + Item_func_crc32(THD *thd, bool Castagnoli, Item *a) : + Item_long_func(thd, a), + crc_func(Castagnoli ? 
my_crc32c : my_checksum) + { unsigned_flag= 1; } + Item_func_crc32(THD *thd, bool Castagnoli, Item *a, Item *b) : + Item_long_func(thd, a, b), + crc_func(Castagnoli ? my_crc32c : my_checksum) { unsigned_flag= 1; } LEX_CSTRING func_name_cstring() const override { - static LEX_CSTRING name= {STRING_WITH_LEN("crc32") }; - return name; + static LEX_CSTRING crc32_name= {STRING_WITH_LEN("crc32") }; + static LEX_CSTRING crc32c_name= {STRING_WITH_LEN("crc32c") }; + return crc_func == my_crc32c ? crc32c_name : crc32_name; } bool fix_length_and_dec() override { max_length=10; return FALSE; } longlong val_int() override;