1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp()

This patch also fixes:
  MDEV-33050 Built-in schemas like oracle_schema are accent insensitive
  MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0
  MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0
  MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0
  MDEV-33088 Cannot create triggers in the database `MYSQL`
  MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0
  MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0
  MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0
  MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS
  MDEV-33120 System log table names are case insensitive with lower-case-table-names=0

- Removing the virtual function strcasecmp() from MY_COLLATION_HANDLER

- Adding a wrapper function CHARSET_INFO::streq(), to compare
  two strings for equality. For now it calls strnncoll() internally.
  In the future it will turn into a virtual function.

- Adding new accent sensitive case insensitive collations:
    - utf8mb4_general1400_as_ci
    - utf8mb3_general1400_as_ci
  They implement accent sensitive case insensitive comparison.
  The weight of a character is equal to the code point of its
  upper case variant. These collations use Unicode-14.0.0 casefolding data.

  The result of
     my_charset_utf8mb3_general1400_as_ci.strcoll()
  is very close to the former
     my_charset_utf8mb3_general_ci.strcasecmp()

  There is only a difference in a couple dozen rare characters, because:
    - the switch from "tolower" to "toupper" comparison, to make
      utf8mb3_general1400_as_ci closer to utf8mb3_general_ci
    - the switch from Unicode-3.0.0 to Unicode-14.0.0
  This difference should be tolerable. See the list of affected
  characters in the MDEV description.

  Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters!
  Unlike utf8mb4_general_ci, it does not treat all non-BMP characters
  as equal.

- Adding classes representing names of the file based database objects:

    Lex_ident_db
    Lex_ident_table
    Lex_ident_trigger

  Their comparison collation depends on the underlying
  file system case sensitivity and on --lower-case-table-names
  and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci.

- Adding classes representing names of other database objects,
  whose names have case insensitive comparison style,
  using my_charset_utf8mb3_general1400_as_ci:

  Lex_ident_column
  Lex_ident_sys_var
  Lex_ident_user_var
  Lex_ident_sp_var
  Lex_ident_ps
  Lex_ident_i_s_table
  Lex_ident_window
  Lex_ident_func
  Lex_ident_partition
  Lex_ident_with_element
  Lex_ident_rpl_filter
  Lex_ident_master_info
  Lex_ident_host
  Lex_ident_locale
  Lex_ident_plugin
  Lex_ident_engine
  Lex_ident_server
  Lex_ident_savepoint
  Lex_ident_charset
  engine_option_value::Name

- All the mentioned Lex_ident_xxx classes implement a method streq():

  if (ident1.streq(ident2))
     do_equal();

  This method works as a wrapper for CHARSET_INFO::streq().

- Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name"
  in class members and in function/method parameters.

- Replacing all calls like
    system_charset_info->coll->strcasecmp(ident1, ident2)
  to
    ident1.streq(ident2)

- Taking advantage of the C++11 user-defined literal operator
  for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h)
  data types. Usage example:

  const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column;

  is now a shorter version of:

  const Lex_ident_column primary_key_name=
    Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
This commit is contained in:
Alexander Barkov
2023-04-26 15:27:01 +04:00
parent 159b7ca3f2
commit fd247cc21f
204 changed files with 8971 additions and 3202 deletions

View File

@ -0,0 +1,154 @@
--echo #
--echo # Start of 11.5 tests
--echo #
--echo #
--echo # MDEV-31340 Remove MY_COLLATION_HANDLER::strcasecmp()
--echo #
#
# Identifiers are accent sensitive and case insensitive,
# and there are usually only two variants of a letter (capital and small)
# having equal octet length in utf8.
#
# There are a few exceptions (coming from Unicode casefolding rules)
# changing octet length during casefolding.
#
# Testing "U+0131 LATIN SMALL LETTER DOTLESS I" versus letters I and i.
# Each section below spells one kind of identifier with the dotless i and
# checks that it is matched against the same identifier spelled with I/i.
# (The GROUP BY section at the end of this file additionally checks
# "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE".)
#
# Column
# "I" and the dotless i are expected to collide as column names,
# so the CREATE TABLE must fail with a duplicate-column error.
SET NAMES utf8;
--error ER_DUP_FIELDNAME
CREATE TABLE t1 (I int, ı int);
# Index
# Same check for key names: the two keys are expected to be duplicates.
--error ER_DUP_KEYNAME
CREATE TABLE t1 (a int, b int, KEY I(a), KEY ı(b));
# Locale
# The locale name spelled with a dotless i is expected to be accepted
# (no --error); the SELECT records which locale name it resolved to.
SET @@lc_time_names=it_ıT;
SELECT @@lc_time_names;
SET @@lc_time_names=DEFAULT;
# Host
# NOTE(review): the host-name check below is disabled; the reason is not
# recorded in this file - confirm whether it should be enabled or removed.
#CREATE user u1@ıT;
#SELECT user, host FROM mysql.user WHERE user='u1';
#DROP USER u1@it;
# I_S table name
# The INFORMATION_SCHEMA table is queried once with its regular spelling and
# once with a dotless i in its name; both queries are expected to succeed.
CREATE VIEW v1 AS SELECT 1;
SELECT COUNT(*) FROM INFORMATION_SCHEMA.VIEWS WHERE TABLE_NAME='v1';
SELECT COUNT(*) FROM INFORMATION_SCHEMA.VıEWS WHERE TABLE_NAME='v1';
DROP VIEW v1;
# Window name
# The window is defined under the dotless-i name and referenced as "I"
# in the OVER clause; the reference is expected to resolve.
# NOTE(review): the outer query has no ORDER BY and is not preceded by
# --sorted_result, so the recorded row order depends on how the server
# executes the window - confirm it is stable.
CREATE OR REPLACE TABLE t1 (pk int, c int);
INSERT INTO t1 VALUES (1,1);
INSERT INTO t1 VALUES (1,2);
INSERT INTO t1 VALUES (1,3);
INSERT INTO t1 VALUES (2,1);
INSERT INTO t1 VALUES (2,2);
INSERT INTO t1 VALUES (2,3);
SELECT pk, COUNT(*) OVER I AS cnt
FROM t1
WINDOW ı AS (PARTITION BY c ORDER BY pk ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
DROP TABLE t1;
# Function
# First a built-in function name spelled with a dotless i, then a stored
# function created as I() and both called and dropped via the dotless-i name.
SELECT CEIL(10.6);
SELECT CEıL(10.6);
CREATE FUNCTION I() RETURNS INT RETURN 1;
SELECT ı();
DROP FUNCTION ı;
# CTE - WITH reference
# The CTE is declared as "I" and referenced through the dotless-i name.
WITH I AS (SELECT 'a1' AS a, 'b1' AS b) SELECT * FROM ı;
# Plugin
# The plugin name spelled with a dotless i is expected to be reported as
# already installed (presumably matching the Aria plugin - confirm), so the
# SONAME value is never actually used.
--error ER_PLUGIN_INSTALLED
INSTALL PLUGIN arıa SONAME 'not important';
# Engine
# The engine name is spelled with a dotless i; SHOW CREATE TABLE records
# which engine the table was actually created with.
CREATE TABLE t1 (a INT) ENGINE=MyıSAM;
SHOW CREATE TABLE t1;
DROP TABLE t1;
# System variable
# System variable name spelled with a dotless i; expected to resolve.
SELECT @@CHARACTER_SET_CLıENT;
# User variable
# The variable is set as @I and read back through the dotless-i name.
SET @I='i';
SELECT @ı;
# System versioning: row start, row end
# The ROW START / ROW END columns are declared with a regular "i" and
# referenced in PERIOD FOR SYSTEM_TIME with a dotless i.
CREATE TABLE t1(
x INT,
start_timestamp TIMESTAMP(6) GENERATED ALWAYS AS ROW START,
end_timestamp TIMESTAMP(6) GENERATED ALWAYS AS ROW END,
PERIOD FOR SYSTEM_TIME(start_tımestamp, end_tımestamp)
) WITH SYSTEM VERSIONING;
SHOW CREATE TABLE t1;
DROP TABLE t1;
# Label names
# Two compound statements follow:
#  1. A loop labelled with the dotless-i name contains a nested loop labelled
#     label_I; the inner label is expected to be rejected as a redefinition.
#  2. A loop labelled with the dotless-i name is left via LEAVE label_I;
#     this is expected to succeed and continue after the loop.
DELIMITER $$;
--error ER_SP_LABEL_REDEFINE
BEGIN NOT ATOMIC
label_ı:
LOOP
label_I:
LOOP
LEAVE label_I;
END LOOP;
LEAVE label_ı;
END LOOP;
END;
$$
DELIMITER ;$$
DELIMITER $$;
BEGIN NOT ATOMIC
label_ı:
LOOP
SELECT 'looping' AS stage;
LEAVE label_I;
END LOOP;
SELECT 'out of loop' AS stage;
END;
$$
DELIMITER ;$$
# References in GROUP BY
# A select-list alias is referenced from GROUP BY under a different spelling.
#  - "I" and the dotless i (U+0131) are expected to match in both directions.
#  - "i" and the capital I with dot above (U+0130) are expected NOT to match:
#    both directions must fail with an unknown-column error.
CREATE TABLE t1 (a INT);
INSERT INTO t1 VALUES (1),(2),(3);
SELECT 'a' AS I FROM t1 GROUP BY ı;
SELECT 'a' AS ı FROM t1 GROUP BY I;
--error ER_BAD_FIELD_ERROR
SELECT 'a' AS İ FROM t1 GROUP BY i;
--error ER_BAD_FIELD_ERROR
SELECT 'a' AS i FROM t1 GROUP BY İ;
DROP TABLE t1;
--echo #
--echo # End of 11.5 tests
--echo #