1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations

- Adding a new argument "flag" to MY_COLLATION_HANDLER::strnncollsp_nchars()
  and a flag MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES.
  The flag defines if strnncollsp_nchars() should emulate trailing spaces
  which were possibly trimmed earlier (e.g. in InnoDB CHAR compression).
  This is important for NOPAD collations.

  For example, with this input:
   - str1= 'a '    (Latin letter a followed by one space)
   - str2= 'a  '   (Latin letter a followed by two spaces)
   - nchars= 3
  if the flag is given, strnncollsp_nchars() will virtually restore
  one trailing space to str1 up to nchars (3) characters and compare two
  strings as equal:
  - str1= 'a  '  (one extra trailing space emulated)
  - str2= 'a  '  (as is)

  If the flag is not given, strnncollsp_nchars() does not add trailing
  virtual spaces, so in case of a NOPAD collation, str1 will be compared
  as less than str2 because it is shorter.

- Field_string::cmp_prefix() now passes the new flag.
  Field_varstring::cmp_prefix() and Field_blob::cmp_prefix() do
  not pass the new flag.

- The branch in cmp_whole_field() in storage/innobase/rem/rem0cmp.cc
  (which handles the CHAR data type) now also passes the new flag.

- Fixing UCA collations to respect the new flag.
  Other collations are possibly also affected; however,
  I had no success in making an SQL script demonstrating the problem.
  Other collations will be extended to respect this flag in a separate
  patch later.

- Changing the meaning of the last parameter of Field::cmp_prefix()
  from "number of bytes" (internal length)
  to "number of characters" (user visible length).

  The code calling cmp_prefix() from handler.cc was wrong.
  After this change, the call in handler.cc became correct.

  The code calling cmp_prefix() from key_rec_cmp() in key.cc
  was adjusted according to this change.

- Old strnncollsp_nchars()-related tests in unittest/strings/strings-t.c
  now pass the new flag.
  A few new tests were also added, without the flag.
This commit is contained in:
Alexander Barkov
2023-03-31 17:20:03 +04:00
parent 0cc1694e9c
commit 8020b1bd73
18 changed files with 631 additions and 209 deletions

View File

@@ -1109,7 +1109,8 @@ public:
The following method is used for comparing prefix keys.
Currently it's only used in partitioning.
*/
virtual int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len)
virtual int cmp_prefix(const uchar *a, const uchar *b,
size_t prefix_char_len)
{ return cmp(a, b); }
virtual int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U)
{ return memcmp(a,b,pack_length()); }
@@ -3728,7 +3729,7 @@ public:
String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *);
int cmp(const uchar *a,const uchar *b);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len);
void sort_string(uchar *buff,uint length);
uint get_key_image(uchar *buff,uint length, imagetype type);
void set_key_image(const uchar *buff,uint length);
@@ -3964,7 +3965,7 @@ public:
String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *);
int cmp(const uchar *a,const uchar *b);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len);
int cmp(const uchar *a, uint32 a_length, const uchar *b, uint32 b_length);
int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U);
int key_cmp(const uchar *,const uchar*);
@@ -4501,7 +4502,7 @@ public:
}
int cmp_binary_offset(uint row_offset)
{ return cmp_offset(row_offset); }
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len);
int key_cmp(const uchar *a, const uchar *b)
{ return cmp_binary((uchar *) a, (uchar *) b); }
int key_cmp(const uchar *str, uint length);