Merge branch '10.7' into 10.8

2025-08-08 11:22:35 +03:00 · 2022-02-04 14:50:25 +01:00
parent c0f5fd2754 9ed8deb656
commit 4fb2cb1a30
907 changed files with 20182 additions and 6211 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -351,6 +351,60 @@ struct my_collation_handler_st
 		       const uchar *, size_t, const uchar *, size_t, my_bool);
  int     (*strnncollsp)(CHARSET_INFO *,
                         const uchar *, size_t, const uchar *, size_t);
+  /*
+    strnncollsp_nchars() - similar to strnncollsp() but assumes that both
+                           strings were originally CHAR(N) values with the
+                           same N, then were optionally space-padded,
+                           or optionally space-trimmed.
+
+                           In other words, the comparison behaves as if both
+                           values were inserted into a CHAR(N) column and
+                           the two column values were then compared.
+
+    It compares the same number of characters from the two strings.
+    This is especially important for NOPAD collations.
+
+    If the CHAR_LENGTH values of the two strings differ,
+    the shorter string is virtually padded with trailing spaces
+    up to the CHAR_LENGTH of the longer string, to guarantee that the
+    same number of characters is compared.
+    This is important if the two CHAR(N) strings are space-trimmed
+    (e.g. like in InnoDB compact format for CHAR).
+
+    The function compares at most "nchars" characters.
+    This can be useful to compare CHAR(N) space-padded strings
+    (when the exact N is known) without having to truncate them before
+    the comparison.
+
+    For example, Field_string stores a "CHAR(3) CHARACTER SET utf8mb4" value
+    of "aaa" as 12 bytes in a record buffer:
+    - 3 bytes of the actual data, followed by
+    - 9 bytes of spaces (just fillers, not real data)
+    The caller can pass nchars=3 to compare CHAR(3) record values.
+    In such a case, the comparator won't read into the 9 bytes of fillers.
+
+    If N is not known, the caller can pass max(len1,len2) as the "nchars" value
+    (i.e. the maximum of the OCTET_LENGTH of the two strings).
+
+    Notes on complex collations.
+
+    This function counts contraction parts as individual characters.
+    For example, the Czech letter 'ch' (in Czech collations)
+    is ordinarily counted by the "nchars" limit as TWO characters
+    (although it is only one letter).
+    This corresponds to what CHAR(N) does in INSERT.
+
+    If the "nchars" limit tears apart a contraction, only the part fitting
+    into "nchars" characters is used. For example, in case of a Czech collation,
+    the string "ach" with nchars=2 is compared as 'ac': the contraction
+    'ch' is torn apart and the letter 'c' acts as an individual character.
+    This gives the same comparison result as inserting 'ach' into
+    a CHAR(2) column and then comparing the stored value.
+  */
+  int     (*strnncollsp_nchars)(CHARSET_INFO *,
+                                const uchar *str1, size_t len1,
+                                const uchar *str2, size_t len2,
+                                size_t nchars);
  size_t     (*strnxfrm)(CHARSET_INFO *,
                         uchar *dst, size_t dstlen, uint nweights,
                         const uchar *src, size_t srclen, uint flags);