MDEV-8214 Asian MB2 charsets: compare broken bytes as "greater than any non-broken character"

2025-08-08 11:22:35 +03:00 · 2015-06-26 13:40:28 +04:00
parent d535728165
commit 4f828a1cac
9 changed files with 830 additions and 373 deletions
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -49,6 +49,7 @@
 #define big5tail(e)	((uchar)(e&0xff))
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _big5
 #define IS_MB1_CHAR(x)        ((uchar) (x) < 0x80)
 #define IS_MB2_CHAR(x,y)      (isbig5head(x) && isbig5tail(y))
 #define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
@@ -849,89 +850,6 @@ static uint16 big5strokexfrm(uint16 i)
 }
 /*
   Compare the first "length" bytes of two big5 strings.

   While at least two bytes remain and both strings have a valid two-byte
   big5 sequence at the current position, the two sequences are compared by
   their big5code() values. Otherwise one byte of each string is compared
   through sort_order_big5[].

   Returns the weight difference at the first mismatch; in that case
   *a_res and *b_res are not modified. Returns 0 when no mismatch was found,
   and then *a_res and *b_res point just after the compared bytes.
 */
 static int my_strnncoll_big5_internal(const uchar **a_res,
                                       const uchar **b_res, size_t length)
 {
  const uchar *pa= *a_res;
  const uchar *pb= *b_res;
  size_t left= length;
  while (left > 0)
  {
    if (left > 1 && isbig5code(pa[0], pa[1]) && isbig5code(pb[0], pb[1]))
    {
      /* Both sides hold a two-byte character here */
      if (pa[0] != pb[0] || pa[1] != pb[1])
        return (int) big5code(pa[0], pa[1]) - (int) big5code(pb[0], pb[1]);
      pa+= 2;
      pb+= 2;
      left-= 2;
    }
    else
    {
      /* Fall back to a single byte comparison */
      int wa= (int) sort_order_big5[*pa];
      int wb= (int) sort_order_big5[*pb];
      if (wa != wb)
        return wa - wb;
      pa++;
      pb++;
      left--;
    }
  }
  *a_res= pa;
  *b_res= pb;
  return 0;
 }
 /*
   Compare two big5 strings.

   The common prefix (the shorter of the two lengths) is compared with
   my_strnncoll_big5_internal(). If it is equal, the result is the length
   difference; with b_is_prefix set, the length of "a" is replaced by the
   common length first.
 */
 static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), 
                              const uchar *a, size_t a_length,
                              const uchar *b, size_t b_length,
                              my_bool b_is_prefix)
 {
  size_t common= MY_MIN(a_length, b_length);
  int diff= my_strnncoll_big5_internal(&a, &b, common);
  if (diff)
    return diff;
  if (b_is_prefix)
    a_length= common;
  return (int) (a_length - b_length);
 }
 /*
   Compare two big5 strings, ignoring trailing spaces.

   The common prefix is compared with my_strnncoll_big5_internal().
   If it is equal and the lengths differ, the remaining bytes of the
   longer string are scanned: the first byte that is not a space decides
   the result (a byte below ' ' makes the longer string smaller, any other
   byte makes it greater). If the remainder consists of spaces only, the
   strings are equal, unless diff_if_only_endspace_difference is in effect,
   in which case the longer string is considered greater.
 */
 static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)), 
                                const uchar *a, size_t a_length, 
                                const uchar *b, size_t b_length,
                                my_bool diff_if_only_endspace_difference)
 {
  size_t common= MY_MIN(a_length, b_length);
  int res= my_strnncoll_big5_internal(&a, &b, common);
  const uchar *tail, *tail_end;
  int sign;
 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
 #endif
  if (res || a_length == b_length)
    return res;

  /* Pick the unread remainder of the longer string */
  if (a_length > b_length)
  {
    tail= a;
    tail_end= a + (a_length - common);
    sign= 1;
  }
  else
  {
    tail= b;
    tail_end= b + (b_length - common);
    sign= -1;                             /* "b" is longer: invert the result */
  }

  for ( ; tail < tail_end; tail++)
  {
    if (*tail != ' ')
      return (*tail < ' ') ? -sign : sign;
  }
  return diff_if_only_endspace_difference ? sign : 0;
 }
 static size_t
 my_strnxfrm_big5(CHARSET_INFO *cs,
                 uchar *dst, size_t dstlen, uint nweights,
@@ -6853,11 +6771,23 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
 }
-static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _big5_chinese_ci
 #define WEIGHT_MB1(x)        (sort_order_big5[(uchar) (x)])
 #define WEIGHT_MB2(x,y)      (big5code(x, y))
 #include "strcoll.ic"
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _big5_bin
 #define WEIGHT_MB1(x)        ((uchar) (x))
 #define WEIGHT_MB2(x,y)      (big5code(x, y))
 #include "strcoll.ic"
 static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
 {
  NULL,			/* init */
-  my_strnncoll_big5,
+  my_strnncoll_big5_chinese_ci,
-  my_strnncollsp_big5,
+  my_strnncollsp_big5_chinese_ci,
  my_strnxfrm_big5,
  my_strnxfrmlen_simple,
  my_like_range_mb,
@@ -6868,6 +6798,23 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
  my_propagate_simple
 };
 /*
   Collation handler for big5_bin.
   The strnncoll and strnncollsp entries are the functions generated by
   including "strcoll.ic" with MY_FUNCTION_NAME(x) defined as
   my_ ## x ## _big5_bin.
 */
 static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
 {
  NULL,	                /* init */
  my_strnncoll_big5_bin,
  my_strnncollsp_big5_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
 };
 static MY_CHARSET_HANDLER my_charset_big5_handler=
 {
  NULL,			/* init */
@@ -6931,7 +6878,7 @@ struct charset_info_st my_charset_big5_chinese_ci=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_big5_handler,
-    &my_collation_big5_chinese_ci_handler
+    &my_collation_handler_big5_chinese_ci
 };
@@ -6964,7 +6911,7 @@ struct charset_info_st my_charset_big5_bin=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_big5_handler,
-    &my_collation_mb_bin_handler
+    &my_collation_handler_big5_bin
 };
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -185,6 +185,7 @@ static const uchar sort_order_cp932[]=
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _cp932
 #define IS_8BIT_CHAR(x)       iscp932kata(x)
 #define IS_MB1_CHAR(x)        ((uchar) (x) < 0x80 || iscp932kata(x))
 #define IS_MB2_CHAR(x,y)      (iscp932head(x) && iscp932tail(y))
 #define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
@@ -1717,90 +1718,6 @@ MY_UNICASE_INFO my_caseinfo_cp932=
  my_caseinfo_pages_cp932
 };
 /*
   Compare two cp932 strings until either of them ends.

   When both strings have a multi-byte character at the current position
   (according to ismbchar_cp932()), the two characters are compared by
   their cp932code() values. Otherwise one byte of each string is compared
   through sort_order_cp932[].

   Returns the difference at the first mismatch; in that case *a_res and
   *b_res are not modified. Returns 0 when no mismatch was found, and then
   *a_res and *b_res point to where the scan stopped.
 */
 static int my_strnncoll_cp932_internal(CHARSET_INFO *cs,
                                        const uchar **a_res, size_t a_length,
                                        const uchar **b_res, size_t b_length)
 {
  const uchar *pa= *a_res;
  const uchar *pb= *b_res;
  const uchar *pa_end= pa + a_length;
  const uchar *pb_end= pb + b_length;
  for ( ; pa < pa_end && pb < pb_end; )
  {
    uint a_char, b_char;
    if (!ismbchar_cp932(cs, (char*) pa, (char*) pa_end) ||
        !ismbchar_cp932(cs, (char*) pb, (char*) pb_end))
    {
      /* At least one side is not a multi-byte character: compare bytes */
      int wa= sort_order_cp932[(uchar) *pa];
      int wb= sort_order_cp932[(uchar) *pb];
      if (wa != wb)
        return wa - wb;
      pa++;
      pb++;
      continue;
    }
    a_char= cp932code(pa[0], pa[1]);
    b_char= cp932code(pb[0], pb[1]);
    if (a_char != b_char)
      return (int) (a_char - b_char);
    pa+= 2;
    pb+= 2;
  }
  *a_res= pa;
  *b_res= pb;
  return 0;
 }
 /*
   Compare two cp932 strings.

   If my_strnncoll_cp932_internal() finds no difference, the result is the
   length difference, except that a longer "a" is reported as equal when
   b_is_prefix is set.
 */
 static int my_strnncoll_cp932(CHARSET_INFO *cs __attribute__((unused)),
                               const uchar *a, size_t a_length, 
                               const uchar *b, size_t b_length,
                               my_bool b_is_prefix)
 {
  int diff= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
  if (diff)
    return diff;
  if (b_is_prefix && a_length > b_length)
    return 0;
  return (int) (a_length - b_length);
 }
 /*
   Compare two cp932 strings, ignoring trailing spaces.

   If my_strnncoll_cp932_internal() finds no difference and one of the
   strings still has unread bytes, those bytes are scanned: the first byte
   that is not a space decides the result (a byte below ' ' makes the
   longer string smaller, any other byte makes it greater). If only spaces
   remain, the strings are equal, unless diff_if_only_endspace_difference
   is in effect, in which case the longer string is considered greater.
 */
 static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
                                 const uchar *a, size_t a_length, 
                                 const uchar *b, size_t b_length,
                                 my_bool diff_if_only_endspace_difference
                                 __attribute__((unused)))
 {
  const uchar *a_end= a + a_length;
  const uchar *b_end= b + b_length;
  const uchar *tail, *tail_end;
  int sign;
  int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
 #endif
  if (res || (a == a_end && b == b_end))
    return res;

  /* Pick the unread remainder of the longer string */
  if (a == a_end)
  {
    tail= b;
    tail_end= b_end;
    sign= -1;                             /* "b" is longer: invert the result */
  }
  else
  {
    tail= a;
    tail_end= a_end;
    sign= 1;
  }

  for ( ; tail < tail_end; tail++)
  {
    if (*tail != (uchar) ' ')
      return (*tail < (uchar) ' ') ? -sign : sign;
  }
  return diff_if_only_endspace_difference ? sign : 0;
 }
 static const uint16 cp932_to_unicode[65536]=
 {
@@ -34720,15 +34637,36 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
 }
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+/*
  cp932_japanese_ci and cp932_bin sort character blocks in this order:
  1. [00..7F]                - 7BIT characters (ASCII)
  2. [81..9F][40..7E,80..FC] - MB2 characters, part1
  3. [A1..DF]                - 8BIT characters (Kana)
  4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
 */
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _cp932_japanese_ci
 #define WEIGHT_PAD_SPACE     (256 * (int) ' ')
 #define WEIGHT_MB1(x)        (256 * (int) sort_order_cp932[(uchar) (x)])
 #define WEIGHT_MB2(x,y)      (cp932code(x, y))
 #include "strcoll.ic"
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _cp932_bin
 #define WEIGHT_PAD_SPACE     (256 * (int) ' ')
 #define WEIGHT_MB1(x)        (256 * (int) (uchar) (x))
 #define WEIGHT_MB2(x,y)      (cp932code(x, y))
 #include "strcoll.ic"
 static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
 {
-  NULL,			/* init */
+  NULL,                  /* init */
-  my_strnncoll_cp932,
+  my_strnncoll_cp932_japanese_ci,
-  my_strnncollsp_cp932,
+  my_strnncollsp_cp932_japanese_ci,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
-  my_wildcmp_mb,	/* wildcmp  */
+  my_wildcmp_mb,
  my_strcasecmp_8bit,
  my_instr_mb,
  my_hash_sort_simple,
@@ -34736,6 +34674,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 };
 /*
   Collation handler for cp932_bin.
   The strnncoll and strnncollsp entries are the functions generated by
   including "strcoll.ic" with MY_FUNCTION_NAME(x) defined as
   my_ ## x ## _cp932_bin.
 */
 static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
 {
  NULL,	                /* init */
  my_strnncoll_cp932_bin,
  my_strnncollsp_cp932_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
 };
 static MY_CHARSET_HANDLER my_charset_handler=
 {
  NULL,			/* init */
@@ -34800,7 +34754,7 @@ struct charset_info_st my_charset_cp932_japanese_ci=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_ci_handler
+    &my_collation_handler_cp932_japanese_ci
 };
 struct charset_info_st my_charset_cp932_bin=
@@ -34832,7 +34786,7 @@ struct charset_info_st my_charset_cp932_bin=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_mb_bin_handler
+    &my_collation_handler_cp932_bin
 };
 #endif
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -201,8 +201,10 @@ static const uchar sort_order_euc_kr[]=
                              iseuc_kr_tail2(c) || \
                              iseuc_kr_tail3(c))
 #define euckrcode(c,d)        (((uchar)(c) <<8) | (uchar)(d))
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _euckr
 #define IS_MB1_CHAR(x)        ((uchar) (x) < 0x80)
 #define IS_MB2_CHAR(x,y)      (iseuc_kr_head(x) && iseuc_kr_tail(y))
 #define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
@@ -9938,21 +9940,50 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
 }
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _euckr_korean_ci
 #define WEIGHT_MB1(x)        (sort_order_euc_kr[(uchar) (x)])
 #define WEIGHT_MB2(x,y)      (euckrcode(x, y))
 #include "strcoll.ic"
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _euckr_bin
 #define WEIGHT_MB1(x)        ((uchar) (x))
 #define WEIGHT_MB2(x,y)      (euckrcode(x, y))
 #include "strcoll.ic"
 static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
 {
-  NULL,			/* init */
+  NULL,                 /* init */
-  my_strnncoll_simple,  /* strnncoll  */
+  my_strnncoll_euckr_korean_ci,
-  my_strnncollsp_simple,
+  my_strnncollsp_euckr_korean_ci,
-  my_strnxfrm_mb,	/* strnxfrm   */
+  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
-  my_like_range_mb,     /* like_range */
+  my_like_range_mb,
-  my_wildcmp_mb,	/* wildcmp    */
+  my_wildcmp_mb,
  my_strcasecmp_mb,
  my_instr_mb,
  my_hash_sort_simple,
  my_propagate_simple
 };
 /*
   Collation handler for euckr_bin.
   The strnncoll and strnncollsp entries are the functions generated by
   including "strcoll.ic" with MY_FUNCTION_NAME(x) defined as
   my_ ## x ## _euckr_bin.
 */
 static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
 {
  NULL,                 /* init */
  my_strnncoll_euckr_bin,
  my_strnncollsp_euckr_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
 };
 static MY_CHARSET_HANDLER my_charset_handler=
 {
  NULL,			/* init */
@@ -10017,7 +10048,7 @@ struct charset_info_st my_charset_euckr_korean_ci=
    0,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_ci_handler
+    &my_collation_handler_euckr_korean_ci
 };
@@ -10050,7 +10081,7 @@ struct charset_info_st my_charset_euckr_bin=
    0,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_mb_bin_handler
+    &my_collation_handler_euckr_bin
 };
 #endif
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -163,9 +163,11 @@ static const uchar sort_order_gb2312[]=
 #define isgb2312head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf7)
 #define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
 #define gb2312code(c,d) (((uchar)(c) <<8) | (uchar)(d))
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _gb2312
 #define IS_MB1_CHAR(x)        ((uchar) (x) < 0x80)
 #define IS_MB2_CHAR(x,y)      (isgb2312head(x) && isgb2312tail(y))
 #define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
@@ -6341,11 +6343,23 @@ my_mb_wc_gb2312(CHARSET_INFO *cs  __attribute__((unused)),
 }
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _gb2312_chinese_ci
 #define WEIGHT_MB1(x)        (sort_order_gb2312[(uchar) (x)])
 #define WEIGHT_MB2(x,y)      (gb2312code(x, y))
 #include "strcoll.ic"
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _gb2312_bin
 #define WEIGHT_MB1(x)        ((uchar) (x))
 #define WEIGHT_MB2(x,y)      (gb2312code(x, y))
 #include "strcoll.ic"
 static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
 {
-  NULL,			/* init */
+  NULL,                 /* init */
-  my_strnncoll_simple,  /* strnncoll  */
+  my_strnncoll_gb2312_chinese_ci,
-  my_strnncollsp_simple,
+  my_strnncollsp_gb2312_chinese_ci,
  my_strnxfrm_mb,       /* strnxfrm   */
  my_strnxfrmlen_simple,
  my_like_range_mb,     /* like_range */
@@ -6356,6 +6370,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
  my_propagate_simple
 };
 /*
   Collation handler for gb2312_bin.
   The strnncoll and strnncollsp entries are the functions generated by
   including "strcoll.ic" with MY_FUNCTION_NAME(x) defined as
   my_ ## x ## _gb2312_bin.
 */
 static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
 {
  NULL,	                /* init */
  my_strnncoll_gb2312_bin,
  my_strnncollsp_gb2312_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
 };
 static MY_CHARSET_HANDLER my_charset_handler=
 {
  NULL,			/* init */
@@ -6420,9 +6452,10 @@ struct charset_info_st my_charset_gb2312_chinese_ci=
    0,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_ci_handler
+    &my_collation_handler_gb2312_chinese_ci
 };
 struct charset_info_st my_charset_gb2312_bin=
 {
    86,0,0,		/* number */
@@ -6452,7 +6485,7 @@ struct charset_info_st my_charset_gb2312_bin=
    0,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_mb_bin_handler
+    &my_collation_handler_gb2312_bin
 };
 #endif
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -44,6 +44,7 @@
 #define gbktail(e)     ((uchar)(e&0xff))
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _gbk
 #define IS_MB1_CHAR(x)        ((uchar) (x) < 0x80)
 #define IS_MB2_CHAR(x,y)      (isgbkhead(x) && isgbktail(y))
 #define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
@@ -3450,87 +3451,6 @@ static uint16 gbksortorder(uint16 i)
 }
 /*
   Compare the first "length" bytes of two gbk strings.

   While at least two bytes remain and both strings have a valid two-byte
   gbk sequence at the current position, the two sequences are compared:
   if their gbkcode() values differ, the difference of their gbksortorder()
   values is returned. Otherwise one byte of each string is compared
   through sort_order_gbk[].

   On a mismatch *a_res and *b_res are not modified. Returns 0 when no
   mismatch was found, and then *a_res and *b_res point just after the
   compared bytes.
 */
 int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
                               size_t length)
 {
  const uchar *pa= *a_res;
  const uchar *pb= *b_res;
  size_t left= length;
  while (left > 0)
  {
    if (left > 1 && isgbkcode(pa[0], pa[1]) && isgbkcode(pb[0], pb[1]))
    {
      /* Both sides hold a two-byte character here */
      uint a_char= gbkcode(pa[0], pa[1]);
      uint b_char= gbkcode(pb[0], pb[1]);
      if (a_char != b_char)
        return (int) gbksortorder((uint16) a_char) -
               (int) gbksortorder((uint16) b_char);
      pa+= 2;
      pb+= 2;
      left-= 2;
    }
    else
    {
      /* Fall back to a single byte comparison */
      int wa= (int) sort_order_gbk[*pa];
      int wb= (int) sort_order_gbk[*pb];
      if (wa != wb)
        return wa - wb;
      pa++;
      pb++;
      left--;
    }
  }
  *a_res= pa;
  *b_res= pb;
  return 0;
 }
 /*
   Compare two gbk strings.

   The common prefix (the shorter of the two lengths) is compared with
   my_strnncoll_gbk_internal(). If it is equal, the result is the length
   difference; with b_is_prefix set, the length of "a" is replaced by the
   common length first.
 */
 int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
                      const uchar *a, size_t a_length,
                      const uchar *b, size_t b_length,
                      my_bool b_is_prefix)
 {
  size_t common= MY_MIN(a_length, b_length);
  int diff= my_strnncoll_gbk_internal(&a, &b, common);
  if (diff)
    return diff;
  if (b_is_prefix)
    a_length= common;
  return (int) (a_length - b_length);
 }
 /*
   Compare two gbk strings, ignoring trailing spaces.

   The common prefix is compared with my_strnncoll_gbk_internal().
   If it is equal and the lengths differ, the remaining bytes of the
   longer string are scanned: the first byte that is not a space decides
   the result (a byte below ' ' makes the longer string smaller, any other
   byte makes it greater). If the remainder consists of spaces only, the
   strings are equal, unless diff_if_only_endspace_difference is in effect,
   in which case the longer string is considered greater.
 */
 static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
                               const uchar *a, size_t a_length, 
                               const uchar *b, size_t b_length,
                               my_bool diff_if_only_endspace_difference)
 {
  size_t common= MY_MIN(a_length, b_length);
  int res= my_strnncoll_gbk_internal(&a, &b, common);
  const uchar *tail, *tail_end;
  int sign;
 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
 #endif
  if (res || a_length == b_length)
    return res;

  /* Pick the unread remainder of the longer string */
  if (a_length > b_length)
  {
    tail= a;
    tail_end= a + (a_length - common);
    sign= 1;
  }
  else
  {
    tail= b;
    tail_end= b + (b_length - common);
    sign= -1;                             /* "b" is longer: invert the result */
  }

  for ( ; tail < tail_end; tail++)
  {
    if (*tail != ' ')
      return (*tail < ' ') ? -sign : sign;
  }
  return diff_if_only_endspace_difference ? sign : 0;
 }
 static size_t
 my_strnxfrm_gbk(CHARSET_INFO *cs,
                uchar *dst, size_t dstlen, uint nweights,
@@ -10735,11 +10655,23 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
 }
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+#define MY_FUNCTION_NAME(x)   my_ ## x ## _gbk_chinese_ci
 #define WEIGHT_MB1(x)        (sort_order_gbk[(uchar) (x)])
 #define WEIGHT_MB2(x,y)      (gbksortorder(gbkcode(x,y)))
 #include "strcoll.ic"
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _gbk_bin
 #define WEIGHT_MB1(x)        ((uchar) (x))
 #define WEIGHT_MB2(x,y)      (gbkcode(x,y))
 #include "strcoll.ic"
 static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
 {
-  NULL,			/* init */
+  NULL,                 /* init */
-  my_strnncoll_gbk,
+  my_strnncoll_gbk_chinese_ci,
-  my_strnncollsp_gbk,
+  my_strnncollsp_gbk_chinese_ci,
  my_strnxfrm_gbk,
  my_strnxfrmlen_simple,
  my_like_range_mb,
@@ -10750,6 +10682,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
  my_propagate_simple
 };
 /*
   Collation handler for gbk_bin.
   The strnncoll and strnncollsp entries are the functions generated by
   including "strcoll.ic" with MY_FUNCTION_NAME(x) defined as
   my_ ## x ## _gbk_bin.
 */
 static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
 {
  NULL,                 /* init */
  my_strnncoll_gbk_bin,
  my_strnncollsp_gbk_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
 };
 static MY_CHARSET_HANDLER my_charset_handler=
 {
  NULL,			/* init */
@@ -10814,7 +10764,7 @@ struct charset_info_st my_charset_gbk_chinese_ci=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_ci_handler
+    &my_collation_handler_gbk_chinese_ci
 };
 struct charset_info_st my_charset_gbk_bin=
@@ -10846,7 +10796,7 @@ struct charset_info_st my_charset_gbk_bin=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_mb_bin_handler
+    &my_collation_handler_gbk_bin
 };
--- a/strings/ctype-mb.ic
+++ b/strings/ctype-mb.ic
@@ -256,3 +256,5 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused
  return nchars0 - nchars;
 }
 #endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
 #undef MY_FUNCTION_NAME
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -186,6 +186,7 @@ static const uchar sort_order_sjis[]=
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _sjis
 #define IS_8BIT_CHAR(x)       issjiskata(x)
 #define IS_MB1_CHAR(x)        ((uchar) (x) < 0x80 || issjiskata(x))
 #define IS_MB2_CHAR(x,y)      (issjishead(x) && issjistail(y))
 #define DEFINE_ASIAN_ROUTINES
 #include "ctype-mb.ic"
@@ -1088,90 +1089,6 @@ static MY_UNICASE_INFO my_caseinfo_sjis=
 };
 /*
   Compare two sjis strings until either of them ends.

   When both strings have a multi-byte character at the current position
   (according to ismbchar_sjis()), the two characters are compared by
   their sjiscode() values. Otherwise one byte of each string is compared
   through sort_order_sjis[].

   Returns the difference at the first mismatch; in that case *a_res and
   *b_res are not modified. Returns 0 when no mismatch was found, and then
   *a_res and *b_res point to where the scan stopped.
 */
 static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
                                       const uchar **a_res, size_t a_length,
                                       const uchar **b_res, size_t b_length)
 {
  const uchar *pa= *a_res;
  const uchar *pb= *b_res;
  const uchar *pa_end= pa + a_length;
  const uchar *pb_end= pb + b_length;
  for ( ; pa < pa_end && pb < pb_end; )
  {
    uint a_char, b_char;
    if (!ismbchar_sjis(cs, (char*) pa, (char*) pa_end) ||
        !ismbchar_sjis(cs, (char*) pb, (char*) pb_end))
    {
      /* At least one side is not a multi-byte character: compare bytes */
      int wa= sort_order_sjis[(uchar) *pa];
      int wb= sort_order_sjis[(uchar) *pb];
      if (wa != wb)
        return wa - wb;
      pa++;
      pb++;
      continue;
    }
    a_char= sjiscode(pa[0], pa[1]);
    b_char= sjiscode(pb[0], pb[1]);
    if (a_char != b_char)
      return (int) a_char - (int) b_char;
    pa+= 2;
    pb+= 2;
  }
  *a_res= pa;
  *b_res= pb;
  return 0;
 }
 /*
   Compare two sjis strings.

   If my_strnncoll_sjis_internal() finds no difference, the result is the
   length difference, except that a longer "a" is reported as equal when
   b_is_prefix is set.
 */
 static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
                              const uchar *a, size_t a_length, 
                              const uchar *b, size_t b_length,
                              my_bool b_is_prefix)
 {
  int diff= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
  if (diff)
    return diff;
  if (b_is_prefix && a_length > b_length)
    return 0;
  return (int) (a_length - b_length);
 }
 /*
   Compare two sjis strings, ignoring trailing spaces.

   If my_strnncoll_sjis_internal() finds no difference and one of the
   strings still has unread bytes, those bytes are scanned: the first byte
   that is not a space decides the result (a byte below ' ' makes the
   longer string smaller, any other byte makes it greater). If only spaces
   remain, the strings are equal, unless diff_if_only_endspace_difference
   is in effect, in which case the longer string is considered greater.
 */
 static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
                                const uchar *a, size_t a_length, 
                                const uchar *b, size_t b_length,
                                my_bool diff_if_only_endspace_difference)
 {
  const uchar *a_end= a + a_length;
  const uchar *b_end= b + b_length;
  const uchar *tail, *tail_end;
  int sign;
  int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
 #endif
  if (res || (a == a_end && b == b_end))
    return res;

  /* Pick the unread remainder of the longer string */
  if (a == a_end)
  {
    tail= b;
    tail_end= b_end;
    sign= -1;                             /* "b" is longer: invert the result */
  }
  else
  {
    tail= a;
    tail_end= a_end;
    sign= 1;
  }

  for ( ; tail < tail_end; tail++)
  {
    if (*tail != ' ')
      return (*tail < ' ') ? -sign : sign;
  }
  return diff_if_only_endspace_difference ? sign : 0;
 }
 /* SJIS->Unicode conversion table */
 static uint16 sjis_to_unicode[65536]=
 {
@@ -34099,15 +34016,36 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
 }
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+/*
  sjis_japanese_ci and sjis_bin sort character blocks in this order:
  1. [00..7F]                - 7BIT characters (ASCII)
  2. [81..9F][40..7E,80..FC] - MB2 characters, part1
  3. [A1..DF]                - 8BIT characters (Kana)
  4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
 */
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _sjis_japanese_ci
 #define WEIGHT_PAD_SPACE     (256 * (int) ' ')
 #define WEIGHT_MB1(x)        (256 * (int) sort_order_sjis[(uchar) (x)])
 #define WEIGHT_MB2(x,y)      (sjiscode(x, y))
 #include "strcoll.ic"
 #define MY_FUNCTION_NAME(x)   my_ ## x ## _sjis_bin
 #define WEIGHT_PAD_SPACE     (256 * (int) ' ')
 #define WEIGHT_MB1(x)        (256 * (int) (uchar) (x))
 #define WEIGHT_MB2(x,y)      (sjiscode(x, y))
 #include "strcoll.ic"
 static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
 {
-  NULL,			/* init */
+  NULL,                 /* init */
-  my_strnncoll_sjis,
+  my_strnncoll_sjis_japanese_ci,
-  my_strnncollsp_sjis,
+  my_strnncollsp_sjis_japanese_ci,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
-  my_wildcmp_mb,	/* wildcmp  */
+  my_wildcmp_mb,
  my_strcasecmp_8bit,
  my_instr_mb,
  my_hash_sort_simple,
@@ -34115,6 +34053,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
 };
 /*
   Collation handler for sjis_bin.
   The strnncoll and strnncollsp entries are the functions generated by
   including "strcoll.ic" with MY_FUNCTION_NAME(x) defined as
   my_ ## x ## _sjis_bin.
 */
 static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
 {
  NULL,                 /* init */
  my_strnncoll_sjis_bin,
  my_strnncollsp_sjis_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
 };
 static MY_CHARSET_HANDLER my_charset_handler=
 {
  NULL,			/* init */
@@ -34179,7 +34133,7 @@ struct charset_info_st my_charset_sjis_japanese_ci=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_ci_handler
+    &my_collation_handler_sjis_japanese_ci
 };
 struct charset_info_st my_charset_sjis_bin=
@@ -34211,7 +34165,7 @@ struct charset_info_st my_charset_sjis_bin=
    1,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_handler,
-    &my_collation_mb_bin_handler
+    &my_collation_handler_sjis_bin
 };
 #endif
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -0,0 +1,231 @@
 /*
   Copyright (c) 2015, MariaDB Foundation
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #ifndef MY_FUNCTION_NAME
 #error MY_FUNCTION_NAME is not defined
 #endif
 /*
  The weight for automatically padded spaces when comparing strings with
  the PAD SPACE property.
  Should normally be equal to the weight of a regular space.
 */
 #ifndef WEIGHT_PAD_SPACE
 #define WEIGHT_PAD_SPACE  (' ')
 #endif
 /*
  Weight of an illegal byte, must follow these rules:
  1. Must be greater than weight of any normal character in the collation.
  2. Two different bad bytes must have different weights and must be
     compared in their binary order.
  Depends on mbmaxlen of the character set, as well as how the collation
  sorts various single-byte and multi-byte character blocks.
  The macro below is the default definition, it is suitable for mbmaxlen=2
  character sets that sort all multi-byte characters after all single-byte
  characters: big5, euckr, gb2312, gbk.
  All mbmaxlen>2 character sets must provide their own definitions.
  All collations that have a more complex order (than just MB1 followed by MB2)
  must also provide their own definitions (see definitions for
  cp932_japanese_ci and sjis_japanese_ci as examples of a more complex order).
 */
 #ifndef WEIGHT_ILSEQ
 #define WEIGHT_ILSEQ(x)   (0xFF00 + (x))
 #endif
 /**
  Fetch the next collation element from a string and report its weight.

  The element is one of:
  - an auto-padded space, when the string has already ended
  - a valid single-byte character
  - a valid two-byte character
  - a bad byte (an invalid byte, or a two-byte sequence cut by the end
    of the string)

  @param [OUT] weight - the weight is returned here
  @param str          - the current position in the string
  @param end          - the end of the string
  @return             - the number of bytes scanned:
                        0 for an auto-padded space,
                        1 for a single-byte character or a bad byte,
                        2 for a two-byte character

  The including source file must define the following macros:
  IS_MB1_CHAR(x)
  IS_MB2_CHAR(x,y)
  WEIGHT_MB1(x)
  WEIGHT_MB2(x,y)
  and may override the defaults provided by this file for:
  WEIGHT_PAD_SPACE
  WEIGHT_ILSEQ(x)
 */
 static inline uint
 MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
 {
  if (str >= end)
  {
    /* Nothing left to read: produce an auto-padded space */
    *weight= WEIGHT_PAD_SPACE;
    return 0;
  }

  if (IS_MB1_CHAR(*str))
  {
    *weight= WEIGHT_MB1(*str);
    return 1;
  }

  /* The tail byte is examined only when it lies inside the string */
  if (str + 2 <= end && IS_MB2_CHAR(str[0], str[1]))
  {
    *weight= WEIGHT_MB2(str[0], str[1]);
    return 2;
  }

  /* Neither MB1 nor a complete MB2: consume one bad byte */
  *weight= WEIGHT_ILSEQ(str[0]);
  return 1;
 }
 /**
  Compare two strings according to the collation,
  without handling the PAD SPACE property.

  Weights are fetched one element at a time with scan_weight().
  A string that ends first is smaller than the other one, unless "b"
  ended first and the caller only asks whether "b" is a prefix of "a".

  The end-of-string cases return a fixed -1 / +1 instead of the scanned
  weight. A weight can be 0 (for example the byte 0x00 in a collation
  whose WEIGHT_MB1(x) is the byte value itself), and returning such a
  weight would report a string and its proper prefix as equal.

  Note, cs->coll->strnncoll() is usually used to compare identifiers.
  Perhaps we should eventually (in 10.2?) create a new collation
  my_charset_utf8_general_ci_no_pad and have only one comparison function
  in MY_COLLATION_HANDLER.

  @param cs          - the character set and collation
  @param a           - the left string
  @param a_length    - the length of the left string
  @param b           - the right string
  @param b_length    - the length of the right string
  @param b_is_prefix - if the caller wants to check if "b" is a prefix of "a"
  @return            - negative, zero or positive, if "a" is smaller than,
                       equal to, or greater than "b"
 */
 static int
 MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)),
                            const uchar *a, size_t a_length,
                            const uchar *b, size_t b_length,
                            my_bool b_is_prefix)
 {
  const uchar *a_end= a + a_length;
  const uchar *b_end= b + b_length;
  for ( ; ; )
  {
    int a_weight, b_weight, res;
    uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
    uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
    /*
      a_wlen  b_wlen Comment
      ------  ------ -------
      0       0      Strings ended simultaneously, "a" and "b" are equal.
      0       >0     "a" is a prefix of "b", so "a" is smaller.
      >0      0      "b" is a prefix of "a", check b_is_prefix.
      >0      >0     Two weights were scanned, check weight difference.

      Weights are zero or positive, so they cannot be used as the
      result in the end-of-string cases.
    */
    if (!a_wlen)
      return b_wlen ? -1 : 0;

    if (!b_wlen)
      return b_is_prefix ? 0 : 1;

    if ((res= (a_weight - b_weight)))
      return res;
    /*
      None of the strings has ended yet.
    */
    DBUG_ASSERT(a < a_end);
    DBUG_ASSERT(b < b_end);
    a+= a_wlen;
    b+= b_wlen;
  }
  DBUG_ASSERT(0);
  return 0;
 }
 /**
  Compare two strings according to the collation, with PAD SPACE handling.

  Both strings are scanned in parallel with scan_weight(). Once a string
  has ended, scan_weight() keeps producing WEIGHT_PAD_SPACE for it, so the
  shorter string behaves as if it were extended with spaces.

  @param cs          - the character set and collation
  @param a           - the left string
  @param a_length    - the length of the left string
  @param b           - the right string
  @param b_length    - the length of the right string
  @param diff_if_only_endspace_difference - not used in the code.
                       TODO: this should be eventually removed (in 10.2?)
  @return            - the comparison result
 */
 static int
 MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
                              const uchar *a, size_t a_length,
                              const uchar *b, size_t b_length,
                              my_bool diff_if_only_endspace_difference
                              __attribute__((unused)))
 {
  const uchar *a_end= a + a_length;
  const uchar *b_end= b + b_length;
  int a_weight, b_weight;
  uint a_wlen, b_wlen;
  for ( ; ; a+= a_wlen, b+= b_wlen)
  {
    a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
    b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
    /*
      Different weights decide the result, no matter whether they come from
      a real character, a bad or incomplete byte sequence, or an
      auto-generated trailing space.
    */
    if (a_weight != b_weight)
      return a_weight - b_weight;
    /*
      Two auto-generated trailing spaces: both strings have ended.
    */
    if (a_wlen == 0 && b_wlen == 0)
      break;
    /*
      At least one of the strings has not ended yet, continue comparison.
    */
    DBUG_ASSERT(a < a_end || b < b_end);
  }
  DBUG_ASSERT(a == a_end);
  DBUG_ASSERT(b == b_end);
  return 0;
 }
 /*
  We usually include this file at least two times from the same source file,
  for the _ci and the _bin collations. Prepare for the second inclusion.
 */
 #undef MY_FUNCTION_NAME
 #undef WEIGHT_ILSEQ
 #undef WEIGHT_MB1
 #undef WEIGHT_MB2
 #undef WEIGHT_PAD_SPACE
--- a/unittest/strings/strings-t.c
+++ b/unittest/strings/strings-t.c
@@ -95,11 +95,361 @@ static CHARSET_INFO *charset_list[]=
 };
 /*
  One test case for cs->coll->strnncollsp(): two byte strings and the
  expected sign of their comparison. Tables of these records are
  terminated by a record whose "a" member is NULL.
 */
 typedef struct
 {
  const char *a;  /* Left string (may contain bad byte sequences)      */
  size_t alen;    /* Length of the left string in bytes                */
  const char *b;  /* Right string                                      */
  size_t blen;    /* Length of the right string in bytes               */
  int res;        /* Expected result; only its sign (<0, 0, >0) is
                     compared against the actual result                */
 } STRNNCOLL_PARAM;
 /*
  Expand a string literal into two comma-separated initializers:
  the literal itself and its length without the terminating '\0'.
  The argument must be a string literal (or a char array), not a pointer,
  because the length is computed with sizeof.
 */
 #define CSTR(x)  (x),(sizeof(x)-1)
 /*
  Byte sequence types used in the tests:
    8BIT     - an 8-bit byte (>= 0x80) which makes a single-byte character
    MB2      - two bytes that make a valid character
    H2       - a byte which is a valid MB2 head byte
    T2       - a byte which is a valid MB2 tail byte
    ILSEQ    - a byte which makes an illegal sequence
    H2+ILSEQ - a sequence that starts with a valid H2 byte,
               but not followed by a valid T2 byte.
  Charset H2               T2                      8BIT
  ------- ---------------- ---------------         -------- 
  big5    [A1..F9]         [40..7E,A1..FE]
  euckr   [81..FE]         [41..5A,61..7A,81..FE]
  gb2312  [A1..F7]         [A1..FE]
  gbk     [81..FE]         [40..7E,80..FE]
  cp932   [81..9F,E0..FC]  [40..7E,80..FC]         [A1..DF]
  sjis    [81..9F,E0..FC]  [40..7E,80..FC]         [A1..DF]
  Essential byte sequences in various character sets:
  Sequence  big5   cp932      euckr  gb2312    gbk   sjis
  --------  ----   -----      -----  ------    ---   ----
  80        ILSEQ  ILSEQ      ILSEQ  ILSEQ     ILSEQ ILSEQ
  81        ILSEQ  H2         H2     ILSEQ     H2    H2
  A1        H2     8BIT       H2     H2        H2    8BIT
  A1A1      MB2    8BIT+8BIT  MB2    MB2       MB2   8BIT+8BIT
  E0E0      MB2    MB2        MB2    MB2       MB2   MB2
  F9FE      MB2    H2+ILSEQ   MB2    ILSEQ+T2  MB2   H2+ILSEQ
 */
 /*
  Test cases for character sets that have the following byte sequences:
    80   - ILSEQ
    81   - ILSEQ or H2
    F9   - ILSEQ or H2
    A1A1 - MB2 or 8BIT+8BIT
    E0E0 - MB2
  Every record is {a, alen, b, blen, expected sign of strnncollsp(a,b)}.
  The table is read-only test data private to this file and is terminated
  by a record with a NULL "a" member.
 */
 static const STRNNCOLL_PARAM strcoll_mb2_common[]=
 {
  /* Compare two good sequences */
  {CSTR(""),         CSTR(""),           0},
  {CSTR(""),         CSTR(" "),          0},
  {CSTR(""),         CSTR("A"),         -1},
  {CSTR(""),         CSTR("a"),         -1},
  {CSTR(""),         CSTR("\xA1\xA1"),  -1},
  {CSTR(""),         CSTR("\xE0\xE0"),  -1},
  {CSTR(" "),        CSTR(""),          0},
  {CSTR(" "),        CSTR(" "),         0},
  {CSTR(" "),        CSTR("A"),        -1},
  {CSTR(" "),        CSTR("a"),        -1},
  {CSTR(" "),        CSTR("\xA1\xA1"), -1},
  {CSTR(" "),        CSTR("\xE0\xE0"), -1},
  {CSTR("a"),        CSTR(""),          1},
  {CSTR("a"),        CSTR(" "),         1},
  {CSTR("a"),        CSTR("a"),         0},
  {CSTR("a"),        CSTR("\xA1\xA1"), -1},
  {CSTR("a"),        CSTR("\xE0\xE0"), -1},
  {CSTR("\xA1\xA1"), CSTR("\xA1\xA1"),  0},
  {CSTR("\xA1\xA1"), CSTR("\xE0\xE0"), -1},
  /* Compare a good character to an illegal or an incomplete sequence */
  {CSTR(""),         CSTR("\x80"),     -1},
  {CSTR(""),         CSTR("\x81"),     -1},
  {CSTR(""),         CSTR("\xF9"),     -1},
  {CSTR(" "),        CSTR("\x80"),     -1},
  {CSTR(" "),        CSTR("\x81"),     -1},
  {CSTR(" "),        CSTR("\xF9"),     -1},
  {CSTR("a"),        CSTR("\x80"),     -1},
  {CSTR("a"),        CSTR("\x81"),     -1},
  {CSTR("a"),        CSTR("\xF9"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\x80"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\x81"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9"),     -1},
  {CSTR("\xE0\xE0"), CSTR("\x80"),     -1},
  {CSTR("\xE0\xE0"), CSTR("\x81"),     -1},
  {CSTR("\xE0\xE0"), CSTR("\xF9"),     -1},
  /* Compare two bad/incomplete sequences */
  {CSTR("\x80"),     CSTR("\x80"),      0},
  {CSTR("\x80"),     CSTR("\x81"),     -1},
  {CSTR("\x80"),     CSTR("\xF9"),     -1},
  {CSTR("\x81"),     CSTR("\x81"),      0},
  {CSTR("\x81"),     CSTR("\xF9"),     -1},
  /* End-of-table marker */
  {NULL, 0, NULL, 0, 0}
 };
 /*
  Test cases for character sets that have good mb2 characters A1A1 and F9FE.
  Every record is {a, alen, b, blen, expected sign of strnncollsp(a,b)}.
  The table is read-only test data private to this file and is terminated
  by a record with a NULL "a" member.
 */
 static const STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
 {
  /* Compare two good characters */
  {CSTR(""),         CSTR("\xF9\xFE"), -1},
  {CSTR(" "),        CSTR("\xF9\xFE"), -1},
  {CSTR("a")       , CSTR("\xF9\xFE"), -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
  {CSTR("\xF9\xFE"), CSTR("\xF9\xFE"),  0},
  /* Compare a good character to an illegal or an incomplete sequence */
  {CSTR(""),         CSTR("\xA1"),     -1},
  {CSTR(""),         CSTR("\xF9"),     -1},
  {CSTR("a"),        CSTR("\xA1"),     -1},
  {CSTR("a"),        CSTR("\xF9"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xA1"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9"),     -1},
  {CSTR("\xF9\xFE"), CSTR("\x80"),     -1},
  {CSTR("\xF9\xFE"), CSTR("\x81"),     -1},
  {CSTR("\xF9\xFE"), CSTR("\xA1"),     -1},
  {CSTR("\xF9\xFE"), CSTR("\xF9"),     -1},
  /* Compare two bad/incomplete sequences */
  {CSTR("\x80"),     CSTR("\xA1"),     -1},
  {CSTR("\x80"),     CSTR("\xF9"),     -1},
  /* End-of-table marker */
  {NULL, 0, NULL, 0, 0}
 };
 /*
  Test cases for character sets that have:
    A1A1 - a good mb2 character
    F9FE - a bad sequence
  Every record is {a, alen, b, blen, expected sign of strnncollsp(a,b)}.
  The table is read-only test data private to this file and is terminated
  by a record with a NULL "a" member.
 */
 static const STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
 {
  /* Compare a good character to an illegal or an incomplete sequence */
  {CSTR(""),         CSTR("\xF9\xFE"), -1},
  {CSTR(" "),        CSTR("\xF9\xFE"), -1},
  {CSTR("a")       , CSTR("\xF9\xFE"), -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
  {CSTR(""),         CSTR("\xA1"),     -1},
  {CSTR(""),         CSTR("\xF9"),     -1},
  {CSTR("a"),        CSTR("\xA1"),     -1},
  {CSTR("a"),        CSTR("\xF9"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xA1"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9"),     -1},
  /* Compare two bad/incomplete sequences */
  {CSTR("\xF9\xFE"), CSTR("\x80"),     1},
  {CSTR("\xF9\xFE"), CSTR("\x81"),     1},
  {CSTR("\xF9\xFE"), CSTR("\xA1"),     1},
  {CSTR("\xF9\xFE"), CSTR("\xF9"),     1},
  {CSTR("\x80"),     CSTR("\xA1"),     -1},
  {CSTR("\x80"),     CSTR("\xF9"),     -1},
  {CSTR("\xF9\xFE"), CSTR("\xF9\xFE"),  0},
  /* End-of-table marker */
  {NULL, 0, NULL, 0, 0}
 };
 /*
  Test cases for character sets that have:
    80   - ILSEQ or H2
    81   - ILSEQ or H2
    A1   - 8BIT
    F9   - ILSEQ or H2
    F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
  Every record is {a, alen, b, blen, expected sign of strnncollsp(a,b)}.
  The table is read-only test data private to this file and is terminated
  by a record with a NULL "a" member.
 */
 static const STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
 {
  /* Compare two good characters (A1A1 is two 8BIT characters here) */
  {CSTR(""),         CSTR("\xA1"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xA1"),      1},
  /* Compare a good character to an illegal or an incomplete sequence */
  {CSTR(""),         CSTR("\xF9"),     -1},
  {CSTR(""),         CSTR("\xF9\xFE"), -1},
  {CSTR(" "),        CSTR("\xF9\xFE"), -1},
  {CSTR("a"),        CSTR("\xF9\xFE"), -1},
  {CSTR("a"),        CSTR("\xA1"),     -1},
  {CSTR("a"),        CSTR("\xF9"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9"),     -1},
  {CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
  /*
    Compare the bad sequence F9FE to shorter sequences:
    80, 81 and F9 are bad or incomplete, A1 is a good 8BIT character.
  */
  {CSTR("\xF9\xFE"), CSTR("\x80"),     1},
  {CSTR("\xF9\xFE"), CSTR("\x81"),     1},
  {CSTR("\xF9\xFE"), CSTR("\xA1"),     1},
  {CSTR("\xF9\xFE"), CSTR("\xF9"),     1},
  /* A bad byte is greater than a good 8BIT character */
  {CSTR("\x80"),     CSTR("\xA1"),      1},
  /* Compare two bad/incomplete sequences */
  {CSTR("\x80"),     CSTR("\xF9"),     -1},
  {CSTR("\xF9\xFE"), CSTR("\xF9\xFE"),  0},
  /* End-of-table marker */
  {NULL, 0, NULL, 0, 0}
 };
 /*
  Test cases for character sets (e.g. cp932 and sjis) that have:
    8181 - a valid MB2 character
    A1   - a valid 8BIT character
    E0E0 - a valid MB2 character
  and sort in this order:
    8181 < A1 < E0E0
  Every record is {a, alen, b, blen, expected sign of strnncollsp(a,b)}.
  The table is read-only test data private to this file and is terminated
  by a record with a NULL "a" member.
 */
 static const STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
 {
  {CSTR("\x81\x81"), CSTR("\xA1"),     -1},
  {CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
  {CSTR("\xA1"),     CSTR("\xE0\xE0"), -1},
  /* End-of-table marker */
  {NULL, 0, NULL, 0, 0}
 };
 /*
  Convert a byte string to upper-case hexadecimal text ("A1FE...").

  @param dst     - the destination buffer; always '\0'-terminated on return
                   unless dstlen is 0, in which case it is not touched
  @param dstlen  - the size of the destination buffer in bytes
  @param src     - the bytes to convert
  @param srclen  - the number of bytes in src

  The output is silently truncated when the buffer is too small:
  a byte is converted only while more than 3 bytes of room remain.
 */
 static void
 str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
 {
  static const char hexdigits[]= "0123456789ABCDEF";
  const char *srcend= src + srclen;
  size_t room= dstlen;  /* Bytes still available at dst */
  if (dstlen == 0)
    return;             /* No room even for the terminating '\0' */
  *dst= '\0';
  /*
    Track the remaining room as a count rather than comparing "dst + 3"
    to the buffer end: forming a pointer past the end of a short buffer
    is undefined behaviour.
  */
  while (room > 3 && src < srcend)
  {
    unsigned char byte= (unsigned char) *src++;
    *dst++= hexdigits[byte >> 4];
    *dst++= hexdigits[byte & 0x0F];
    *dst= '\0';
    room-= 2;
  }
 }
 /*
  Tell whether two comparison results mean the same thing,
  i.e. whether they have the same sign: both negative,
  both zero, or both positive.

  @return 1 if the signs are equal, 0 otherwise
 */
 static int
 eqres(int ares, int bres)
 {
  /* Reduce each result to -1, 0 or +1 and compare the reduced values */
  int asign= (ares > 0) - (ares < 0);
  int bsign= (bres > 0) - (bres < 0);
  return asign == bsign;
 }
 /*
  Run a table of strnncollsp() test cases against one collation.

  Every case is compared as (a,b) and, if that gave the expected sign,
  also in reverse order as (b,a), expecting the opposite sign.
  One diagnostic line is printed per case.

  @param cs     - the collation to test
  @param param  - the test cases, terminated by a record with a NULL "a"
  @return       - the number of failed cases
 */
 static int
 strcollsp(CHARSET_INFO *cs, const STRNNCOLL_PARAM *param)
 {
  const STRNNCOLL_PARAM *tc;
  int nfailed= 0;
  diag("%-20s %-10s %-10s %10s %10s", "Collation", "a", "b", "ExpectSign", "Actual");
  for (tc= param; tc->a; tc++)
  {
    char ahex[64], bhex[64];
    int forward, backward, forward_ok;
    forward= cs->coll->strnncollsp(cs, (uchar *) tc->a, tc->alen,
                                       (uchar *) tc->b, tc->blen, 0);
    forward_ok= eqres(forward, tc->res);
    str2hex(ahex, sizeof(ahex), tc->a, tc->alen);
    str2hex(bhex, sizeof(bhex), tc->b, tc->blen);
    diag("%-20s %-10s %-10s %10d %10d%s",
         cs->name, ahex, bhex, tc->res, forward,
         forward_ok ? "" : " FAILED");
    if (!forward_ok)
    {
      /* Already counted as failed: the reverse order is not tested */
      nfailed++;
      continue;
    }
    /* Swapping the arguments must flip the sign of the result */
    backward= cs->coll->strnncollsp(cs, (uchar *) tc->b, tc->blen,
                                        (uchar *) tc->a, tc->alen, 0);
    if (eqres(backward, -tc->res))
      continue;
    diag("Comparison in reverse order failed. Expected %d, got %d",
         -tc->res, backward);
    nfailed++;
  }
  return nfailed;
 }
 /*
  Run the strnncollsp() test tables against every Asian multi-byte
  collation that is compiled in, both the _ci and the _bin variant.
  Each character set gets the common table plus the tables that match
  how it classifies the byte sequences A1, A1A1 and F9FE.

  @return - the total number of failed test cases
 */
 static int
 test_strcollsp(void)
 {
  int failed= 0;
 #ifdef HAVE_CHARSET_big5
  failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
  failed+= strcollsp(&my_charset_big5_bin,        strcoll_mb2_common);
  failed+= strcollsp(&my_charset_big5_bin,        strcoll_mb2_A1A1_mb2_F9FE);
 #endif
 #ifdef HAVE_CHARSET_cp932
  failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_cp932_bin,         strcoll_mb2_common);
  failed+= strcollsp(&my_charset_cp932_bin,         strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_8181_A1_E0E0);
  failed+= strcollsp(&my_charset_cp932_bin,         strcoll_8181_A1_E0E0);
 #endif
 #ifdef HAVE_CHARSET_euckr
  failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_A1A1_mb2_F9FE);
  failed+= strcollsp(&my_charset_euckr_bin,       strcoll_mb2_common);
  failed+= strcollsp(&my_charset_euckr_bin,       strcoll_mb2_A1A1_mb2_F9FE);
 #endif
 #ifdef HAVE_CHARSET_gb2312
  failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_A1A1_bad_F9FE);
  failed+= strcollsp(&my_charset_gb2312_bin,        strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gb2312_bin,        strcoll_mb2_A1A1_bad_F9FE);
 #endif
 #ifdef HAVE_CHARSET_gbk
  failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
  failed+= strcollsp(&my_charset_gbk_bin,        strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gbk_bin,        strcoll_mb2_A1A1_mb2_F9FE);
 #endif
 #ifdef HAVE_CHARSET_sjis
  failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_sjis_bin,         strcoll_mb2_common);
  failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_sjis_bin,         strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
  failed+= strcollsp(&my_charset_sjis_bin,         strcoll_8181_A1_E0E0);
 #endif
  return failed;
 }
 int main()
 {
  size_t i, failed= 0;
-  plan(1);
+  plan(2);
  diag("Testing my_like_range_xxx() functions");
  for (i= 0; i < array_elements(charset_list); i++)
@@ -112,5 +462,10 @@ int main()
    }
  }
  ok(failed == 0, "Testing my_like_range_xxx() functions");
  diag("Testing cs->coll->strnncollsp()");
  failed= test_strcollsp();
  ok(failed == 0, "Testing cs->coll->strnncollsp()");
  return exit_status();
 }