mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-8214 Asian MB2 charsets: compare broken bytes as "greater than any non-broken character"
This commit is contained in:
@@ -49,6 +49,7 @@
|
|||||||
#define big5tail(e) ((uchar)(e&0xff))
|
#define big5tail(e) ((uchar)(e&0xff))
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
|
||||||
|
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||||
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
|
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
@@ -849,89 +850,6 @@ static uint16 big5strokexfrm(uint16 i)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncoll_big5_internal(const uchar **a_res,
|
|
||||||
const uchar **b_res, size_t length)
|
|
||||||
{
|
|
||||||
const uchar *a= *a_res, *b= *b_res;
|
|
||||||
|
|
||||||
while (length--)
|
|
||||||
{
|
|
||||||
if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1)))
|
|
||||||
{
|
|
||||||
if (*a != *b || *(a+1) != *(b+1))
|
|
||||||
return ((int) big5code(*a,*(a+1)) -
|
|
||||||
(int) big5code(*b,*(b+1)));
|
|
||||||
a+= 2;
|
|
||||||
b+= 2;
|
|
||||||
length--;
|
|
||||||
}
|
|
||||||
else if (sort_order_big5[*a++] !=
|
|
||||||
sort_order_big5[*b++])
|
|
||||||
return ((int) sort_order_big5[a[-1]] -
|
|
||||||
(int) sort_order_big5[b[-1]]);
|
|
||||||
}
|
|
||||||
*a_res= a;
|
|
||||||
*b_res= b;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Compare strings */
|
|
||||||
|
|
||||||
static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool b_is_prefix)
|
|
||||||
{
|
|
||||||
size_t length= MY_MIN(a_length, b_length);
|
|
||||||
int res= my_strnncoll_big5_internal(&a, &b, length);
|
|
||||||
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* compare strings, ignore end space */
|
|
||||||
|
|
||||||
static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool diff_if_only_endspace_difference)
|
|
||||||
{
|
|
||||||
size_t length= MY_MIN(a_length, b_length);
|
|
||||||
int res= my_strnncoll_big5_internal(&a, &b, length);
|
|
||||||
|
|
||||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
|
||||||
diff_if_only_endspace_difference= 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!res && a_length != b_length)
|
|
||||||
{
|
|
||||||
const uchar *end;
|
|
||||||
int swap= 1;
|
|
||||||
if (diff_if_only_endspace_difference)
|
|
||||||
res= 1; /* Assume 'a' is bigger */
|
|
||||||
/*
|
|
||||||
Check the next not space character of the longer key. If it's < ' ',
|
|
||||||
then it's smaller than the other key.
|
|
||||||
*/
|
|
||||||
if (a_length < b_length)
|
|
||||||
{
|
|
||||||
/* put longer key in a */
|
|
||||||
a_length= b_length;
|
|
||||||
a= b;
|
|
||||||
swap= -1; /* swap sign of result */
|
|
||||||
res= -res;
|
|
||||||
}
|
|
||||||
for (end= a + a_length-length; a < end ; a++)
|
|
||||||
{
|
|
||||||
if (*a != ' ')
|
|
||||||
return (*a < ' ') ? -swap : swap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
my_strnxfrm_big5(CHARSET_INFO *cs,
|
my_strnxfrm_big5(CHARSET_INFO *cs,
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
uchar *dst, size_t dstlen, uint nweights,
|
||||||
@@ -6853,11 +6771,23 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_chinese_ci
|
||||||
|
#define WEIGHT_MB1(x) (sort_order_big5[(uchar) (x)])
|
||||||
|
#define WEIGHT_MB2(x,y) (big5code(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_bin
|
||||||
|
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) (big5code(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_big5,
|
my_strnncoll_big5_chinese_ci,
|
||||||
my_strnncollsp_big5,
|
my_strnncollsp_big5_chinese_ci,
|
||||||
my_strnxfrm_big5,
|
my_strnxfrm_big5,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
@@ -6868,6 +6798,23 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
|||||||
my_propagate_simple
|
my_propagate_simple
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_big5_bin,
|
||||||
|
my_strnncollsp_big5_bin,
|
||||||
|
my_strnxfrm_mb,
|
||||||
|
my_strnxfrmlen_simple,
|
||||||
|
my_like_range_mb,
|
||||||
|
my_wildcmp_mb_bin,
|
||||||
|
my_strcasecmp_mb_bin,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_mb_bin,
|
||||||
|
my_propagate_simple
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static MY_CHARSET_HANDLER my_charset_big5_handler=
|
static MY_CHARSET_HANDLER my_charset_big5_handler=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
@@ -6931,7 +6878,7 @@ struct charset_info_st my_charset_big5_chinese_ci=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_big5_handler,
|
&my_charset_big5_handler,
|
||||||
&my_collation_big5_chinese_ci_handler
|
&my_collation_handler_big5_chinese_ci
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -6964,7 +6911,7 @@ struct charset_info_st my_charset_big5_bin=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_big5_handler,
|
&my_charset_big5_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_handler_big5_bin
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@@ -185,6 +185,7 @@ static const uchar sort_order_cp932[]=
|
|||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
|
||||||
#define IS_8BIT_CHAR(x) iscp932kata(x)
|
#define IS_8BIT_CHAR(x) iscp932kata(x)
|
||||||
|
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || iscp932kata(x))
|
||||||
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
|
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
@@ -1717,90 +1718,6 @@ MY_UNICASE_INFO my_caseinfo_cp932=
|
|||||||
my_caseinfo_pages_cp932
|
my_caseinfo_pages_cp932
|
||||||
};
|
};
|
||||||
|
|
||||||
static int my_strnncoll_cp932_internal(CHARSET_INFO *cs,
|
|
||||||
const uchar **a_res, size_t a_length,
|
|
||||||
const uchar **b_res, size_t b_length)
|
|
||||||
{
|
|
||||||
const uchar *a= *a_res, *b= *b_res;
|
|
||||||
const uchar *a_end= a + a_length;
|
|
||||||
const uchar *b_end= b + b_length;
|
|
||||||
while (a < a_end && b < b_end)
|
|
||||||
{
|
|
||||||
if (ismbchar_cp932(cs,(char*) a, (char*) a_end) &&
|
|
||||||
ismbchar_cp932(cs,(char*) b, (char*) b_end))
|
|
||||||
{
|
|
||||||
uint a_char= cp932code(*a, *(a+1));
|
|
||||||
uint b_char= cp932code(*b, *(b+1));
|
|
||||||
if (a_char != b_char)
|
|
||||||
return a_char - b_char;
|
|
||||||
a += 2;
|
|
||||||
b += 2;
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
if (sort_order_cp932[(uchar)*a] != sort_order_cp932[(uchar)*b])
|
|
||||||
return sort_order_cp932[(uchar)*a] - sort_order_cp932[(uchar)*b];
|
|
||||||
a++;
|
|
||||||
b++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*a_res= a;
|
|
||||||
*b_res= b;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncoll_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool b_is_prefix)
|
|
||||||
{
|
|
||||||
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
|
|
||||||
if (b_is_prefix && a_length > b_length)
|
|
||||||
a_length= b_length;
|
|
||||||
return res ? res : (int) (a_length - b_length);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool diff_if_only_endspace_difference
|
|
||||||
__attribute__((unused)))
|
|
||||||
{
|
|
||||||
const uchar *a_end= a + a_length;
|
|
||||||
const uchar *b_end= b + b_length;
|
|
||||||
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
|
|
||||||
|
|
||||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
|
||||||
diff_if_only_endspace_difference= 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!res && (a != a_end || b != b_end))
|
|
||||||
{
|
|
||||||
int swap= 1;
|
|
||||||
if (diff_if_only_endspace_difference)
|
|
||||||
res= 1; /* Assume 'a' is bigger */
|
|
||||||
/*
|
|
||||||
Check the next not space character of the longer key. If it's < ' ',
|
|
||||||
then it's smaller than the other key.
|
|
||||||
*/
|
|
||||||
if (a == a_end)
|
|
||||||
{
|
|
||||||
/* put shorter key in a */
|
|
||||||
a_end= b_end;
|
|
||||||
a= b;
|
|
||||||
swap= -1; /* swap sign of result */
|
|
||||||
res= -res;
|
|
||||||
}
|
|
||||||
for (; a < a_end ; a++)
|
|
||||||
{
|
|
||||||
if (*a != (uchar) ' ')
|
|
||||||
return (*a < (uchar) ' ') ? -swap : swap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static const uint16 cp932_to_unicode[65536]=
|
static const uint16 cp932_to_unicode[65536]=
|
||||||
{
|
{
|
||||||
@@ -34720,15 +34637,36 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
/*
|
||||||
|
cp932_japanese_ci and cp932_bin sort character blocks in this order:
|
||||||
|
1. [00..7F] - 7BIT characters (ASCII)
|
||||||
|
2. [81..9F][40..7E,80..FC] - MB2 characters, part1
|
||||||
|
3. [A1..DF] - 8BIT characters (Kana)
|
||||||
|
4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
|
||||||
|
*/
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_japanese_ci
|
||||||
|
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||||
|
#define WEIGHT_MB1(x) (256 * (int) sort_order_cp932[(uchar) (x)])
|
||||||
|
#define WEIGHT_MB2(x,y) (cp932code(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_bin
|
||||||
|
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||||
|
#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) (cp932code(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_cp932,
|
my_strnncoll_cp932_japanese_ci,
|
||||||
my_strnncollsp_cp932,
|
my_strnncollsp_cp932_japanese_ci,
|
||||||
my_strnxfrm_mb,
|
my_strnxfrm_mb,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb, /* wildcmp */
|
my_wildcmp_mb,
|
||||||
my_strcasecmp_8bit,
|
my_strcasecmp_8bit,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
my_hash_sort_simple,
|
my_hash_sort_simple,
|
||||||
@@ -34736,6 +34674,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_cp932_bin,
|
||||||
|
my_strnncollsp_cp932_bin,
|
||||||
|
my_strnxfrm_mb,
|
||||||
|
my_strnxfrmlen_simple,
|
||||||
|
my_like_range_mb,
|
||||||
|
my_wildcmp_mb_bin,
|
||||||
|
my_strcasecmp_mb_bin,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_mb_bin,
|
||||||
|
my_propagate_simple
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static MY_CHARSET_HANDLER my_charset_handler=
|
static MY_CHARSET_HANDLER my_charset_handler=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
@@ -34800,7 +34754,7 @@ struct charset_info_st my_charset_cp932_japanese_ci=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_handler_cp932_japanese_ci
|
||||||
};
|
};
|
||||||
|
|
||||||
struct charset_info_st my_charset_cp932_bin=
|
struct charset_info_st my_charset_cp932_bin=
|
||||||
@@ -34832,7 +34786,7 @@ struct charset_info_st my_charset_cp932_bin=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_handler_cp932_bin
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -201,8 +201,10 @@ static const uchar sort_order_euc_kr[]=
|
|||||||
iseuc_kr_tail2(c) || \
|
iseuc_kr_tail2(c) || \
|
||||||
iseuc_kr_tail3(c))
|
iseuc_kr_tail3(c))
|
||||||
|
|
||||||
|
#define euckrcode(c,d) (((uchar)(c) <<8) | (uchar)(d))
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
|
||||||
|
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||||
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
|
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
@@ -9938,21 +9940,50 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_korean_ci
|
||||||
|
#define WEIGHT_MB1(x) (sort_order_euc_kr[(uchar) (x)])
|
||||||
|
#define WEIGHT_MB2(x,y) (euckrcode(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_bin
|
||||||
|
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) (euckrcode(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_simple, /* strnncoll */
|
my_strnncoll_euckr_korean_ci,
|
||||||
my_strnncollsp_simple,
|
my_strnncollsp_euckr_korean_ci,
|
||||||
my_strnxfrm_mb, /* strnxfrm */
|
my_strnxfrm_mb,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_mb, /* like_range */
|
my_like_range_mb,
|
||||||
my_wildcmp_mb, /* wildcmp */
|
my_wildcmp_mb,
|
||||||
my_strcasecmp_mb,
|
my_strcasecmp_mb,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
my_hash_sort_simple,
|
my_hash_sort_simple,
|
||||||
my_propagate_simple
|
my_propagate_simple
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_euckr_bin,
|
||||||
|
my_strnncollsp_euckr_bin,
|
||||||
|
my_strnxfrm_mb,
|
||||||
|
my_strnxfrmlen_simple,
|
||||||
|
my_like_range_mb,
|
||||||
|
my_wildcmp_mb_bin,
|
||||||
|
my_strcasecmp_mb_bin,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_mb_bin,
|
||||||
|
my_propagate_simple
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static MY_CHARSET_HANDLER my_charset_handler=
|
static MY_CHARSET_HANDLER my_charset_handler=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
@@ -10017,7 +10048,7 @@ struct charset_info_st my_charset_euckr_korean_ci=
|
|||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_handler_euckr_korean_ci
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -10050,7 +10081,7 @@ struct charset_info_st my_charset_euckr_bin=
|
|||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_handler_euckr_bin
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -163,9 +163,11 @@ static const uchar sort_order_gb2312[]=
|
|||||||
|
|
||||||
#define isgb2312head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf7)
|
#define isgb2312head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf7)
|
||||||
#define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
|
#define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
|
||||||
|
#define gb2312code(c,d) (((uchar)(c) <<8) | (uchar)(d))
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
|
||||||
|
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||||
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
|
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
@@ -6341,11 +6343,23 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_chinese_ci
|
||||||
|
#define WEIGHT_MB1(x) (sort_order_gb2312[(uchar) (x)])
|
||||||
|
#define WEIGHT_MB2(x,y) (gb2312code(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_bin
|
||||||
|
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) (gb2312code(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_simple, /* strnncoll */
|
my_strnncoll_gb2312_chinese_ci,
|
||||||
my_strnncollsp_simple,
|
my_strnncollsp_gb2312_chinese_ci,
|
||||||
my_strnxfrm_mb, /* strnxfrm */
|
my_strnxfrm_mb, /* strnxfrm */
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_mb, /* like_range */
|
my_like_range_mb, /* like_range */
|
||||||
@@ -6356,6 +6370,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
my_propagate_simple
|
my_propagate_simple
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_gb2312_bin,
|
||||||
|
my_strnncollsp_gb2312_bin,
|
||||||
|
my_strnxfrm_mb,
|
||||||
|
my_strnxfrmlen_simple,
|
||||||
|
my_like_range_mb,
|
||||||
|
my_wildcmp_mb_bin,
|
||||||
|
my_strcasecmp_mb_bin,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_mb_bin,
|
||||||
|
my_propagate_simple
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static MY_CHARSET_HANDLER my_charset_handler=
|
static MY_CHARSET_HANDLER my_charset_handler=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
@@ -6420,9 +6452,10 @@ struct charset_info_st my_charset_gb2312_chinese_ci=
|
|||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_handler_gb2312_chinese_ci
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct charset_info_st my_charset_gb2312_bin=
|
struct charset_info_st my_charset_gb2312_bin=
|
||||||
{
|
{
|
||||||
86,0,0, /* number */
|
86,0,0, /* number */
|
||||||
@@ -6452,7 +6485,7 @@ struct charset_info_st my_charset_gb2312_bin=
|
|||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_handler_gb2312_bin
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -44,6 +44,7 @@
|
|||||||
#define gbktail(e) ((uchar)(e&0xff))
|
#define gbktail(e) ((uchar)(e&0xff))
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
|
||||||
|
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||||
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
|
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
@@ -3450,87 +3451,6 @@ static uint16 gbksortorder(uint16 i)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
|
|
||||||
size_t length)
|
|
||||||
{
|
|
||||||
const uchar *a= *a_res, *b= *b_res;
|
|
||||||
uint a_char,b_char;
|
|
||||||
|
|
||||||
while (length--)
|
|
||||||
{
|
|
||||||
if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
|
|
||||||
{
|
|
||||||
a_char= gbkcode(*a,*(a+1));
|
|
||||||
b_char= gbkcode(*b,*(b+1));
|
|
||||||
if (a_char != b_char)
|
|
||||||
return ((int) gbksortorder((uint16) a_char) -
|
|
||||||
(int) gbksortorder((uint16) b_char));
|
|
||||||
a+= 2;
|
|
||||||
b+= 2;
|
|
||||||
length--;
|
|
||||||
}
|
|
||||||
else if (sort_order_gbk[*a++] != sort_order_gbk[*b++])
|
|
||||||
return ((int) sort_order_gbk[a[-1]] -
|
|
||||||
(int) sort_order_gbk[b[-1]]);
|
|
||||||
}
|
|
||||||
*a_res= a;
|
|
||||||
*b_res= b;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool b_is_prefix)
|
|
||||||
{
|
|
||||||
size_t length= MY_MIN(a_length, b_length);
|
|
||||||
int res= my_strnncoll_gbk_internal(&a, &b, length);
|
|
||||||
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool diff_if_only_endspace_difference)
|
|
||||||
{
|
|
||||||
size_t length= MY_MIN(a_length, b_length);
|
|
||||||
int res= my_strnncoll_gbk_internal(&a, &b, length);
|
|
||||||
|
|
||||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
|
||||||
diff_if_only_endspace_difference= 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!res && a_length != b_length)
|
|
||||||
{
|
|
||||||
const uchar *end;
|
|
||||||
int swap= 1;
|
|
||||||
if (diff_if_only_endspace_difference)
|
|
||||||
res= 1; /* Assume 'a' is bigger */
|
|
||||||
/*
|
|
||||||
Check the next not space character of the longer key. If it's < ' ',
|
|
||||||
then it's smaller than the other key.
|
|
||||||
*/
|
|
||||||
if (a_length < b_length)
|
|
||||||
{
|
|
||||||
/* put shorter key in a */
|
|
||||||
a_length= b_length;
|
|
||||||
a= b;
|
|
||||||
swap= -1; /* swap sign of result */
|
|
||||||
res= -res;
|
|
||||||
}
|
|
||||||
for (end= a + a_length-length; a < end ; a++)
|
|
||||||
{
|
|
||||||
if (*a != ' ')
|
|
||||||
return (*a < ' ') ? -swap : swap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
my_strnxfrm_gbk(CHARSET_INFO *cs,
|
my_strnxfrm_gbk(CHARSET_INFO *cs,
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
uchar *dst, size_t dstlen, uint nweights,
|
||||||
@@ -10735,11 +10655,23 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_chinese_ci
|
||||||
|
#define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)])
|
||||||
|
#define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y)))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_bin
|
||||||
|
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) (gbkcode(x,y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_gbk,
|
my_strnncoll_gbk_chinese_ci,
|
||||||
my_strnncollsp_gbk,
|
my_strnncollsp_gbk_chinese_ci,
|
||||||
my_strnxfrm_gbk,
|
my_strnxfrm_gbk,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
@@ -10750,6 +10682,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
my_propagate_simple
|
my_propagate_simple
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_gbk_bin,
|
||||||
|
my_strnncollsp_gbk_bin,
|
||||||
|
my_strnxfrm_mb,
|
||||||
|
my_strnxfrmlen_simple,
|
||||||
|
my_like_range_mb,
|
||||||
|
my_wildcmp_mb_bin,
|
||||||
|
my_strcasecmp_mb_bin,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_mb_bin,
|
||||||
|
my_propagate_simple
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static MY_CHARSET_HANDLER my_charset_handler=
|
static MY_CHARSET_HANDLER my_charset_handler=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
@@ -10814,7 +10764,7 @@ struct charset_info_st my_charset_gbk_chinese_ci=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_handler_gbk_chinese_ci
|
||||||
};
|
};
|
||||||
|
|
||||||
struct charset_info_st my_charset_gbk_bin=
|
struct charset_info_st my_charset_gbk_bin=
|
||||||
@@ -10846,7 +10796,7 @@ struct charset_info_st my_charset_gbk_bin=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_handler_gbk_bin
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@@ -256,3 +256,5 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused
|
|||||||
return nchars0 - nchars;
|
return nchars0 - nchars;
|
||||||
}
|
}
|
||||||
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
|
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
|
||||||
|
|
||||||
|
#undef MY_FUNCTION_NAME
|
||||||
|
@@ -186,6 +186,7 @@ static const uchar sort_order_sjis[]=
|
|||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
|
||||||
#define IS_8BIT_CHAR(x) issjiskata(x)
|
#define IS_8BIT_CHAR(x) issjiskata(x)
|
||||||
|
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || issjiskata(x))
|
||||||
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
|
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
@@ -1088,90 +1089,6 @@ static MY_UNICASE_INFO my_caseinfo_sjis=
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
|
|
||||||
const uchar **a_res, size_t a_length,
|
|
||||||
const uchar **b_res, size_t b_length)
|
|
||||||
{
|
|
||||||
const uchar *a= *a_res, *b= *b_res;
|
|
||||||
const uchar *a_end= a + a_length;
|
|
||||||
const uchar *b_end= b + b_length;
|
|
||||||
while (a < a_end && b < b_end)
|
|
||||||
{
|
|
||||||
if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
|
|
||||||
ismbchar_sjis(cs,(char*) b, (char*) b_end))
|
|
||||||
{
|
|
||||||
uint a_char= sjiscode(*a, *(a+1));
|
|
||||||
uint b_char= sjiscode(*b, *(b+1));
|
|
||||||
if (a_char != b_char)
|
|
||||||
return (int) a_char - (int) b_char;
|
|
||||||
a += 2;
|
|
||||||
b += 2;
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
|
|
||||||
return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
|
|
||||||
a++;
|
|
||||||
b++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*a_res= a;
|
|
||||||
*b_res= b;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool b_is_prefix)
|
|
||||||
{
|
|
||||||
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
|
|
||||||
if (b_is_prefix && a_length > b_length)
|
|
||||||
a_length= b_length;
|
|
||||||
return res ? res : (int) (a_length - b_length);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|
||||||
const uchar *a, size_t a_length,
|
|
||||||
const uchar *b, size_t b_length,
|
|
||||||
my_bool diff_if_only_endspace_difference)
|
|
||||||
{
|
|
||||||
const uchar *a_end= a + a_length, *b_end= b + b_length;
|
|
||||||
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
|
|
||||||
|
|
||||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
|
||||||
diff_if_only_endspace_difference= 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!res && (a != a_end || b != b_end))
|
|
||||||
{
|
|
||||||
int swap= 1;
|
|
||||||
if (diff_if_only_endspace_difference)
|
|
||||||
res= 1; /* Assume 'a' is bigger */
|
|
||||||
/*
|
|
||||||
Check the next not space character of the longer key. If it's < ' ',
|
|
||||||
then it's smaller than the other key.
|
|
||||||
*/
|
|
||||||
if (a == a_end)
|
|
||||||
{
|
|
||||||
/* put shorter key in a */
|
|
||||||
a_end= b_end;
|
|
||||||
a= b;
|
|
||||||
swap= -1; /* swap sign of result */
|
|
||||||
res= -res;
|
|
||||||
}
|
|
||||||
for (; a < a_end ; a++)
|
|
||||||
{
|
|
||||||
if (*a != ' ')
|
|
||||||
return (*a < ' ') ? -swap : swap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* SJIS->Unicode conversion table */
|
/* SJIS->Unicode conversion table */
|
||||||
static uint16 sjis_to_unicode[65536]=
|
static uint16 sjis_to_unicode[65536]=
|
||||||
{
|
{
|
||||||
@@ -34099,15 +34016,36 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
/*
|
||||||
|
sjis_japanese_ci and sjis_bin sort character blocks in this order:
|
||||||
|
1. [00..7F] - 7BIT characters (ASCII)
|
||||||
|
2. [81..9F][40..7E,80..FC] - MB2 characters, part1
|
||||||
|
3. [A1..DF] - 8BIT characters (Kana)
|
||||||
|
4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
|
||||||
|
*/
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_japanese_ci
|
||||||
|
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||||
|
#define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)])
|
||||||
|
#define WEIGHT_MB2(x,y) (sjiscode(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_bin
|
||||||
|
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||||
|
#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) (sjiscode(x, y))
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_sjis,
|
my_strnncoll_sjis_japanese_ci,
|
||||||
my_strnncollsp_sjis,
|
my_strnncollsp_sjis_japanese_ci,
|
||||||
my_strnxfrm_mb,
|
my_strnxfrm_mb,
|
||||||
my_strnxfrmlen_simple,
|
my_strnxfrmlen_simple,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb, /* wildcmp */
|
my_wildcmp_mb,
|
||||||
my_strcasecmp_8bit,
|
my_strcasecmp_8bit,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
my_hash_sort_simple,
|
my_hash_sort_simple,
|
||||||
@@ -34115,6 +34053,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_sjis_bin,
|
||||||
|
my_strnncollsp_sjis_bin,
|
||||||
|
my_strnxfrm_mb,
|
||||||
|
my_strnxfrmlen_simple,
|
||||||
|
my_like_range_mb,
|
||||||
|
my_wildcmp_mb_bin,
|
||||||
|
my_strcasecmp_mb_bin,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_mb_bin,
|
||||||
|
my_propagate_simple
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static MY_CHARSET_HANDLER my_charset_handler=
|
static MY_CHARSET_HANDLER my_charset_handler=
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
@@ -34179,7 +34133,7 @@ struct charset_info_st my_charset_sjis_japanese_ci=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_ci_handler
|
&my_collation_handler_sjis_japanese_ci
|
||||||
};
|
};
|
||||||
|
|
||||||
struct charset_info_st my_charset_sjis_bin=
|
struct charset_info_st my_charset_sjis_bin=
|
||||||
@@ -34211,7 +34165,7 @@ struct charset_info_st my_charset_sjis_bin=
|
|||||||
1, /* escape_with_backslash_is_dangerous */
|
1, /* escape_with_backslash_is_dangerous */
|
||||||
1, /* levels_for_order */
|
1, /* levels_for_order */
|
||||||
&my_charset_handler,
|
&my_charset_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_handler_sjis_bin
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
231
strings/strcoll.ic
Normal file
231
strings/strcoll.ic
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
/*
|
||||||
|
Copyright (c) 2015, MariaDB Foundation
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; version 2 of the License.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef MY_FUNCTION_NAME
|
||||||
|
#error MY_FUNCTION_NAME is not defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
The weight for automatically padded spaces when comparing strings with
|
||||||
|
the PAD SPACE property.
|
||||||
|
Should normally be equal to the weight of a regular space.
|
||||||
|
*/
|
||||||
|
#ifndef WEIGHT_PAD_SPACE
|
||||||
|
#define WEIGHT_PAD_SPACE (' ')
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Weight of an illegal byte, must follow these rules:
|
||||||
|
1. Must be greater than weight of any normal character in the collation.
|
||||||
|
2. Two different bad bytes must have different weights and must be
|
||||||
|
compared in their binary order.
|
||||||
|
|
||||||
|
Depends on mbmaxlen of the character set, as well as how the collation
|
||||||
|
sorts various single-byte and multi-byte character blocks.
|
||||||
|
|
||||||
|
The macro below is the default definition, it is suitable for mbmaxlen=2
|
||||||
|
character sets that sort all multi-byte characters after all single-byte
|
||||||
|
characters: big5, euckr, gb2312, gbk.
|
||||||
|
|
||||||
|
All mbmaxlen>2 character sets must provide their own definitions.
|
||||||
|
All collations that have a more complex order (than just MB1 followed by MB2)
|
||||||
|
must also provide their own definitions (see definitions for
|
||||||
|
cp932_japanese_ci and sjis_japanese_ci as examples of a more complex order).
|
||||||
|
*/
|
||||||
|
#ifndef WEIGHT_ILSEQ
|
||||||
|
#define WEIGHT_ILSEQ(x) (0xFF00 + (x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Scan a valid character, or a bad byte, or an auto-padded space
|
||||||
|
from a string and calculate the weight of the scanned sequence.
|
||||||
|
|
||||||
|
@param [OUT] weight - the weight is returned here
|
||||||
|
@param str - the string
|
||||||
|
@param end - the end of the string
|
||||||
|
@return - the number of bytes scanned
|
||||||
|
|
||||||
|
The including source file must define the following macros:
|
||||||
|
IS_MB1_CHAR(x)
|
||||||
|
IS_MB2_CHAR(x,y)
|
||||||
|
WEIGHT_PAD_SPACE
|
||||||
|
WEIGHT_MB1(x)
|
||||||
|
WEIGHT_MB2(x,y)
|
||||||
|
WEIGHT_ILSEQ(x)
|
||||||
|
*/
|
||||||
|
static inline uint
|
||||||
|
MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
|
||||||
|
{
|
||||||
|
if (str >= end)
|
||||||
|
{
|
||||||
|
*weight= WEIGHT_PAD_SPACE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_MB1_CHAR(*str))
|
||||||
|
{
|
||||||
|
*weight= WEIGHT_MB1(*str); /* A valid single byte character*/
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (str + 2 > end) /* The string ended unexpectedly */
|
||||||
|
goto bad; /* Treat as a bad byte */
|
||||||
|
|
||||||
|
if (IS_MB2_CHAR(str[0], str[1]))
|
||||||
|
{
|
||||||
|
*weight= WEIGHT_MB2(str[0], str[1]);
|
||||||
|
return 2; /* A valid two-byte character */
|
||||||
|
}
|
||||||
|
|
||||||
|
bad:
|
||||||
|
*weight= WEIGHT_ILSEQ(str[0]); /* Bad byte */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Compare two strings according to the collation,
|
||||||
|
without handling the PAD SPACE property.
|
||||||
|
|
||||||
|
Note, cs->coll->strnncoll() is usually used to compare identifiers.
|
||||||
|
Perhaps we should eventually (in 10.2?) create a new collation
|
||||||
|
my_charset_utf8_general_ci_no_pad and have only one comparison function
|
||||||
|
in MY_COLLATION_HANDLER.
|
||||||
|
|
||||||
|
@param cs - the character set and collation
|
||||||
|
@param a - the left string
|
||||||
|
@param a_length - the length of the left string
|
||||||
|
@param b - the right string
|
||||||
|
@param b_length - the length of the right string
|
||||||
|
@param b_is_prefix - if the caller wants to check if "b" is a prefix of "a"
|
||||||
|
@return - the comparison result
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
|
const uchar *a, size_t a_length,
|
||||||
|
const uchar *b, size_t b_length,
|
||||||
|
my_bool b_is_prefix)
|
||||||
|
{
|
||||||
|
const uchar *a_end= a + a_length;
|
||||||
|
const uchar *b_end= b + b_length;
|
||||||
|
for ( ; ; )
|
||||||
|
{
|
||||||
|
int a_weight, b_weight, res;
|
||||||
|
uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
|
||||||
|
uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
|
||||||
|
/*
|
||||||
|
a_wlen b_wlen Comment
|
||||||
|
------ ------ -------
|
||||||
|
0 0 Strings ended simultaneously, "a" and "b" are equal.
|
||||||
|
0 >0 "a" is a prefix of "b", so "a" is smaller.
|
||||||
|
>0 0 "b" is a prefix of "a", check b_is_prefix.
|
||||||
|
>0 >0 Two weights were scanned, check weight difference.
|
||||||
|
*/
|
||||||
|
if (!a_wlen)
|
||||||
|
return b_wlen ? -b_weight : 0;
|
||||||
|
|
||||||
|
if (!b_wlen)
|
||||||
|
return b_is_prefix ? 0 : a_weight;
|
||||||
|
|
||||||
|
if ((res= (a_weight - b_weight)))
|
||||||
|
return res;
|
||||||
|
/*
|
||||||
|
None of the strings has ended yet.
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(a < a_end);
|
||||||
|
DBUG_ASSERT(b < b_end);
|
||||||
|
a+= a_wlen;
|
||||||
|
b+= b_wlen;
|
||||||
|
}
|
||||||
|
DBUG_ASSERT(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Compare two strings according to the collation, with PAD SPACE handling.
|
||||||
|
|
||||||
|
@param cs - the character set and collation
|
||||||
|
@param a - the left string
|
||||||
|
@param a_length - the length of the left string
|
||||||
|
@param b - the right string
|
||||||
|
@param b_length - the length of the right string
|
||||||
|
@param diff_if_only_endspace_difference - not used in the code.
|
||||||
|
TODO: this should be eventually removed (in 10.2?)
|
||||||
|
@return - the comparison result
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
|
const uchar *a, size_t a_length,
|
||||||
|
const uchar *b, size_t b_length,
|
||||||
|
my_bool diff_if_only_endspace_difference
|
||||||
|
__attribute__((unused)))
|
||||||
|
{
|
||||||
|
const uchar *a_end= a + a_length;
|
||||||
|
const uchar *b_end= b + b_length;
|
||||||
|
for ( ; ; )
|
||||||
|
{
|
||||||
|
int a_weight, b_weight, res;
|
||||||
|
uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
|
||||||
|
uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
|
||||||
|
if ((res= (a_weight - b_weight)))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Got two different weights. Each weight can be generated by either of:
|
||||||
|
- a real character
|
||||||
|
- a bad byte sequence or an incomplete byte sequence
|
||||||
|
- an auto-generated trailing space (PAD SPACE)
|
||||||
|
It does not matter how exactly each weight was generated.
|
||||||
|
Just return the weight difference.
|
||||||
|
*/
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
if (!a_wlen && !b_wlen)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Got two auto-generated trailing spaces, i.e.
|
||||||
|
both strings have now ended, so they are equal.
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(a == a_end);
|
||||||
|
DBUG_ASSERT(b == b_end);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
At least one of the strings has not ended yet, continue comparison.
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(a < a_end || b < b_end);
|
||||||
|
a+= a_wlen;
|
||||||
|
b+= b_wlen;
|
||||||
|
}
|
||||||
|
DBUG_ASSERT(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
We usually include this file at least two times from the same source file,
|
||||||
|
for the _ci and the _bin collations. Prepare for the second inclusion.
|
||||||
|
*/
|
||||||
|
#undef MY_FUNCTION_NAME
|
||||||
|
#undef WEIGHT_ILSEQ
|
||||||
|
#undef WEIGHT_MB1
|
||||||
|
#undef WEIGHT_MB2
|
||||||
|
#undef WEIGHT_PAD_SPACE
|
@@ -95,11 +95,361 @@ static CHARSET_INFO *charset_list[]=
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* One strnncollsp() test case: two byte strings and the expected sign */
typedef struct
{
  const char *a;    /* The left string; NULL terminates a test array */
  size_t alen;      /* Length of "a" in bytes */
  const char *b;    /* The right string */
  size_t blen;      /* Length of "b" in bytes */
  int res;          /* Expected result; only its sign is checked */
} STRNNCOLL_PARAM;
|
||||||
|
|
||||||
|
|
||||||
|
#define CSTR(x) (x),(sizeof(x)-1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
Byte sequence types used in the tests:
|
||||||
|
8BIT - an 8-bit byte (>= 0x80) which makes a single-byte character
|
||||||
|
MB2 - two bytes that make a valid character
|
||||||
|
H2 - a byte which is a valid MB2 head byte
|
||||||
|
T2 - a byte which is a valid MB2 tail byte
|
||||||
|
ILSEQ - a byte which makes an illegal sequence
|
||||||
|
H2+ILSEQ - a sequence that starts with a valid H2 byte,
|
||||||
|
but not followed by a valid T2 byte.
|
||||||
|
|
||||||
|
Charset H2 T2 8BIT
|
||||||
|
------- ---------------- --------------- --------
|
||||||
|
big5 [A1..F9] [40..7E,A1..FE]
|
||||||
|
euckr [81..FE] [41..5A,61..7A,81..FE]
|
||||||
|
gb2312 [A1..F7] [A1..FE]
|
||||||
|
gbk [81..FE] [40..7E,80..FE]
|
||||||
|
|
||||||
|
cp932 [81..9F,E0..FC] [40..7E,80..FC] [A1..DF]
|
||||||
|
sjis [81..9F,E0..FC] [40..7E,80..FC] [A1..DF]
|
||||||
|
|
||||||
|
|
||||||
|
Essential byte sequences in various character sets:
|
||||||
|
|
||||||
|
Sequence big5 cp932 euckr gb2312 gbk sjis
|
||||||
|
-------- ---- ----- ----- ------ --- ----
|
||||||
|
80 ILSEQ ILSEQ ILSEQ ILSEQ ILSEQ ILSEQ
|
||||||
|
81 ILSEQ H2 H2 ILSEQ H2 H2
|
||||||
|
A1 H2 8BIT H2 H2 H2 8BIT
|
||||||
|
A1A1 MB2 8BIT+8BIT MB2 MB2 MB2 8BIT+8BIT
|
||||||
|
E0E0 MB2 MB2 MB2 MB2 MB2 MB2
|
||||||
|
F9FE MB2 H2+ILSEQ MB2 ILSEQ+T2 MB2 H2+ILSEQ
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
For character sets that have the following byte sequences:
|
||||||
|
80 - ILSEQ
|
||||||
|
81 - ILSEQ or H2
|
||||||
|
F9 - ILSEQ or H2
|
||||||
|
A1A1 - MB2 or 8BIT+8BIT
|
||||||
|
E0E0 - MB2
|
||||||
|
*/
|
||||||
|
STRNNCOLL_PARAM strcoll_mb2_common[]=
|
||||||
|
{
|
||||||
|
/* Compare two good sequences */
|
||||||
|
{CSTR(""), CSTR(""), 0},
|
||||||
|
{CSTR(""), CSTR(" "), 0},
|
||||||
|
{CSTR(""), CSTR("A"), -1},
|
||||||
|
{CSTR(""), CSTR("a"), -1},
|
||||||
|
{CSTR(""), CSTR("\xA1\xA1"), -1},
|
||||||
|
{CSTR(""), CSTR("\xE0\xE0"), -1},
|
||||||
|
|
||||||
|
{CSTR(" "), CSTR(""), 0},
|
||||||
|
{CSTR(" "), CSTR(" "), 0},
|
||||||
|
{CSTR(" "), CSTR("A"), -1},
|
||||||
|
{CSTR(" "), CSTR("a"), -1},
|
||||||
|
{CSTR(" "), CSTR("\xA1\xA1"), -1},
|
||||||
|
{CSTR(" "), CSTR("\xE0\xE0"), -1},
|
||||||
|
|
||||||
|
{CSTR("a"), CSTR(""), 1},
|
||||||
|
{CSTR("a"), CSTR(" "), 1},
|
||||||
|
{CSTR("a"), CSTR("a"), 0},
|
||||||
|
{CSTR("a"), CSTR("\xA1\xA1"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xE0\xE0"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xA1\xA1"), 0},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xE0\xE0"), -1},
|
||||||
|
|
||||||
|
/* Compare a good character to an illegal or an incomplete sequence */
|
||||||
|
{CSTR(""), CSTR("\x80"), -1},
|
||||||
|
{CSTR(""), CSTR("\x81"), -1},
|
||||||
|
{CSTR(""), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR(" "), CSTR("\x80"), -1},
|
||||||
|
{CSTR(" "), CSTR("\x81"), -1},
|
||||||
|
{CSTR(" "), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("a"), CSTR("\x80"), -1},
|
||||||
|
{CSTR("a"), CSTR("\x81"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\x80"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\x81"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xE0\xE0"), CSTR("\x80"), -1},
|
||||||
|
{CSTR("\xE0\xE0"), CSTR("\x81"), -1},
|
||||||
|
{CSTR("\xE0\xE0"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
/* Compare two bad/incomplete sequences */
|
||||||
|
{CSTR("\x80"), CSTR("\x80"), 0},
|
||||||
|
{CSTR("\x80"), CSTR("\x81"), -1},
|
||||||
|
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||||
|
{CSTR("\x81"), CSTR("\x81"), 0},
|
||||||
|
{CSTR("\x81"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
For character sets that have good mb2 characters A1A1 and F9FE
|
||||||
|
*/
|
||||||
|
STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
|
||||||
|
{
|
||||||
|
/* Compare two good characters */
|
||||||
|
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR(" "), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("a") , CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
|
||||||
|
|
||||||
|
/* Compare a good character to an illegal or an incomplete sequence */
|
||||||
|
{CSTR(""), CSTR("\xA1"), -1},
|
||||||
|
{CSTR(""), CSTR("\xF9"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\x80"), -1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\x81"), -1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
/* Compare two bad/incomplete sequences */
|
||||||
|
{CSTR("\x80"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
For character sets that have:
|
||||||
|
A1A1 - a good mb2 character
|
||||||
|
F9FE - a bad sequence
|
||||||
|
*/
|
||||||
|
STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
|
||||||
|
{
|
||||||
|
/* Compare a good character to an illegal or an incomplete sequence */
|
||||||
|
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR(" "), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("a") , CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
|
||||||
|
|
||||||
|
{CSTR(""), CSTR("\xA1"), -1},
|
||||||
|
{CSTR(""), CSTR("\xF9"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
/* Compare two bad/incomplete sequences */
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\x80"), 1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\x81"), 1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
|
||||||
|
{CSTR("\x80"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
|
||||||
|
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
For character sets that have:
|
||||||
|
80 - ILSEQ or H2
|
||||||
|
81 - ILSEQ or H2
|
||||||
|
A1 - 8BIT
|
||||||
|
F9 - ILSEQ or H2
|
||||||
|
F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
|
||||||
|
*/
|
||||||
|
STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
|
||||||
|
{
|
||||||
|
/* Compare two good characters */
|
||||||
|
{CSTR(""), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xA1"), 1},
|
||||||
|
|
||||||
|
/* Compare a good character to an illegal or an incomplete sequence */
|
||||||
|
{CSTR(""), CSTR("\xF9"), -1},
|
||||||
|
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR(" "), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xF9\xFE"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("a"), CSTR("\xF9"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||||
|
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
|
||||||
|
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\x80"), 1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\x81"), 1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
|
||||||
|
|
||||||
|
{CSTR("\x80"), CSTR("\xA1"), 1},
|
||||||
|
|
||||||
|
/* Compare two bad/incomplete sequences */
|
||||||
|
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||||
|
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
|
||||||
|
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
For character sets (e.g. cp932 and sjis) that have:
|
||||||
|
8181 - a valid MB2 character
|
||||||
|
A1 - a valid 8BIT character
|
||||||
|
E0E0 - a valid MB2 character
|
||||||
|
and sort in this order:
|
||||||
|
8181 < A1 < E0E0
|
||||||
|
*/
|
||||||
|
STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
|
||||||
|
{
|
||||||
|
{CSTR("\x81\x81"), CSTR("\xA1"), -1},
|
||||||
|
{CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
|
||||||
|
{CSTR("\xA1"), CSTR("\xE0\xE0"), -1},
|
||||||
|
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Write the bytes of "src" into "dst" as upper-case hexadecimal digits,
  two digits per byte, followed by a terminating NUL.

  @param dst     - the output buffer
  @param dstlen  - the size of the output buffer in bytes
  @param src     - the bytes to convert
  @param srclen  - the number of bytes in "src"

  The output is truncated when "dst" is too small: a byte is converted only
  while more than three bytes of room remain. Nothing is written at all
  when dstlen is 0.
*/
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
  const char *srcend= src + srclen;
  size_t room= dstlen;                 /* Bytes still available in dst */

  if (!dstlen)
    return;                            /* No room even for the NUL */

  /*
    Track the remaining room as a size instead of comparing "dst + 3"
    against the end pointer: that sum may point beyond one-past-the-end
    of the buffer, which is undefined behaviour.
  */
  for (*dst= '\0' ; room > 3 && src < srcend; src++)
  {
    sprintf(dst, "%02X", (unsigned char) src[0]);  /* 2 digits + NUL */
    dst+= 2;
    room-= 2;
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Check if the two comparison results are semantically equal:
  both are negative, both are positive, or both are zero.
  Returns 1 if so, 0 otherwise.
*/
static int
eqres(int ares, int bres)
{
  /* Reduce each result to its sign: -1, 0 or +1 */
  int asign= (ares > 0) - (ares < 0);
  int bsign= (bres > 0) - (bres < 0);
  return asign == bsign;
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
strcollsp(CHARSET_INFO *cs, const STRNNCOLL_PARAM *param)
|
||||||
|
{
|
||||||
|
int failed= 0;
|
||||||
|
const STRNNCOLL_PARAM *p;
|
||||||
|
diag("%-20s %-10s %-10s %10s %10s", "Collation", "a", "b", "ExpectSign", "Actual");
|
||||||
|
for (p= param; p->a; p++)
|
||||||
|
{
|
||||||
|
char ahex[64], bhex[64];
|
||||||
|
int res= cs->coll->strnncollsp(cs, (uchar *) p->a, p->alen,
|
||||||
|
(uchar *) p->b, p->blen, 0);
|
||||||
|
str2hex(ahex, sizeof(ahex), p->a, p->alen);
|
||||||
|
str2hex(bhex, sizeof(bhex), p->b, p->blen);
|
||||||
|
diag("%-20s %-10s %-10s %10d %10d%s",
|
||||||
|
cs->name, ahex, bhex, p->res, res,
|
||||||
|
eqres(res, p->res) ? "" : " FAILED");
|
||||||
|
if (!eqres(res, p->res))
|
||||||
|
{
|
||||||
|
failed++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Test in reverse order */
|
||||||
|
res= cs->coll->strnncollsp(cs, (uchar *) p->b, p->blen,
|
||||||
|
(uchar *) p->a, p->alen, 0);
|
||||||
|
if (!eqres(res, -p->res))
|
||||||
|
{
|
||||||
|
diag("Comparison in reverse order failed. Expected %d, got %d",
|
||||||
|
-p->res, res);
|
||||||
|
failed++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return failed;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
test_strcollsp()
|
||||||
|
{
|
||||||
|
int failed= 0;
|
||||||
|
#ifdef HAVE_CHARSET_big5
|
||||||
|
failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_big5_bin, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_big5_bin, strcoll_mb2_A1A1_mb2_F9FE);
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_CHARSET_cp932
|
||||||
|
failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb1_A1_bad_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_cp932_bin, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_cp932_bin, strcoll_mb1_A1_bad_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_8181_A1_E0E0);
|
||||||
|
failed+= strcollsp(&my_charset_cp932_bin, strcoll_8181_A1_E0E0);
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_CHARSET_euckr
|
||||||
|
failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_A1A1_mb2_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_euckr_bin, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_euckr_bin, strcoll_mb2_A1A1_mb2_F9FE);
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_CHARSET_gb2312
|
||||||
|
failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_A1A1_bad_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_gb2312_bin, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_gb2312_bin, strcoll_mb2_A1A1_bad_F9FE);
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_CHARSET_gbk
|
||||||
|
failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_gbk_bin, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_gbk_bin, strcoll_mb2_A1A1_mb2_F9FE);
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_CHARSET_sjis
|
||||||
|
failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_sjis_bin, strcoll_mb2_common);
|
||||||
|
failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb1_A1_bad_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_sjis_bin, strcoll_mb1_A1_bad_F9FE);
|
||||||
|
failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
|
||||||
|
failed+= strcollsp(&my_charset_sjis_bin, strcoll_8181_A1_E0E0);
|
||||||
|
#endif
|
||||||
|
return failed;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
size_t i, failed= 0;
|
size_t i, failed= 0;
|
||||||
|
|
||||||
plan(1);
|
plan(2);
|
||||||
diag("Testing my_like_range_xxx() functions");
|
diag("Testing my_like_range_xxx() functions");
|
||||||
|
|
||||||
for (i= 0; i < array_elements(charset_list); i++)
|
for (i= 0; i < array_elements(charset_list); i++)
|
||||||
@@ -112,5 +462,10 @@ int main()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
ok(failed == 0, "Testing my_like_range_xxx() functions");
|
ok(failed == 0, "Testing my_like_range_xxx() functions");
|
||||||
|
|
||||||
|
diag("Testing cs->coll->strnncollsp()");
|
||||||
|
failed= test_strcollsp();
|
||||||
|
ok(failed == 0, "Testing cs->coll->strnncollsp()");
|
||||||
|
|
||||||
return exit_status();
|
return exit_status();
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user