diff --git a/include/m_ctype.h b/include/m_ctype.h index 66f1a02b7ab..4ed5c6eec6a 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -112,6 +112,8 @@ typedef struct charset_info_st int (*ismbchar)(struct charset_info_st *, const char *, const char *); my_bool (*ismbhead)(struct charset_info_st *, uint); int (*mbcharlen)(struct charset_info_st *, uint); + uint (*numchars)(struct charset_info_st *, const char *b, const char *e); /* number of characters between b and e (cf. my_numchars_mb) */ + uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos); /* NOTE(review): presumably locates character number pos between b and e -- confirm against my_charpos_mb */ /* Unicode convertion */ int (*mb_wc)(struct charset_info_st *cs,my_wc_t *wc, @@ -252,6 +254,9 @@ int my_wildcmp_8bit(CHARSET_INFO *, const char *wildstr,const char *wildend, int escape, int w_one, int w_many); +uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e); +uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); + #ifdef USE_MB /* Functions for multibyte charsets */ @@ -266,6 +271,9 @@ int my_wildcmp_mb(CHARSET_INFO *, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many); +uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e); +uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); + #endif #define _U 01 /* Upper case */ diff --git a/mysys/charset.c b/mysys/charset.c index 5f95889a231..a192eab8eb6 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -84,6 +84,8 @@ static void simple_cs_init_functions(CHARSET_INFO *cs) cs->strntod = my_strntod_8bit; cs->scan = my_scan_8bit; cs->mbmaxlen = 1; + cs->numchars = my_numchars_8bit; + cs->charpos = my_charpos_8bit; } diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 89a25666bd8..7827b8714ec 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6254,6 +6254,8 @@ CHARSET_INFO my_charset_big5 = ismbchar_big5, ismbhead_big5, mbcharlen_big5, + my_numchars_mb, /* numchars */ + my_charpos_mb, /* charpos */ my_mb_wc_big5, /* mb_wc */ my_wc_mb_big5, /* wc_mb */ my_caseup_str_mb, diff 
--git a/strings/ctype-bin.c b/strings/ctype-bin.c index 1b9c67c10a9..4d428828826 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -292,6 +292,8 @@ CHARSET_INFO my_charset_bin = NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_bin, /* mb_wc */ my_wc_mb_bin, /* wc_mb */ my_caseup_str_bin, /* caseup_str */ diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 9d6b8fe2131..f42ab66fed9 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -628,6 +628,8 @@ CHARSET_INFO my_charset_czech = NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_8bit, /* mb_wc */ my_wc_mb_8bit, /* wc_mb */ my_caseup_str_8bit, diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 938f215e2de..e49bf6a38fd 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8662,6 +8662,8 @@ CHARSET_INFO my_charset_euc_kr = ismbchar_euc_kr, ismbhead_euc_kr, mbcharlen_euc_kr, + my_numchars_mb, /* numchars */ + my_charpos_mb, /* charpos */ my_mb_wc_euc_kr, /* mb_wc */ my_wc_mb_euc_kr, /* wc_mb */ my_caseup_str_mb, diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 6100c74860b..3e3cd38c063 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -2828,6 +2828,8 @@ CHARSET_INFO compiled_charsets[] = { NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_8bit, /* mb_wc */ my_wc_mb_8bit, /* wc_mb */ my_caseup_str_8bit, @@ -2877,6 +2879,8 @@ CHARSET_INFO compiled_charsets[] = { NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_8bit, /* mb_wc */ my_wc_mb_8bit, /* wc_mb */ my_caseup_str_8bit, @@ -2925,6 +2929,8 @@ CHARSET_INFO compiled_charsets[] = { NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_8bit, /* mb_wc */ my_wc_mb_8bit, /* wc_mb */ my_caseup_str_8bit, 
@@ -3937,7 +3943,8 @@ CHARSET_INFO compiled_charsets[] = { NULL, NULL, NULL, - + NULL, /* numchars */ + NULL, /* charpos */ NULL, /* mb_wc */ NULL, /* wc_mb */ diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index ff2dcd381c1..9cc19dc46ef 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5712,6 +5712,8 @@ CHARSET_INFO my_charset_gb2312 = ismbchar_gb2312, ismbhead_gb2312, mbcharlen_gb2312, + my_numchars_mb, /* numchars */ + my_charpos_mb, /* charpos */ my_mb_wc_gb2312, /* mb_wc */ my_wc_mb_gb2312, /* wc_mb */ my_caseup_str_mb, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index a7a29a4af7a..42a3686475a 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -9909,6 +9909,8 @@ CHARSET_INFO my_charset_gbk = ismbchar_gbk, ismbhead_gbk, mbcharlen_gbk, + my_numchars_mb, /* numchars */ + my_charpos_mb, /* charpos */ my_mb_wc_gbk, /* mb_wc */ my_wc_mb_gbk, /* wc_mb */ my_caseup_str_mb, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 588eff0cab1..518418a9050 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -198,6 +198,8 @@ CHARSET_INFO my_charset_latin1 = NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_latin1, /* mb_wc */ my_wc_mb_latin1, /* wc_mb */ my_caseup_str_8bit, diff --git a/strings/ctype-latin1_de.c b/strings/ctype-latin1_de.c index 0a6de1ca86b..93c8ba54bc0 100644 --- a/strings/ctype-latin1_de.c +++ b/strings/ctype-latin1_de.c @@ -369,6 +369,8 @@ CHARSET_INFO my_charset_latin1_de = NULL, /* ismbchar */ NULL, /* ismbhead */ NULL, /* mbcharlen */ + my_numchars_8bit, /* numchars */ + my_charpos_8bit, /* charpos */ my_mb_wc_8bit, /* mb_wc */ my_wc_mb_8bit, /* wc_mb */ my_caseup_str_8bit, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 4c8471d4217..f5b7000cb18 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -276,5 +276,33 @@ int my_wildcmp_mb(CHARSET_INFO *cs, return (str != str_end ? 
1 : 0); } +uint my_numchars_mb(CHARSET_INFO *cs __attribute__((unused)), /* NOTE(review): cs is tagged unused but is passed to my_ismbchar() in the loop below */ + const char *b, const char *e) +{ /* Walks from b up to e and returns the number of steps taken; each step counts as one character. */ + register uint32 n=0,mblen; + while (b < e) + { + b+= (mblen= my_ismbchar(cs,b,e)) ? mblen : 1; /* advance by the length my_ismbchar() reports, or by a single byte when it reports 0 */ + ++n; + } + return n; +} + +uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused)), + uint pos) +{ + uint res=0, mblen; + const char *b0; + + while (pos && b