mirror of
https://github.com/MariaDB/server.git
synced 2026-01-06 05:22:24 +03:00
Adding a shared include file ctype-mb.ic and removing a number
of very similar copies of my_well_formed_len_xxx(), implemented for big5, cp932, euckr, eucjpms, gb2312, gbk, sjis, ujis.
This commit is contained in:
@@ -34,6 +34,7 @@
|
||||
|
||||
/*
|
||||
Support for Chinese(BIG5) characters, by jou@nematic.ieo.nctu.edu.tw
|
||||
CP950 and HKSCS additional characters are also accepted.
|
||||
modified by Wei He (hewei@mail.ied.ac.cn)
|
||||
modified by Alex Barkov <bar@udm.net>
|
||||
*/
|
||||
@@ -47,6 +48,12 @@
|
||||
#define big5head(e) ((uchar)(e>>8))
|
||||
#define big5tail(e) ((uchar)(e&0xff))
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
|
||||
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static const uchar ctype_big5[257] =
|
||||
{
|
||||
0, /* For standard library */
|
||||
@@ -6843,42 +6850,6 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns a well formed length of a BIG5 string.
|
||||
CP950 and HKSCS additional characters are also accepted.
|
||||
*/
|
||||
static
|
||||
size_t my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if ((b < emb) && isbig5code((uchar)*b, (uchar)b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
||||
@@ -176,10 +176,18 @@ static const uchar sort_order_cp932[]=
|
||||
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
|
||||
};
|
||||
|
||||
#define iscp932head(c) ((0x81<=(c) && (c)<=0x9f) || \
|
||||
((0xe0<=(c)) && (c)<=0xfc))
|
||||
#define iscp932tail(c) ((0x40<=(c) && (c)<=0x7e) || \
|
||||
(0x80<=(c) && (c)<=0xfc))
|
||||
#define iscp932head(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
|
||||
(0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
|
||||
#define iscp932tail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
|
||||
(0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
|
||||
|
||||
#define iscp932kata(c) (0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF)
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
|
||||
#define IS_8BIT_CHAR(x) iscp932kata(x)
|
||||
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
||||
@@ -34711,50 +34719,6 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
||||
return clen;
|
||||
}
|
||||
|
||||
/*
|
||||
Returns a well formed length of a cp932 string.
|
||||
cp932 additional characters are also accepted.
|
||||
*/
|
||||
|
||||
static
|
||||
size_t my_well_formed_len_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
/*
|
||||
Cast to int8 for extra safety.
|
||||
"char" can be unsigned by default
|
||||
on some platforms.
|
||||
*/
|
||||
if (((int8)b[0]) >= 0)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (iscp932head((uchar)*b) && (e-b)>1 && iscp932tail((uchar)b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
|
||||
{
|
||||
/* Half width kana */
|
||||
b++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
|
||||
@@ -202,6 +202,12 @@ static const uchar sort_order_euc_kr[]=
|
||||
iseuc_kr_tail3(c))
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
|
||||
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char* p, const char *e)
|
||||
{
|
||||
@@ -9929,41 +9935,6 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns well formed length of a EUC-KR string.
|
||||
*/
|
||||
static size_t
|
||||
my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (b < emb && iseuc_kr_head(*b) && iseuc_kr_tail(b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
||||
@@ -180,10 +180,26 @@ static const uchar sort_order_eucjpms[]=
|
||||
};
|
||||
|
||||
|
||||
#define iseucjpms(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe))
|
||||
#define iskata(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf))
|
||||
#define iseucjpms_ss2(c) (((c)&0xff) == 0x8e)
|
||||
#define iseucjpms_ss3(c) (((c)&0xff) == 0x8f)
|
||||
/*
|
||||
EUCJPMS encoding subcomponents:
|
||||
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
|
||||
[x8E][xA1-xDF] # half-width katakana (two bytes/char)
|
||||
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
|
||||
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
|
||||
*/
|
||||
#define iseucjpms(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe)
|
||||
#define iskata(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf)
|
||||
#define iseucjpms_ss2(c) ((uchar) (c) == 0x8e)
|
||||
#define iseucjpms_ss3(c) ((uchar) (c) == 0x8f)
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms
|
||||
#define IS_MB2_JIS(x,y) (iseucjpms(x) && iseucjpms(y))
|
||||
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
|
||||
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
|
||||
@@ -67416,61 +67432,6 @@ my_wc_mb_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
EUCJPMS encoding subcomponents:
|
||||
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
|
||||
[x8E][xA1-xDF] # half-width katakana (two bytes/char)
|
||||
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
|
||||
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
|
||||
*/
|
||||
|
||||
static
|
||||
size_t my_well_formed_len_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *beg, const char *end, size_t pos,
|
||||
int *error)
|
||||
{
|
||||
const uchar *b= (uchar *) beg;
|
||||
*error=0;
|
||||
|
||||
for ( ; pos && b < (uchar*) end; pos--, b++)
|
||||
{
|
||||
char *chbeg;
|
||||
uint ch= *b;
|
||||
|
||||
if (ch <= 0x7F) /* one byte */
|
||||
continue;
|
||||
|
||||
chbeg= (char *) b++;
|
||||
if (b >= (uchar *) end) /* need more bytes */
|
||||
return (uint) (chbeg - beg); /* unexpected EOL */
|
||||
|
||||
if (iseucjpms_ss2(ch)) /* [x8E][xA1-xDF] */
|
||||
{
|
||||
if (iskata(*b))
|
||||
continue;
|
||||
*error=1;
|
||||
return (uint) (chbeg - beg); /* invalid sequence */
|
||||
}
|
||||
|
||||
if (iseucjpms_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */
|
||||
{
|
||||
ch= *b++;
|
||||
if (b >= (uchar*) end)
|
||||
{
|
||||
*error= 1;
|
||||
return (uint)(chbeg - beg); /* unexpected EOL */
|
||||
}
|
||||
}
|
||||
|
||||
if (iseucjpms(ch) && iseucjpms(*b)) /* [xA1-xFE][xA1-xFE] */
|
||||
continue;
|
||||
*error=1;
|
||||
return (size_t) (chbeg - beg); /* invalid sequence */
|
||||
}
|
||||
return (size_t) (b - (uchar *) beg);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *str, const char *str_end)
|
||||
|
||||
@@ -165,6 +165,12 @@ static const uchar sort_order_gb2312[]=
|
||||
#define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
|
||||
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint ismbchar_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char* p, const char *e)
|
||||
{
|
||||
@@ -6332,41 +6338,6 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns well formed length of a EUC-KR string.
|
||||
*/
|
||||
static size_t
|
||||
my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
||||
@@ -43,6 +43,12 @@
|
||||
#define gbkhead(e) ((uchar)(e>>8))
|
||||
#define gbktail(e) ((uchar)(e&0xff))
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
|
||||
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static const uchar ctype_gbk[257] =
|
||||
{
|
||||
0, /* For standard library */
|
||||
@@ -10726,43 +10732,6 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns well formed length of a GBK string.
|
||||
*/
|
||||
static
|
||||
size_t my_well_formed_len_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if ((b < emb) && isgbkcode((uchar)*b, (uchar)b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
||||
94
strings/ctype-mb.ic
Normal file
94
strings/ctype-mb.ic
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright (c) 2015, MariaDB Foundation
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
|
||||
#ifndef MY_FUNCTION_NAME
|
||||
#error MY_FUNCTION_NAME is not defined
|
||||
#endif
|
||||
|
||||
#if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR)
|
||||
#error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not!
|
||||
#endif
|
||||
|
||||
#if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR)
|
||||
#error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not!
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef WELL_FORMED_LEN
|
||||
/**
|
||||
Returns well formed length of a character string with
|
||||
variable character length for character sets with:
|
||||
- mbminlen == 1
|
||||
- mbmaxlen == 2, 3, or 4
|
||||
*/
|
||||
static size_t
|
||||
MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t nchars, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
|
||||
DBUG_ASSERT(cs->mbminlen == 1);
|
||||
DBUG_ASSERT(cs->mbmaxlen <= 4);
|
||||
|
||||
for (*error= 0 ; b < e && nchars-- ; )
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
b++; /* Single byte ASCII character */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
|
||||
{
|
||||
b+= 2; /* Double byte character */
|
||||
continue;
|
||||
}
|
||||
|
||||
#ifdef IS_MB3_CHAR
|
||||
if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
|
||||
{
|
||||
b+= 3; /* Three-byte character */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_MB4_CHAR
|
||||
if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
|
||||
{
|
||||
b+= 4; /* Four-byte character */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_8BIT_CHAR
|
||||
if (IS_8BIT_CHAR(b[0]))
|
||||
{
|
||||
b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
return b - b0;
|
||||
}
|
||||
|
||||
#endif /* WELL_FORMED_LEN */
|
||||
@@ -176,10 +176,19 @@ static const uchar sort_order_sjis[]=
|
||||
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377'
|
||||
};
|
||||
|
||||
#define issjishead(c) ((0x81<=(c) && (c)<=0x9f) || \
|
||||
((0xe0<=(c)) && (c)<=0xfc))
|
||||
#define issjistail(c) ((0x40<=(c) && (c)<=0x7e) || \
|
||||
(0x80<=(c) && (c)<=0xfc))
|
||||
#define issjishead(c) ((0x81 <= (uchar) (c) && (uchar) (c) <= 0x9f) || \
|
||||
(0xe0 <= (uchar) (c) && (uchar) (c) <= 0xfc))
|
||||
#define issjistail(c) ((0x40 <= (uchar) (c) && (uchar) (c) <= 0x7e) || \
|
||||
(0x80 <= (uchar) (c) && (uchar) (c) <= 0xfc))
|
||||
|
||||
#define issjiskata(c) ((0xA1 <= (uchar) (c) && (uchar) (c) <= 0xDF))
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
|
||||
#define IS_8BIT_CHAR(x) issjiskata(x)
|
||||
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
@@ -34089,44 +34098,6 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
return clen;
|
||||
}
|
||||
|
||||
/*
|
||||
Returns a well formed length of a SJIS string.
|
||||
CP932 additional characters are also accepted.
|
||||
*/
|
||||
static
|
||||
size_t my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
*error= 0;
|
||||
while (pos-- && b < e)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (issjishead((uchar)*b) && (e-b)>1 && issjistail((uchar)b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
}
|
||||
else if (((uchar)*b) >= 0xA1 && ((uchar)*b) <= 0xDF)
|
||||
{
|
||||
/* Half width kana */
|
||||
b++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Wrong byte sequence */
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
{
|
||||
|
||||
@@ -179,10 +179,26 @@ static const uchar sort_order_ujis[]=
|
||||
};
|
||||
|
||||
|
||||
#define isujis(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xfe))
|
||||
#define iskata(c) ((0xa1<=((c)&0xff) && ((c)&0xff)<=0xdf))
|
||||
#define isujis_ss2(c) (((c)&0xff) == 0x8e)
|
||||
#define isujis_ss3(c) (((c)&0xff) == 0x8f)
|
||||
/*
|
||||
EUC-JP encoding subcomponents:
|
||||
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
|
||||
[x8E][xA1-xDF] # half-width katakana (two bytes/char)
|
||||
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
|
||||
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
|
||||
*/
|
||||
|
||||
#define isujis(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xfe)
|
||||
#define iskata(c) (0xa1 <= (uchar) (c) && (uchar) (c) <= 0xdf)
|
||||
#define isujis_ss2(c) ((uchar) (c) == 0x8e)
|
||||
#define isujis_ss3(c) ((uchar) (c) == 0x8f)
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis
|
||||
#define IS_MB2_JIS(x,y) (isujis(x) && isujis(y))
|
||||
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
|
||||
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
|
||||
#define WELL_FORMED_LEN
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
@@ -201,63 +217,6 @@ static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
EUC-JP encoding subcomponents:
|
||||
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
|
||||
[x8E][xA1-xDF] # half-width katakana (two bytes/char)
|
||||
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
|
||||
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
|
||||
*/
|
||||
|
||||
static
|
||||
size_t my_well_formed_len_ujis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *beg, const char *end,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const uchar *b= (uchar *) beg;
|
||||
|
||||
for ( *error= 0 ; pos && b < (uchar*) end; pos--, b++)
|
||||
{
|
||||
char *chbeg;
|
||||
uint ch= *b;
|
||||
|
||||
if (ch <= 0x7F) /* one byte */
|
||||
continue;
|
||||
|
||||
chbeg= (char *) b++;
|
||||
if (b >= (uchar *) end) /* need more bytes */
|
||||
{
|
||||
*error= 1;
|
||||
return (size_t) (chbeg - beg); /* unexpected EOL */
|
||||
}
|
||||
|
||||
if (isujis_ss2(ch)) /* [x8E][xA1-xDF] */
|
||||
{
|
||||
if (iskata(*b))
|
||||
continue;
|
||||
*error= 1;
|
||||
return (size_t) (chbeg - beg); /* invalid sequence */
|
||||
}
|
||||
|
||||
if (isujis_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */
|
||||
{
|
||||
ch= *b++;
|
||||
if (b >= (uchar*) end)
|
||||
{
|
||||
*error= 1;
|
||||
return (size_t) (chbeg - beg); /* unexpected EOL */
|
||||
}
|
||||
}
|
||||
|
||||
if (isujis(ch) && isujis(*b)) /* [xA1-xFE][xA1-xFE] */
|
||||
continue;
|
||||
*error= 1;
|
||||
return (size_t) (chbeg - beg); /* invalid sequence */
|
||||
}
|
||||
return (size_t) (b - (uchar *) beg);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
size_t my_numcells_eucjp(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *str, const char *str_end)
|
||||
|
||||
Reference in New Issue
Block a user