mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
Regex library is switched to use new ctype tools
to allow usage of many character sets at a time. include/m_ctype.h: Added condition to simplify migrating from old ctype. Added new style toupper, tolower which accept charset in first argument. regex/debug.c: Added charset argument. regex/debug.ih: Added charset argument. regex/engine.c: Added charset argument. regex/engine.ih: Added charset argument. regex/main.c: Added charset argument. regex/regcomp.c: Added CHARSET_INFO field. regex/regcomp.ih: Added charset argument. regex/regex.h: Added #include <m_ctype.h> for CHARSET_INFO. Added charset argument for regcomp(). regex/regex2.h: New charset argument for ISWORD(). regex/regexec.c: New charset argument. regex/reginit.c: Move to new style ctype. However, it still needs fixes: instead of a single static cclass variable, each charset must have its own variable. sql/item_cmpfunc.cc: Pass charset field into regcomp(). This will be fixed tomorrow to use String->charset instead of default_charset_info.
This commit is contained in:
@ -180,6 +180,7 @@ extern const char *compiled_charset_name(uint charset_number);
|
||||
#define _B 0100 /* Blank */
|
||||
#define _X 0200 /* heXadecimal digit */
|
||||
|
||||
#ifndef HIDE_OLD_CTYPE
|
||||
#define my_ctype (default_charset_info->ctype)
|
||||
#define my_to_upper (default_charset_info->to_upper)
|
||||
#define my_to_lower (default_charset_info->to_lower)
|
||||
@ -201,6 +202,8 @@ extern const char *compiled_charset_name(uint charset_number);
|
||||
#define isprint(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N | _B))
|
||||
#define isgraph(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N))
|
||||
#define iscntrl(c) ((my_ctype+1)[(uchar) (c)] & _C)
|
||||
#endif
|
||||
|
||||
#define isascii(c) (!((c) & ~0177))
|
||||
#define toascii(c) ((c) & 0177)
|
||||
|
||||
@ -208,6 +211,8 @@ extern const char *compiled_charset_name(uint charset_number);
|
||||
#undef ctype
|
||||
#endif /* ctype */
|
||||
|
||||
#define my_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)])
|
||||
#define my_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)])
|
||||
#define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_U | _L))
|
||||
#define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _U)
|
||||
#define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _L)
|
||||
|
@ -45,7 +45,7 @@ FILE *d;
|
||||
if (g->nplus > 0)
|
||||
fprintf(d, ", nplus %ld", (long)g->nplus);
|
||||
fprintf(d, "\n");
|
||||
s_print(g, d);
|
||||
s_print(r->charset, g, d);
|
||||
for (i = 0; i < g->ncategories; i++) {
|
||||
nincat[i] = 0;
|
||||
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
|
||||
@ -58,7 +58,7 @@ FILE *d;
|
||||
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
|
||||
if (g->categories[c] == i)
|
||||
break;
|
||||
fprintf(d, ", %d=%s", i, regchar(c,buf));
|
||||
fprintf(d, ", %d=%s", i, regchar(r->charset,c,buf));
|
||||
}
|
||||
fprintf(d, "\n");
|
||||
for (i = 1; i < g->ncategories; i++)
|
||||
@ -68,14 +68,14 @@ FILE *d;
|
||||
for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
|
||||
if (c <= CHAR_MAX && g->categories[c] == i) {
|
||||
if (last < 0) {
|
||||
fprintf(d, "%s", regchar(c,buf));
|
||||
fprintf(d, "%s", regchar(r->charset,c,buf));
|
||||
last = c;
|
||||
}
|
||||
} else {
|
||||
if (last >= 0) {
|
||||
if (last != c-1)
|
||||
fprintf(d, "-%s",
|
||||
regchar(c-1,buf));
|
||||
regchar(r->charset,c-1,buf));
|
||||
last = -1;
|
||||
}
|
||||
}
|
||||
@ -88,7 +88,8 @@ FILE *d;
|
||||
== static void s_print(register struct re_guts *g, FILE *d);
|
||||
*/
|
||||
static void
|
||||
s_print(g, d)
|
||||
s_print(charset, g, d)
|
||||
CHARSET_INFO *charset;
|
||||
register struct re_guts *g;
|
||||
FILE *d;
|
||||
{
|
||||
@ -127,7 +128,7 @@ FILE *d;
|
||||
if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
|
||||
fprintf(d, "\\%c", (char)opnd);
|
||||
else
|
||||
fprintf(d, "%s", regchar((char)opnd,buf));
|
||||
fprintf(d, "%s", regchar(charset,(char)opnd,buf));
|
||||
break;
|
||||
case OBOL:
|
||||
fprintf(d, "^");
|
||||
@ -151,14 +152,14 @@ FILE *d;
|
||||
for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
|
||||
if (CHIN(cs, i) && i < g->csetsize) {
|
||||
if (last < 0) {
|
||||
fprintf(d, "%s", regchar(i,buf));
|
||||
fprintf(d, "%s", regchar(charset,i,buf));
|
||||
last = i;
|
||||
}
|
||||
} else {
|
||||
if (last >= 0) {
|
||||
if (last != i-1)
|
||||
fprintf(d, "-%s",
|
||||
regchar(i-1,buf));
|
||||
regchar(charset,i-1,buf));
|
||||
last = -1;
|
||||
}
|
||||
}
|
||||
@ -230,12 +231,13 @@ FILE *d;
|
||||
== static char *regchar(int ch);
|
||||
*/
|
||||
static char * /* -> representation */
|
||||
regchar(ch,buf)
|
||||
regchar(charset,ch,buf)
|
||||
CHARSET_INFO *charset;
|
||||
int ch;
|
||||
char *buf;
|
||||
{
|
||||
|
||||
if (isprint(ch) || ch == ' ')
|
||||
if (my_isprint(charset,ch) || ch == ' ')
|
||||
sprintf(buf, "%c", ch);
|
||||
else
|
||||
sprintf(buf, "\\%o", ch);
|
||||
|
@ -5,8 +5,8 @@ extern "C" {
|
||||
|
||||
/* === debug.c === */
|
||||
void regprint(regex_t *r, FILE *d);
|
||||
static void s_print(register struct re_guts *g, FILE *d);
|
||||
static char *regchar(int ch,char *buf);
|
||||
static void s_print(CHARSET_INFO *charset, register struct re_guts *g, FILE *d);
|
||||
static char *regchar(CHARSET_INFO *charset, int ch,char *buf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -63,7 +63,8 @@ struct match {
|
||||
== size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
*/
|
||||
static int /* 0 success, REG_NOMATCH failure */
|
||||
matcher(g, str, nmatch, pmatch, eflags)
|
||||
matcher(charset,g, str, nmatch, pmatch, eflags)
|
||||
CHARSET_INFO *charset;
|
||||
register struct re_guts *g;
|
||||
char *str;
|
||||
size_t nmatch;
|
||||
@ -120,7 +121,7 @@ int eflags;
|
||||
|
||||
/* this loop does only one repetition except for backrefs */
|
||||
for (;;) {
|
||||
endp = fast(m, start, stop, gf, gl);
|
||||
endp = fast(charset, m, start, stop, gf, gl);
|
||||
if (endp == NULL) { /* a miss */
|
||||
if (m->pmatch != NULL)
|
||||
free((char *)m->pmatch);
|
||||
@ -136,7 +137,7 @@ int eflags;
|
||||
assert(m->coldp != NULL);
|
||||
for (;;) {
|
||||
NOTE("finding start");
|
||||
endp = slow(m, m->coldp, stop, gf, gl);
|
||||
endp = slow(charset, m, m->coldp, stop, gf, gl);
|
||||
if (endp != NULL)
|
||||
break;
|
||||
assert(m->coldp < m->endp);
|
||||
@ -159,7 +160,7 @@ int eflags;
|
||||
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
|
||||
if (!g->backrefs && !(m->eflags&REG_BACKR)) {
|
||||
NOTE("dissecting");
|
||||
dp = dissect(m, m->coldp, endp, gf, gl);
|
||||
dp = dissect(charset, m, m->coldp, endp, gf, gl);
|
||||
} else {
|
||||
if (g->nplus > 0 && m->lastpos == NULL)
|
||||
m->lastpos = (char **)malloc((g->nplus+1) *
|
||||
@ -170,7 +171,7 @@ int eflags;
|
||||
return(REG_ESPACE);
|
||||
}
|
||||
NOTE("backref dissect");
|
||||
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
|
||||
dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
|
||||
}
|
||||
if (dp != NULL)
|
||||
break;
|
||||
@ -182,7 +183,7 @@ int eflags;
|
||||
if (dp != NULL || endp <= m->coldp)
|
||||
break; /* defeat */
|
||||
NOTE("backoff");
|
||||
endp = slow(m, m->coldp, endp-1, gf, gl);
|
||||
endp = slow(charset, m, m->coldp, endp-1, gf, gl);
|
||||
if (endp == NULL)
|
||||
break; /* defeat */
|
||||
/* try it on a shorter possibility */
|
||||
@ -193,7 +194,7 @@ int eflags;
|
||||
}
|
||||
#endif
|
||||
NOTE("backoff dissect");
|
||||
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
|
||||
dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
|
||||
}
|
||||
assert(dp == NULL || dp == endp);
|
||||
if (dp != NULL) /* found a shorter one */
|
||||
@ -235,7 +236,8 @@ int eflags;
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* == stop (success) always */
|
||||
dissect(m, start, stop, startst, stopst)
|
||||
dissect(charset, m, start, stop, startst, stopst)
|
||||
CHARSET_INFO *charset;
|
||||
register struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
@ -299,10 +301,10 @@ sopno stopst;
|
||||
stp = stop;
|
||||
for (;;) {
|
||||
/* how long could this one be? */
|
||||
rest = slow(m, sp, stp, ss, es);
|
||||
rest = slow(charset, m, sp, stp, ss, es);
|
||||
assert(rest != NULL); /* it did match */
|
||||
/* could the rest match the rest? */
|
||||
tail = slow(m, rest, stop, es, stopst);
|
||||
tail = slow(charset, m, rest, stop, es, stopst);
|
||||
if (tail == stop)
|
||||
break; /* yes! */
|
||||
/* no -- try a shorter match for this one */
|
||||
@ -312,8 +314,8 @@ sopno stopst;
|
||||
ssub = ss + 1;
|
||||
esub = es - 1;
|
||||
/* did innards match? */
|
||||
if (slow(m, sp, rest, ssub, esub) != NULL) {
|
||||
dp = dissect(m, sp, rest, ssub, esub);
|
||||
if (slow(charset, m, sp, rest, ssub, esub) != NULL) {
|
||||
dp = dissect(charset, m, sp, rest, ssub, esub);
|
||||
assert(dp == rest);
|
||||
} else /* no */
|
||||
assert(sp == rest);
|
||||
@ -323,10 +325,10 @@ sopno stopst;
|
||||
stp = stop;
|
||||
for (;;) {
|
||||
/* how long could this one be? */
|
||||
rest = slow(m, sp, stp, ss, es);
|
||||
rest = slow(charset, m, sp, stp, ss, es);
|
||||
assert(rest != NULL); /* it did match */
|
||||
/* could the rest match the rest? */
|
||||
tail = slow(m, rest, stop, es, stopst);
|
||||
tail = slow(charset, m, rest, stop, es, stopst);
|
||||
if (tail == stop)
|
||||
break; /* yes! */
|
||||
/* no -- try a shorter match for this one */
|
||||
@ -338,7 +340,7 @@ sopno stopst;
|
||||
ssp = sp;
|
||||
oldssp = ssp;
|
||||
for (;;) { /* find last match of innards */
|
||||
sep = slow(m, ssp, rest, ssub, esub);
|
||||
sep = slow(charset, m, ssp, rest, ssub, esub);
|
||||
if (sep == NULL || sep == ssp)
|
||||
break; /* failed or matched null */
|
||||
oldssp = ssp; /* on to next try */
|
||||
@ -350,8 +352,8 @@ sopno stopst;
|
||||
ssp = oldssp;
|
||||
}
|
||||
assert(sep == rest); /* must exhaust substring */
|
||||
assert(slow(m, ssp, sep, ssub, esub) == rest);
|
||||
dp = dissect(m, ssp, sep, ssub, esub);
|
||||
assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
|
||||
dp = dissect(charset, m, ssp, sep, ssub, esub);
|
||||
assert(dp == sep);
|
||||
sp = rest;
|
||||
break;
|
||||
@ -359,10 +361,10 @@ sopno stopst;
|
||||
stp = stop;
|
||||
for (;;) {
|
||||
/* how long could this one be? */
|
||||
rest = slow(m, sp, stp, ss, es);
|
||||
rest = slow(charset, m, sp, stp, ss, es);
|
||||
assert(rest != NULL); /* it did match */
|
||||
/* could the rest match the rest? */
|
||||
tail = slow(m, rest, stop, es, stopst);
|
||||
tail = slow(charset, m, rest, stop, es, stopst);
|
||||
if (tail == stop)
|
||||
break; /* yes! */
|
||||
/* no -- try a shorter match for this one */
|
||||
@ -373,7 +375,7 @@ sopno stopst;
|
||||
esub = ss + OPND(m->g->strip[ss]) - 1;
|
||||
assert(OP(m->g->strip[esub]) == OOR1);
|
||||
for (;;) { /* find first matching branch */
|
||||
if (slow(m, sp, rest, ssub, esub) == rest)
|
||||
if (slow(charset, m, sp, rest, ssub, esub) == rest)
|
||||
break; /* it matched all of it */
|
||||
/* that one missed, try next one */
|
||||
assert(OP(m->g->strip[esub]) == OOR1);
|
||||
@ -386,7 +388,7 @@ sopno stopst;
|
||||
else
|
||||
assert(OP(m->g->strip[esub]) == O_CH);
|
||||
}
|
||||
dp = dissect(m, sp, rest, ssub, esub);
|
||||
dp = dissect(charset, m, sp, rest, ssub, esub);
|
||||
assert(dp == rest);
|
||||
sp = rest;
|
||||
break;
|
||||
@ -423,7 +425,8 @@ sopno stopst;
|
||||
== char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
*/
|
||||
static char * /* == stop (success) or NULL (failure) */
|
||||
backref(m, start, stop, startst, stopst, lev)
|
||||
backref(charset,m, start, stop, startst, stopst, lev)
|
||||
CHARSET_INFO *charset;
|
||||
register struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
@ -486,8 +489,8 @@ sopno lev; /* PLUS nesting level */
|
||||
(sp < m->endp && *(sp-1) == '\n' &&
|
||||
(m->g->cflags&REG_NEWLINE)) ||
|
||||
(sp > m->beginp &&
|
||||
!ISWORD(*(sp-1))) ) &&
|
||||
(sp < m->endp && ISWORD(*sp)) )
|
||||
!ISWORD(charset,*(sp-1))) ) &&
|
||||
(sp < m->endp && ISWORD(charset,*sp)) )
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
@ -496,8 +499,8 @@ sopno lev; /* PLUS nesting level */
|
||||
if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
|
||||
(sp < m->endp && *sp == '\n' &&
|
||||
(m->g->cflags&REG_NEWLINE)) ||
|
||||
(sp < m->endp && !ISWORD(*sp)) ) &&
|
||||
(sp > m->beginp && ISWORD(*(sp-1))) )
|
||||
(sp < m->endp && !ISWORD(charset,*sp)) ) &&
|
||||
(sp > m->beginp && ISWORD(charset,*(sp-1))) )
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
@ -543,28 +546,28 @@ sopno lev; /* PLUS nesting level */
|
||||
return(NULL);
|
||||
while (m->g->strip[ss] != SOP(O_BACK, i))
|
||||
ss++;
|
||||
return(backref(m, sp+len, stop, ss+1, stopst, lev));
|
||||
return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
|
||||
break;
|
||||
case OQUEST_: /* to null or not */
|
||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
||||
dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
|
||||
if (dp != NULL)
|
||||
return(dp); /* not */
|
||||
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
|
||||
return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
|
||||
break;
|
||||
case OPLUS_:
|
||||
assert(m->lastpos != NULL);
|
||||
assert(lev+1 <= m->g->nplus);
|
||||
m->lastpos[lev+1] = sp;
|
||||
return(backref(m, sp, stop, ss+1, stopst, lev+1));
|
||||
return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
|
||||
break;
|
||||
case O_PLUS:
|
||||
if (sp == m->lastpos[lev]) /* last pass matched null */
|
||||
return(backref(m, sp, stop, ss+1, stopst, lev-1));
|
||||
return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
|
||||
/* try another pass */
|
||||
m->lastpos[lev] = sp;
|
||||
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
|
||||
dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
|
||||
if (dp == NULL)
|
||||
return(backref(m, sp, stop, ss+1, stopst, lev-1));
|
||||
return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
|
||||
else
|
||||
return(dp);
|
||||
break;
|
||||
@ -573,7 +576,7 @@ sopno lev; /* PLUS nesting level */
|
||||
esub = ss + OPND(s) - 1;
|
||||
assert(OP(m->g->strip[esub]) == OOR1);
|
||||
for (;;) { /* find first matching branch */
|
||||
dp = backref(m, sp, stop, ssub, esub, lev);
|
||||
dp = backref(charset, m, sp, stop, ssub, esub, lev);
|
||||
if (dp != NULL)
|
||||
return(dp);
|
||||
/* that one missed, try next one */
|
||||
@ -594,7 +597,7 @@ sopno lev; /* PLUS nesting level */
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
offsave = m->pmatch[i].rm_so;
|
||||
m->pmatch[i].rm_so = sp - m->offp;
|
||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
||||
dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
|
||||
if (dp != NULL)
|
||||
return(dp);
|
||||
m->pmatch[i].rm_so = offsave;
|
||||
@ -605,7 +608,7 @@ sopno lev; /* PLUS nesting level */
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
offsave = m->pmatch[i].rm_eo;
|
||||
m->pmatch[i].rm_eo = sp - m->offp;
|
||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
||||
dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
|
||||
if (dp != NULL)
|
||||
return(dp);
|
||||
m->pmatch[i].rm_eo = offsave;
|
||||
@ -628,7 +631,8 @@ sopno lev; /* PLUS nesting level */
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* where tentative match ended, or NULL */
|
||||
fast(m, start, stop, startst, stopst)
|
||||
fast(charset, m, start, stop, startst, stopst)
|
||||
CHARSET_INFO *charset;
|
||||
register struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
@ -678,12 +682,12 @@ sopno stopst;
|
||||
}
|
||||
|
||||
/* how about a word boundary? */
|
||||
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
|
||||
(c != OUT && ISWORD(c)) ) {
|
||||
if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
|
||||
(c != OUT && ISWORD(charset,c)) ) {
|
||||
flagch = BOW;
|
||||
}
|
||||
if ( (lastc != OUT && ISWORD(lastc)) &&
|
||||
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
|
||||
if ( (lastc != OUT && ISWORD(charset,lastc)) &&
|
||||
(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
|
||||
flagch = EOW;
|
||||
}
|
||||
if (flagch == BOW || flagch == EOW) {
|
||||
@ -719,7 +723,8 @@ sopno stopst;
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* where it ended */
|
||||
slow(m, start, stop, startst, stopst)
|
||||
slow(charset, m, start, stop, startst, stopst)
|
||||
CHARSET_INFO *charset;
|
||||
register struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
@ -767,12 +772,12 @@ sopno stopst;
|
||||
}
|
||||
|
||||
/* how about a word boundary? */
|
||||
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
|
||||
(c != OUT && ISWORD(c)) ) {
|
||||
if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
|
||||
(c != OUT && ISWORD(charset,c)) ) {
|
||||
flagch = BOW;
|
||||
}
|
||||
if ( (lastc != OUT && ISWORD(lastc)) &&
|
||||
(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
|
||||
if ( (lastc != OUT && ISWORD(charset,lastc)) &&
|
||||
(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
|
||||
flagch = EOW;
|
||||
}
|
||||
if (flagch == BOW || flagch == EOW) {
|
||||
|
@ -4,11 +4,11 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/* === engine.c === */
|
||||
static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static int matcher(CHARSET_INFO *charset,register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
static char *dissect(CHARSET_INFO *charset,register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *backref(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
static char *fast(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *slow(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
|
||||
#define BOL (OUT+1)
|
||||
#define EOL (BOL+1)
|
||||
|
@ -74,7 +74,7 @@ char *argv[];
|
||||
exit(status);
|
||||
}
|
||||
|
||||
err = regcomp(&re, argv[optind++], copts);
|
||||
err = regcomp(&re, argv[optind++], copts, default_charset_info);
|
||||
if (err) {
|
||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
||||
@ -226,7 +226,7 @@ int opts; /* may not match f1 */
|
||||
strcpy(f0copy, f0);
|
||||
re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
|
||||
fixstr(f0copy);
|
||||
err = regcomp(&re, f0copy, opts);
|
||||
err = regcomp(&re, f0copy, opts, default_charset_info);
|
||||
if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
|
||||
/* unexpected error or wrong error */
|
||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
|
@ -28,6 +28,7 @@ struct parse {
|
||||
# define NPAREN 10 /* we need to remember () 1-9 for back refs */
|
||||
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
|
||||
sopno pend[NPAREN]; /* -> ) ([0] unused) */
|
||||
CHARSET_INFO *charset; /* for ctype things */
|
||||
};
|
||||
|
||||
#include "regcomp.ih"
|
||||
@ -99,10 +100,11 @@ static int never = 0; /* for use in asserts; shuts lint up */
|
||||
= #define REG_DUMP 0200
|
||||
*/
|
||||
int /* 0 success, otherwise REG_something */
|
||||
regcomp(preg, pattern, cflags)
|
||||
regcomp(preg, pattern, cflags, charset)
|
||||
regex_t *preg;
|
||||
const char *pattern;
|
||||
int cflags;
|
||||
CHARSET_INFO *charset;
|
||||
{
|
||||
struct parse pa;
|
||||
register struct re_guts *g;
|
||||
@ -116,6 +118,7 @@ int cflags;
|
||||
#endif
|
||||
|
||||
regex_init(); /* Init cclass if neaded */
|
||||
preg->charset=charset;
|
||||
cflags = GOODFLAGS(cflags);
|
||||
if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
|
||||
return(REG_INVARG);
|
||||
@ -146,6 +149,7 @@ int cflags;
|
||||
p->end = p->next + len;
|
||||
p->error = 0;
|
||||
p->ncsalloc = 0;
|
||||
p->charset = preg->charset;
|
||||
for (i = 0; i < NPAREN; i++) {
|
||||
p->pbegin[i] = 0;
|
||||
p->pend[i] = 0;
|
||||
@ -327,7 +331,7 @@ register struct parse *p;
|
||||
ordinary(p, c);
|
||||
break;
|
||||
case '{': /* okay as ordinary except if digit follows */
|
||||
if(REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT)) {}
|
||||
if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {}
|
||||
/* FALLTHROUGH */
|
||||
default:
|
||||
ordinary(p, c);
|
||||
@ -339,7 +343,8 @@ register struct parse *p;
|
||||
c = PEEK();
|
||||
/* we call { a repetition if followed by a digit */
|
||||
if (!( c == '*' || c == '+' || c == '?' ||
|
||||
(c == '{' && MORE2() && isdigit(PEEK2())) ))
|
||||
(c == '{' && MORE2() &&
|
||||
my_isdigit(p->charset,PEEK2())) ))
|
||||
return; /* no repetition, we're done */
|
||||
NEXT();
|
||||
|
||||
@ -368,7 +373,7 @@ register struct parse *p;
|
||||
case '{':
|
||||
count = p_count(p);
|
||||
if (EAT(',')) {
|
||||
if (isdigit(PEEK())) {
|
||||
if (my_isdigit(p->charset,PEEK())) {
|
||||
count2 = p_count(p);
|
||||
if(REQUIRE(count <= count2, REG_BADBR)) {}
|
||||
} else /* single number with comma */
|
||||
@ -389,7 +394,8 @@ register struct parse *p;
|
||||
return;
|
||||
c = PEEK();
|
||||
if (!( c == '*' || c == '+' || c == '?' ||
|
||||
(c == '{' && MORE2() && isdigit(PEEK2())) ) )
|
||||
(c == '{' && MORE2() &&
|
||||
my_isdigit(p->charset,PEEK2())) ) )
|
||||
return;
|
||||
SETERROR(REG_BADRPT);
|
||||
}
|
||||
@ -546,7 +552,7 @@ int starordinary; /* is a leading * an ordinary character? */
|
||||
} else if (EATTWO('\\', '{')) {
|
||||
count = p_count(p);
|
||||
if (EAT(',')) {
|
||||
if (MORE() && isdigit(PEEK())) {
|
||||
if (MORE() && my_isdigit(p->charset,PEEK())) {
|
||||
count2 = p_count(p);
|
||||
if(REQUIRE(count <= count2, REG_BADBR)) {}
|
||||
} else /* single number with comma */
|
||||
@ -577,7 +583,7 @@ register struct parse *p;
|
||||
register int count = 0;
|
||||
register int ndigits = 0;
|
||||
|
||||
while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
|
||||
while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
|
||||
count = count*10 + (GETNEXT() - '0');
|
||||
ndigits++;
|
||||
}
|
||||
@ -632,8 +638,8 @@ register struct parse *p;
|
||||
register int ci;
|
||||
|
||||
for (i = p->g->csetsize - 1; i >= 0; i--)
|
||||
if (CHIN(cs, i) && isalpha(i)) {
|
||||
ci = othercase(i);
|
||||
if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
|
||||
ci = othercase(p->charset,i);
|
||||
if (ci != i)
|
||||
CHadd(cs, ci);
|
||||
}
|
||||
@ -744,7 +750,7 @@ register cset *cs;
|
||||
register char *u;
|
||||
register char c;
|
||||
|
||||
while (MORE() && isalpha(PEEK()))
|
||||
while (MORE() && my_isalpha(p->charset,PEEK()))
|
||||
NEXT();
|
||||
len = p->next - sp;
|
||||
for (cp = cclasses; cp->name != NULL; cp++)
|
||||
@ -837,14 +843,15 @@ int endc; /* name ended by endc,']' */
|
||||
== static char othercase(int ch);
|
||||
*/
|
||||
static char /* if no counterpart, return ch */
|
||||
othercase(ch)
|
||||
othercase(charset,ch)
|
||||
CHARSET_INFO *charset;
|
||||
int ch;
|
||||
{
|
||||
assert(isalpha(ch));
|
||||
if (isupper(ch))
|
||||
return(tolower(ch));
|
||||
else if (islower(ch))
|
||||
return(toupper(ch));
|
||||
assert(my_isalpha(charset,ch));
|
||||
if (my_isupper(charset,ch))
|
||||
return(my_tolower(charset,ch));
|
||||
else if (my_islower(charset,ch))
|
||||
return(my_toupper(charset,ch));
|
||||
else /* peculiar, but could happen */
|
||||
return(ch);
|
||||
}
|
||||
@ -887,7 +894,8 @@ register int ch;
|
||||
{
|
||||
register cat_t *cap = p->g->categories;
|
||||
|
||||
if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
|
||||
if ((p->g->cflags&REG_ICASE) && my_isalpha(p->charset,ch) &&
|
||||
othercase(p->charset,ch) != ch)
|
||||
bothcases(p, ch);
|
||||
else {
|
||||
EMIT(OCHAR, (unsigned char)ch);
|
||||
|
@ -16,7 +16,7 @@ static void p_b_cclass(register struct parse *p, register cset *cs);
|
||||
static void p_b_eclass(register struct parse *p, register cset *cs);
|
||||
static char p_b_symbol(register struct parse *p);
|
||||
static char p_b_coll_elem(register struct parse *p, int endc);
|
||||
static char othercase(int ch);
|
||||
static char othercase(CHARSET_INFO *charset,int ch);
|
||||
static void bothcases(register struct parse *p, int ch);
|
||||
static void ordinary(register struct parse *p, register int ch);
|
||||
static void nonnewline(register struct parse *p);
|
||||
|
@ -5,6 +5,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "m_ctype.h"
|
||||
|
||||
/* === regex2.h === */
|
||||
#ifdef _WIN64
|
||||
typedef __int64 regoff_t;
|
||||
@ -17,6 +19,7 @@ typedef struct {
|
||||
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
const char *re_endp; /* end pointer for REG_PEND */
|
||||
struct re_guts *re_g; /* none of your business :-) */
|
||||
CHARSET_INFO *charset; /* For ctype things */
|
||||
} regex_t;
|
||||
typedef struct {
|
||||
regoff_t rm_so; /* start of match */
|
||||
@ -25,7 +28,7 @@ typedef struct {
|
||||
|
||||
|
||||
/* === regcomp.c === */
|
||||
extern int regcomp(regex_t *, const char *, int);
|
||||
extern int regcomp(regex_t *, const char *, int, CHARSET_INFO *charset);
|
||||
#define REG_BASIC 0000
|
||||
#define REG_EXTENDED 0001
|
||||
#define REG_ICASE 0002
|
||||
|
@ -140,6 +140,6 @@ struct re_guts {
|
||||
/* misc utilities */
|
||||
#undef OUT /* May be defined in windows */
|
||||
#define OUT (CHAR_MAX+1) /* a non-character value */
|
||||
#define ISWORD(c) (isalnum(c) || (c) == '_')
|
||||
#define ISWORD(s,c) (my_isalnum(s,c) || (c) == '_')
|
||||
|
||||
#endif /* __regex2_h__ */
|
||||
|
@ -133,7 +133,7 @@ int eflags;
|
||||
|
||||
if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) &&
|
||||
!(eflags&REG_LARGE))
|
||||
return(smatcher(g, (char *)str, nmatch, pmatch, eflags));
|
||||
return(smatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags));
|
||||
else
|
||||
return(lmatcher(g, (char *)str, nmatch, pmatch, eflags));
|
||||
return(lmatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags));
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ void regex_init()
|
||||
char buff[CCLASS_LAST][256];
|
||||
int count[CCLASS_LAST];
|
||||
uint i;
|
||||
CHARSET_INFO *cs=default_charset_info;
|
||||
|
||||
if (!regex_inited)
|
||||
{
|
||||
@ -20,27 +21,27 @@ void regex_init()
|
||||
|
||||
for (i=1 ; i<= 255; i++)
|
||||
{
|
||||
if (isalnum(i))
|
||||
if (my_isalnum(cs,i))
|
||||
buff[CCLASS_ALNUM][count[CCLASS_ALNUM]++]=(char) i;
|
||||
if (isalpha(i))
|
||||
if (my_isalpha(cs,i))
|
||||
buff[CCLASS_ALPHA][count[CCLASS_ALPHA]++]=(char) i;
|
||||
if (iscntrl(i))
|
||||
if (my_iscntrl(cs,i))
|
||||
buff[CCLASS_CNTRL][count[CCLASS_CNTRL]++]=(char) i;
|
||||
if (isdigit(i))
|
||||
if (my_isdigit(cs,i))
|
||||
buff[CCLASS_DIGIT][count[CCLASS_DIGIT]++]=(char) i;
|
||||
if (isgraph(i))
|
||||
if (my_isgraph(cs,i))
|
||||
buff[CCLASS_GRAPH][count[CCLASS_GRAPH]++]=(char) i;
|
||||
if (islower(i))
|
||||
if (my_islower(cs,i))
|
||||
buff[CCLASS_LOWER][count[CCLASS_LOWER]++]=(char) i;
|
||||
if (isprint(i))
|
||||
if (my_isprint(cs,i))
|
||||
buff[CCLASS_PRINT][count[CCLASS_PRINT]++]=(char) i;
|
||||
if (ispunct(i))
|
||||
if (my_ispunct(cs,i))
|
||||
buff[CCLASS_PUNCT][count[CCLASS_PUNCT]++]=(char) i;
|
||||
if (isspace(i))
|
||||
if (my_isspace(cs,i))
|
||||
buff[CCLASS_SPACE][count[CCLASS_SPACE]++]=(char) i;
|
||||
if (isupper(i))
|
||||
if (my_isupper(cs,i))
|
||||
buff[CCLASS_UPPER][count[CCLASS_UPPER]++]=(char) i;
|
||||
if (isxdigit(i))
|
||||
if (my_isxdigit(cs,i))
|
||||
buff[CCLASS_XDIGIT][count[CCLASS_XDIGIT]++]=(char) i;
|
||||
}
|
||||
buff[CCLASS_BLANK][0]=' ';
|
||||
|
@ -1278,7 +1278,8 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
|
||||
int error;
|
||||
if ((error=regcomp(&preg,res->c_ptr(),
|
||||
binary ? REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE)))
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE,
|
||||
default_charset_info)))
|
||||
{
|
||||
(void) regerror(error,&preg,buff,sizeof(buff));
|
||||
my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
|
||||
@ -1325,7 +1326,8 @@ longlong Item_func_regex::val_int()
|
||||
}
|
||||
if (regcomp(&preg,res2->c_ptr(),
|
||||
binary ? REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE))
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE,
|
||||
default_charset_info))
|
||||
|
||||
{
|
||||
null_value=1;
|
||||
|
Reference in New Issue
Block a user