mirror of
https://github.com/postgres/postgres.git
synced 2025-07-02 09:02:37 +03:00
From: t-ishii@sra.co.jp
Included are patches intended to allow PostgreSQL to handle multi-byte character sets such as EUC (Extended Unix Code), Unicode and Mule internal code. With the MB patch you can use multi-byte character sets in regexp and LIKE. The encoding system is chosen at compile time. To enable the MB extension, you need to define a variable "MB" in Makefile.global or in Makefile.custom. For further information, please take a look at README.mb under the doc directory. (Note that, unlike the "jp patch", I no longer use a modified GNU regexp; instead I changed Henry Spencer's regexp that comes with PostgreSQL.)
This commit is contained in:
@ -73,11 +73,11 @@ struct match
|
||||
struct re_guts *g;
|
||||
int eflags;
|
||||
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
|
||||
char *offp; /* offsets work from here */
|
||||
char *beginp; /* start of string -- virtual NUL precedes */
|
||||
char *endp; /* end of string -- virtual NUL here */
|
||||
char *coldp; /* can be no match starting before here */
|
||||
char **lastpos; /* [nplus+1] */
|
||||
pg_wchar *offp; /* offsets work from here */
|
||||
pg_wchar *beginp; /* start of string -- virtual NUL precedes */
|
||||
pg_wchar *endp; /* end of string -- virtual NUL here */
|
||||
pg_wchar *coldp; /* can be no match starting before here */
|
||||
pg_wchar **lastpos; /* [nplus+1] */
|
||||
STATEVARS;
|
||||
states st; /* current states */
|
||||
states fresh; /* states for a fresh start */
|
||||
@ -93,19 +93,19 @@ extern "C"
|
||||
|
||||
/* === engine.c === */
|
||||
static int
|
||||
matcher(struct re_guts * g, char *string, size_t nmatch,
|
||||
matcher(struct re_guts * g, pg_wchar *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags);
|
||||
static char *
|
||||
dissect(struct match * m, char *start, char *stop,
|
||||
static pg_wchar *
|
||||
dissect(struct match * m, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst);
|
||||
static char *
|
||||
backref(struct match * m, char *start, char *stop,
|
||||
static pg_wchar *
|
||||
backref(struct match * m, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst, sopno lev);
|
||||
static char *
|
||||
fast(struct match * m, char *start, char *stop,
|
||||
static pg_wchar *
|
||||
fast(struct match * m, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst);
|
||||
static char *
|
||||
slow(struct match * m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static pg_wchar *
|
||||
slow(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst);
|
||||
static states
|
||||
step(struct re_guts * g, sopno start,
|
||||
sopno stop, states bef, int ch, states aft);
|
||||
@ -116,20 +116,35 @@ extern "C"
|
||||
#define BOW (BOL+4)
|
||||
#define EOW (BOL+5)
|
||||
#define CODEMAX (BOL+5) /* highest code used */
|
||||
#define NONCHAR(c) ((c) > CHAR_MAX)
|
||||
#define NNONCHAR (CODEMAX-CHAR_MAX)
|
||||
|
||||
#ifdef MB
|
||||
# if MB == MULE_INTERNAL
|
||||
# define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */
|
||||
# define NNONCHAR (CODEMAX-16777216)
|
||||
# elif MB == EUC_JP || MB == EUC_CN || MB == EUC_KR || MB == EUC_TW
|
||||
# define NONCHAR(c) ((c) > USHRT_MAX)
|
||||
# define NNONCHAR (CODEMAX-USHRT_MAX)
|
||||
# elif MB == UNICODE
|
||||
# define NONCHAR(c) ((c) > USHRT_MAX)
|
||||
# define NNONCHAR (CODEMAX-USHRT_MAX)
|
||||
# endif
|
||||
#else
|
||||
# define NONCHAR(c) ((c) > CHAR_MAX)
|
||||
# define NNONCHAR (CODEMAX-CHAR_MAX)
|
||||
#endif
|
||||
|
||||
#ifdef REDEBUG
|
||||
static void
|
||||
print(struct match * m, char *caption, states st, int ch, FILE *d);
|
||||
print(struct match * m, pg_wchar *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static void
|
||||
at(struct match * m, char *title, char *start, char *stop,
|
||||
at(struct match * m, pg_wchar *title, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static char *
|
||||
pchar(int ch);
|
||||
static pg_wchar *
|
||||
p_char(int ch);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -150,26 +165,26 @@ extern "C"
|
||||
|
||||
/*
|
||||
- matcher - the actual matching engine
|
||||
== static int matcher(struct re_guts *g, char *string, \
|
||||
== static int matcher(struct re_guts *g, pg_wchar *string, \
|
||||
== size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
*/
|
||||
static int /* 0 success, REG_NOMATCH failure */
|
||||
matcher(g, string, nmatch, pmatch, eflags)
|
||||
struct re_guts *g;
|
||||
char *string;
|
||||
pg_wchar *string;
|
||||
size_t nmatch;
|
||||
regmatch_t pmatch[];
|
||||
int eflags;
|
||||
{
|
||||
char *endp;
|
||||
pg_wchar *endp;
|
||||
int i;
|
||||
struct match mv;
|
||||
struct match *m = &mv;
|
||||
char *dp;
|
||||
pg_wchar *dp;
|
||||
const sopno gf = g->firststate + 1; /* +1 for OEND */
|
||||
const sopno gl = g->laststate;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
|
||||
/* simplify the situation where possible */
|
||||
if (g->cflags & REG_NOSUB)
|
||||
@ -182,7 +197,11 @@ int eflags;
|
||||
else
|
||||
{
|
||||
start = string;
|
||||
#ifdef MB
|
||||
stop = start + pg_wchar_strlen(start);
|
||||
#else
|
||||
stop = start + strlen(start);
|
||||
#endif
|
||||
}
|
||||
if (stop < start)
|
||||
return (REG_INVARG);
|
||||
@ -192,7 +211,11 @@ int eflags;
|
||||
{
|
||||
for (dp = start; dp < stop; dp++)
|
||||
if (*dp == g->must[0] && stop - dp >= g->mlen &&
|
||||
#ifdef MB
|
||||
memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0)
|
||||
#else
|
||||
memcmp(dp, g->must, (size_t) g->mlen) == 0)
|
||||
#endif
|
||||
break;
|
||||
if (dp == stop) /* we didn't find g->must */
|
||||
return (REG_NOMATCH);
|
||||
@ -258,8 +281,8 @@ int eflags;
|
||||
else
|
||||
{
|
||||
if (g->nplus > 0 && m->lastpos == NULL)
|
||||
m->lastpos = (char **) malloc((g->nplus + 1) *
|
||||
sizeof(char *));
|
||||
m->lastpos = (pg_wchar **) malloc((g->nplus + 1) *
|
||||
sizeof(pg_wchar *));
|
||||
if (g->nplus > 0 && m->lastpos == NULL)
|
||||
{
|
||||
free(m->pmatch);
|
||||
@ -324,9 +347,9 @@ int eflags;
|
||||
}
|
||||
|
||||
if (m->pmatch != NULL)
|
||||
free((char *) m->pmatch);
|
||||
free((pg_wchar *) m->pmatch);
|
||||
if (m->lastpos != NULL)
|
||||
free((char *) m->lastpos);
|
||||
free((pg_wchar *) m->lastpos);
|
||||
STATETEARDOWN(m);
|
||||
return (0);
|
||||
}
|
||||
@ -336,27 +359,27 @@ int eflags;
|
||||
== static char *dissect(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* == stop (success) always */
|
||||
static pg_wchar * /* == stop (success) always */
|
||||
dissect(m, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
int i;
|
||||
sopno ss; /* start sop of current subRE */
|
||||
sopno es; /* end sop of current subRE */
|
||||
char *sp; /* start of string matched by it */
|
||||
char *stp; /* string matched by it cannot pass here */
|
||||
char *rest; /* start of rest of string */
|
||||
char *tail; /* string unmatched by rest of RE */
|
||||
pg_wchar *sp; /* start of string matched by it */
|
||||
pg_wchar *stp; /* string matched by it cannot pass here */
|
||||
pg_wchar *rest; /* start of rest of string */
|
||||
pg_wchar *tail; /* string unmatched by rest of RE */
|
||||
sopno ssub; /* start sop of subsubRE */
|
||||
sopno esub; /* end sop of subsubRE */
|
||||
char *ssp; /* start of string matched by subsubRE */
|
||||
char *sep; /* end of string matched by subsubRE */
|
||||
char *oldssp; /* previous ssp */
|
||||
char *dp;
|
||||
pg_wchar *ssp; /* start of string matched by subsubRE */
|
||||
pg_wchar *sep; /* end of string matched by subsubRE */
|
||||
pg_wchar *oldssp; /* previous ssp */
|
||||
pg_wchar *dp;
|
||||
|
||||
AT("diss", start, stop, startst, stopst);
|
||||
sp = start;
|
||||
@ -536,22 +559,22 @@ sopno stopst;
|
||||
== static char *backref(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
*/
|
||||
static char * /* == stop (success) or NULL (failure) */
|
||||
static pg_wchar * /* == stop (success) or NULL (failure) */
|
||||
backref(m, start, stop, startst, stopst, lev)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
sopno lev; /* PLUS nesting level */
|
||||
{
|
||||
int i;
|
||||
sopno ss; /* start sop of current subRE */
|
||||
char *sp; /* start of string matched by it */
|
||||
pg_wchar *sp; /* start of string matched by it */
|
||||
sopno ssub; /* start sop of subsubRE */
|
||||
sopno esub; /* end sop of subsubRE */
|
||||
char *ssp; /* start of string matched by subsubRE */
|
||||
char *dp;
|
||||
pg_wchar *ssp; /* start of string matched by subsubRE */
|
||||
pg_wchar *dp;
|
||||
size_t len;
|
||||
int hard;
|
||||
sop s;
|
||||
@ -567,7 +590,7 @@ sopno lev; /* PLUS nesting level */
|
||||
switch (OP(s = m->g->strip[ss]))
|
||||
{
|
||||
case OCHAR:
|
||||
if (sp == stop || *sp++ != (char) OPND(s))
|
||||
if (sp == stop || *sp++ != (pg_wchar) OPND(s))
|
||||
return (NULL);
|
||||
break;
|
||||
case OANY:
|
||||
@ -750,23 +773,23 @@ sopno lev; /* PLUS nesting level */
|
||||
== static char *fast(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* where tentative match ended, or NULL */
|
||||
static pg_wchar * /* where tentative match ended, or NULL */
|
||||
fast(m, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
states st = m->st;
|
||||
states fresh = m->fresh;
|
||||
states tmp = m->tmp;
|
||||
char *p = start;
|
||||
pg_wchar *p = start;
|
||||
int c = (start == m->beginp) ? OUT : *(start - 1);
|
||||
int lastc; /* previous c */
|
||||
int flagch;
|
||||
int i;
|
||||
char *coldp; /* last p after which no match was
|
||||
pg_wchar *coldp; /* last p after which no match was
|
||||
* underway */
|
||||
|
||||
CLEAR(st);
|
||||
@ -849,23 +872,23 @@ sopno stopst;
|
||||
== static char *slow(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* where it ended */
|
||||
static pg_wchar * /* where it ended */
|
||||
slow(m, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
states st = m->st;
|
||||
states empty = m->empty;
|
||||
states tmp = m->tmp;
|
||||
char *p = start;
|
||||
pg_wchar *p = start;
|
||||
int c = (start == m->beginp) ? OUT : *(start - 1);
|
||||
int lastc; /* previous c */
|
||||
int flagch;
|
||||
int i;
|
||||
char *matchp; /* last p at which a match ended */
|
||||
pg_wchar *matchp; /* last p at which a match ended */
|
||||
|
||||
AT("slow", start, stop, startst, stopst);
|
||||
CLEAR(st);
|
||||
@ -978,8 +1001,8 @@ states aft; /* states already known reachable after */
|
||||
break;
|
||||
case OCHAR:
|
||||
/* only characters can match */
|
||||
assert(!NONCHAR(ch) || ch != (char) OPND(s));
|
||||
if (ch == (char) OPND(s))
|
||||
assert(!NONCHAR(ch) || ch != (pg_wchar) OPND(s));
|
||||
if (ch == (pg_wchar) OPND(s))
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OBOL:
|
||||
@ -1082,7 +1105,7 @@ states aft; /* states already known reachable after */
|
||||
static void
|
||||
print(m, caption, st, ch, d)
|
||||
struct match *m;
|
||||
char *caption;
|
||||
pg_wchar *caption;
|
||||
states st;
|
||||
int ch;
|
||||
FILE *d;
|
||||
@ -1109,16 +1132,16 @@ FILE *d;
|
||||
/*
|
||||
- at - print current situation
|
||||
== #ifdef REDEBUG
|
||||
== static void at(struct match *m, char *title, char *start, char *stop, \
|
||||
== static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \
|
||||
== sopno startst, sopno stopst);
|
||||
== #endif
|
||||
*/
|
||||
static void
|
||||
at(m, title, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *title;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *title;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
@ -1143,13 +1166,24 @@ sopno stopst;
|
||||
* a matching debug.o, and this is convenient. It all disappears in
|
||||
* the non-debug compilation anyway, so it doesn't matter much.
|
||||
*/
|
||||
static char * /* -> representation */
|
||||
|
||||
|
||||
static int pg_isprint(int c)
|
||||
{
|
||||
#ifdef MB
|
||||
return(c >= 0 && c <= UCHAR_MAX && isprint(c));
|
||||
#else
|
||||
return(isprint(c));
|
||||
#endif
|
||||
}
|
||||
|
||||
static pg_wchar * /* -> representation */
|
||||
pchar(ch)
|
||||
int ch;
|
||||
{
|
||||
static char pbuf[10];
|
||||
static pg_wchar pbuf[10];
|
||||
|
||||
if (isprint(ch) || ch == ' ')
|
||||
if (pg_isprint(ch) || ch == ' ')
|
||||
sprintf(pbuf, "%c", ch);
|
||||
else
|
||||
sprintf(pbuf, "\\%o", ch);
|
||||
|
Reference in New Issue
Block a user