mirror of
https://github.com/postgres/postgres.git
synced 2025-06-30 21:42:05 +03:00
From: t-ishii@sra.co.jp
Included are patches intended to allow PostgreSQL to handle multi-byte character sets such as EUC (Extended Unix Code), Unicode and Mule internal code. With the MB patch you can use multi-byte character sets in regexp and LIKE. The encoding system is chosen at compile time. To enable the MB extension, you need to define a variable "MB" in Makefile.global or in Makefile.custom. For further information please take a look at README.mb under the doc directory. (Note that unlike the "jp patch" I no longer use a modified GNU regexp. I changed Henry Spencer's regexp that comes with PostgreSQL.)
This commit is contained in:
@ -4,7 +4,7 @@
|
||||
# Makefile for regex
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $Header: /cvsroot/pgsql/src/backend/regex/Makefile,v 1.4 1997/12/20 00:26:58 scrappy Exp $
|
||||
# $Header: /cvsroot/pgsql/src/backend/regex/Makefile,v 1.5 1998/03/15 07:38:14 scrappy Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
@ -17,6 +17,10 @@ CFLAGS+=$(INCLUDE_OPT)
|
||||
CFLAGS+=-DPOSIX_MISTAKE
|
||||
|
||||
OBJS = regcomp.o regerror.o regexec.o regfree.o
|
||||
ifdef MB
|
||||
OBJS += utils.o wstrcmp.o wstrncmp.o
|
||||
CFLAGS += -DMB=$(MB)
|
||||
endif
|
||||
|
||||
all: SUBSYS.o
|
||||
|
||||
|
@ -73,11 +73,11 @@ struct match
|
||||
struct re_guts *g;
|
||||
int eflags;
|
||||
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
|
||||
char *offp; /* offsets work from here */
|
||||
char *beginp; /* start of string -- virtual NUL precedes */
|
||||
char *endp; /* end of string -- virtual NUL here */
|
||||
char *coldp; /* can be no match starting before here */
|
||||
char **lastpos; /* [nplus+1] */
|
||||
pg_wchar *offp; /* offsets work from here */
|
||||
pg_wchar *beginp; /* start of string -- virtual NUL precedes */
|
||||
pg_wchar *endp; /* end of string -- virtual NUL here */
|
||||
pg_wchar *coldp; /* can be no match starting before here */
|
||||
pg_wchar **lastpos; /* [nplus+1] */
|
||||
STATEVARS;
|
||||
states st; /* current states */
|
||||
states fresh; /* states for a fresh start */
|
||||
@ -93,19 +93,19 @@ extern "C"
|
||||
|
||||
/* === engine.c === */
|
||||
static int
|
||||
matcher(struct re_guts * g, char *string, size_t nmatch,
|
||||
matcher(struct re_guts * g, pg_wchar *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags);
|
||||
static char *
|
||||
dissect(struct match * m, char *start, char *stop,
|
||||
static pg_wchar *
|
||||
dissect(struct match * m, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst);
|
||||
static char *
|
||||
backref(struct match * m, char *start, char *stop,
|
||||
static pg_wchar *
|
||||
backref(struct match * m, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst, sopno lev);
|
||||
static char *
|
||||
fast(struct match * m, char *start, char *stop,
|
||||
static pg_wchar *
|
||||
fast(struct match * m, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst);
|
||||
static char *
|
||||
slow(struct match * m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static pg_wchar *
|
||||
slow(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst);
|
||||
static states
|
||||
step(struct re_guts * g, sopno start,
|
||||
sopno stop, states bef, int ch, states aft);
|
||||
@ -116,20 +116,35 @@ extern "C"
|
||||
#define BOW (BOL+4)
|
||||
#define EOW (BOL+5)
|
||||
#define CODEMAX (BOL+5) /* highest code used */
|
||||
#define NONCHAR(c) ((c) > CHAR_MAX)
|
||||
#define NNONCHAR (CODEMAX-CHAR_MAX)
|
||||
|
||||
#ifdef MB
|
||||
# if MB == MULE_INTERNAL
|
||||
# define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */
|
||||
# define NNONCHAR (CODEMAX-16777216)
|
||||
# elif MB == EUC_JP || MB == EUC_CN || MB == EUC_KR || MB == EUC_TW
|
||||
# define NONCHAR(c) ((c) > USHRT_MAX)
|
||||
# define NNONCHAR (CODEMAX-USHRT_MAX)
|
||||
# elif MB == UNICODE
|
||||
# define NONCHAR(c) ((c) > USHRT_MAX)
|
||||
# define NNONCHAR (CODEMAX-USHRT_MAX)
|
||||
# endif
|
||||
#else
|
||||
# define NONCHAR(c) ((c) > CHAR_MAX)
|
||||
# define NNONCHAR (CODEMAX-CHAR_MAX)
|
||||
#endif
|
||||
|
||||
#ifdef REDEBUG
|
||||
static void
|
||||
print(struct match * m, char *caption, states st, int ch, FILE *d);
|
||||
print(struct match * m, pg_wchar *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static void
|
||||
at(struct match * m, char *title, char *start, char *stop,
|
||||
at(struct match * m, pg_wchar *title, pg_wchar *start, pg_wchar *stop,
|
||||
sopno startst, sopno stopst);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static char *
|
||||
pchar(int ch);
|
||||
static pg_wchar *
|
||||
p_char(int ch);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -150,26 +165,26 @@ extern "C"
|
||||
|
||||
/*
|
||||
- matcher - the actual matching engine
|
||||
== static int matcher(struct re_guts *g, char *string, \
|
||||
== static int matcher(struct re_guts *g, pg_wchar *string, \
|
||||
== size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
*/
|
||||
static int /* 0 success, REG_NOMATCH failure */
|
||||
matcher(g, string, nmatch, pmatch, eflags)
|
||||
struct re_guts *g;
|
||||
char *string;
|
||||
pg_wchar *string;
|
||||
size_t nmatch;
|
||||
regmatch_t pmatch[];
|
||||
int eflags;
|
||||
{
|
||||
char *endp;
|
||||
pg_wchar *endp;
|
||||
int i;
|
||||
struct match mv;
|
||||
struct match *m = &mv;
|
||||
char *dp;
|
||||
pg_wchar *dp;
|
||||
const sopno gf = g->firststate + 1; /* +1 for OEND */
|
||||
const sopno gl = g->laststate;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
|
||||
/* simplify the situation where possible */
|
||||
if (g->cflags & REG_NOSUB)
|
||||
@ -182,7 +197,11 @@ int eflags;
|
||||
else
|
||||
{
|
||||
start = string;
|
||||
#ifdef MB
|
||||
stop = start + pg_wchar_strlen(start);
|
||||
#else
|
||||
stop = start + strlen(start);
|
||||
#endif
|
||||
}
|
||||
if (stop < start)
|
||||
return (REG_INVARG);
|
||||
@ -192,7 +211,11 @@ int eflags;
|
||||
{
|
||||
for (dp = start; dp < stop; dp++)
|
||||
if (*dp == g->must[0] && stop - dp >= g->mlen &&
|
||||
#ifdef MB
|
||||
memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0)
|
||||
#else
|
||||
memcmp(dp, g->must, (size_t) g->mlen) == 0)
|
||||
#endif
|
||||
break;
|
||||
if (dp == stop) /* we didn't find g->must */
|
||||
return (REG_NOMATCH);
|
||||
@ -258,8 +281,8 @@ int eflags;
|
||||
else
|
||||
{
|
||||
if (g->nplus > 0 && m->lastpos == NULL)
|
||||
m->lastpos = (char **) malloc((g->nplus + 1) *
|
||||
sizeof(char *));
|
||||
m->lastpos = (pg_wchar **) malloc((g->nplus + 1) *
|
||||
sizeof(pg_wchar *));
|
||||
if (g->nplus > 0 && m->lastpos == NULL)
|
||||
{
|
||||
free(m->pmatch);
|
||||
@ -324,9 +347,9 @@ int eflags;
|
||||
}
|
||||
|
||||
if (m->pmatch != NULL)
|
||||
free((char *) m->pmatch);
|
||||
free((pg_wchar *) m->pmatch);
|
||||
if (m->lastpos != NULL)
|
||||
free((char *) m->lastpos);
|
||||
free((pg_wchar *) m->lastpos);
|
||||
STATETEARDOWN(m);
|
||||
return (0);
|
||||
}
|
||||
@ -336,27 +359,27 @@ int eflags;
|
||||
== static char *dissect(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* == stop (success) always */
|
||||
static pg_wchar * /* == stop (success) always */
|
||||
dissect(m, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
int i;
|
||||
sopno ss; /* start sop of current subRE */
|
||||
sopno es; /* end sop of current subRE */
|
||||
char *sp; /* start of string matched by it */
|
||||
char *stp; /* string matched by it cannot pass here */
|
||||
char *rest; /* start of rest of string */
|
||||
char *tail; /* string unmatched by rest of RE */
|
||||
pg_wchar *sp; /* start of string matched by it */
|
||||
pg_wchar *stp; /* string matched by it cannot pass here */
|
||||
pg_wchar *rest; /* start of rest of string */
|
||||
pg_wchar *tail; /* string unmatched by rest of RE */
|
||||
sopno ssub; /* start sop of subsubRE */
|
||||
sopno esub; /* end sop of subsubRE */
|
||||
char *ssp; /* start of string matched by subsubRE */
|
||||
char *sep; /* end of string matched by subsubRE */
|
||||
char *oldssp; /* previous ssp */
|
||||
char *dp;
|
||||
pg_wchar *ssp; /* start of string matched by subsubRE */
|
||||
pg_wchar *sep; /* end of string matched by subsubRE */
|
||||
pg_wchar *oldssp; /* previous ssp */
|
||||
pg_wchar *dp;
|
||||
|
||||
AT("diss", start, stop, startst, stopst);
|
||||
sp = start;
|
||||
@ -536,22 +559,22 @@ sopno stopst;
|
||||
== static char *backref(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
*/
|
||||
static char * /* == stop (success) or NULL (failure) */
|
||||
static pg_wchar * /* == stop (success) or NULL (failure) */
|
||||
backref(m, start, stop, startst, stopst, lev)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
sopno lev; /* PLUS nesting level */
|
||||
{
|
||||
int i;
|
||||
sopno ss; /* start sop of current subRE */
|
||||
char *sp; /* start of string matched by it */
|
||||
pg_wchar *sp; /* start of string matched by it */
|
||||
sopno ssub; /* start sop of subsubRE */
|
||||
sopno esub; /* end sop of subsubRE */
|
||||
char *ssp; /* start of string matched by subsubRE */
|
||||
char *dp;
|
||||
pg_wchar *ssp; /* start of string matched by subsubRE */
|
||||
pg_wchar *dp;
|
||||
size_t len;
|
||||
int hard;
|
||||
sop s;
|
||||
@ -567,7 +590,7 @@ sopno lev; /* PLUS nesting level */
|
||||
switch (OP(s = m->g->strip[ss]))
|
||||
{
|
||||
case OCHAR:
|
||||
if (sp == stop || *sp++ != (char) OPND(s))
|
||||
if (sp == stop || *sp++ != (pg_wchar) OPND(s))
|
||||
return (NULL);
|
||||
break;
|
||||
case OANY:
|
||||
@ -750,23 +773,23 @@ sopno lev; /* PLUS nesting level */
|
||||
== static char *fast(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* where tentative match ended, or NULL */
|
||||
static pg_wchar * /* where tentative match ended, or NULL */
|
||||
fast(m, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
states st = m->st;
|
||||
states fresh = m->fresh;
|
||||
states tmp = m->tmp;
|
||||
char *p = start;
|
||||
pg_wchar *p = start;
|
||||
int c = (start == m->beginp) ? OUT : *(start - 1);
|
||||
int lastc; /* previous c */
|
||||
int flagch;
|
||||
int i;
|
||||
char *coldp; /* last p after which no match was
|
||||
pg_wchar *coldp; /* last p after which no match was
|
||||
* underway */
|
||||
|
||||
CLEAR(st);
|
||||
@ -849,23 +872,23 @@ sopno stopst;
|
||||
== static char *slow(struct match *m, char *start, \
|
||||
== char *stop, sopno startst, sopno stopst);
|
||||
*/
|
||||
static char * /* where it ended */
|
||||
static pg_wchar * /* where it ended */
|
||||
slow(m, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
states st = m->st;
|
||||
states empty = m->empty;
|
||||
states tmp = m->tmp;
|
||||
char *p = start;
|
||||
pg_wchar *p = start;
|
||||
int c = (start == m->beginp) ? OUT : *(start - 1);
|
||||
int lastc; /* previous c */
|
||||
int flagch;
|
||||
int i;
|
||||
char *matchp; /* last p at which a match ended */
|
||||
pg_wchar *matchp; /* last p at which a match ended */
|
||||
|
||||
AT("slow", start, stop, startst, stopst);
|
||||
CLEAR(st);
|
||||
@ -978,8 +1001,8 @@ states aft; /* states already known reachable after */
|
||||
break;
|
||||
case OCHAR:
|
||||
/* only characters can match */
|
||||
assert(!NONCHAR(ch) || ch != (char) OPND(s));
|
||||
if (ch == (char) OPND(s))
|
||||
assert(!NONCHAR(ch) || ch != (pg_wchar) OPND(s));
|
||||
if (ch == (pg_wchar) OPND(s))
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OBOL:
|
||||
@ -1082,7 +1105,7 @@ states aft; /* states already known reachable after */
|
||||
static void
|
||||
print(m, caption, st, ch, d)
|
||||
struct match *m;
|
||||
char *caption;
|
||||
pg_wchar *caption;
|
||||
states st;
|
||||
int ch;
|
||||
FILE *d;
|
||||
@ -1109,16 +1132,16 @@ FILE *d;
|
||||
/*
|
||||
- at - print current situation
|
||||
== #ifdef REDEBUG
|
||||
== static void at(struct match *m, char *title, char *start, char *stop, \
|
||||
== static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \
|
||||
== sopno startst, sopno stopst);
|
||||
== #endif
|
||||
*/
|
||||
static void
|
||||
at(m, title, start, stop, startst, stopst)
|
||||
struct match *m;
|
||||
char *title;
|
||||
char *start;
|
||||
char *stop;
|
||||
pg_wchar *title;
|
||||
pg_wchar *start;
|
||||
pg_wchar *stop;
|
||||
sopno startst;
|
||||
sopno stopst;
|
||||
{
|
||||
@ -1143,13 +1166,24 @@ sopno stopst;
|
||||
* a matching debug.o, and this is convenient. It all disappears in
|
||||
* the non-debug compilation anyway, so it doesn't matter much.
|
||||
*/
|
||||
static char * /* -> representation */
|
||||
|
||||
|
||||
/*
 * pg_isprint - isprint() wrapper usable on wide-character codes.
 *
 * In MB builds any code outside 0..UCHAR_MAX is reported as not printable
 * without consulting isprint(); in-range codes yield 1 or 0.  In non-MB
 * builds the result of isprint() is returned unchanged.
 */
static int pg_isprint(int c)
{
#ifdef MB
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isprint(c) != 0;
#else
	return isprint(c);
#endif
}
|
||||
|
||||
static pg_wchar * /* -> representation */
|
||||
pchar(ch)
|
||||
int ch;
|
||||
{
|
||||
static char pbuf[10];
|
||||
static pg_wchar pbuf[10];
|
||||
|
||||
if (isprint(ch) || ch == ' ')
|
||||
if (pg_isprint(ch) || ch == ' ')
|
||||
sprintf(pbuf, "%c", ch);
|
||||
else
|
||||
sprintf(pbuf, "\\%o", ch);
|
||||
|
@ -62,8 +62,8 @@ static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
|
||||
*/
|
||||
struct parse
|
||||
{
|
||||
char *next; /* next character in RE */
|
||||
char *end; /* end of string (-> NUL normally) */
|
||||
pg_wchar *next; /* next character in RE */
|
||||
pg_wchar *end; /* end of string (-> NUL normally) */
|
||||
int error; /* has an error been seen? */
|
||||
sop *strip; /* malloced strip */
|
||||
sopno ssize; /* malloced strip size (allocated) */
|
||||
@ -93,7 +93,7 @@ extern "C"
|
||||
static void p_b_term(struct parse * p, cset *cs);
|
||||
static void p_b_cclass(struct parse * p, cset *cs);
|
||||
static void p_b_eclass(struct parse * p, cset *cs);
|
||||
static char p_b_symbol(struct parse * p);
|
||||
static pg_wchar p_b_symbol(struct parse * p);
|
||||
static char p_b_coll_elem(struct parse * p, int endc);
|
||||
static char othercase(int ch);
|
||||
static void bothcases(struct parse * p, int ch);
|
||||
@ -120,6 +120,10 @@ extern "C"
|
||||
static void stripsnug(struct parse * p, struct re_guts * g);
|
||||
static void findmust(struct parse * p, struct re_guts * g);
|
||||
static sopno pluscount(struct parse * p, struct re_guts * g);
|
||||
static int pg_isdigit(int c);
|
||||
static int pg_isalpha(int c);
|
||||
static int pg_isupper(int c);
|
||||
static int pg_islower(int c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
@ -127,7 +131,7 @@ extern "C"
|
||||
#endif
|
||||
/* ========= end header generated by ./mkh ========= */
|
||||
|
||||
static char nuls[10]; /* place to point scanner in event of
|
||||
static pg_wchar nuls[10]; /* place to point scanner in event of
|
||||
* error */
|
||||
|
||||
/*
|
||||
@ -190,6 +194,9 @@ int cflags;
|
||||
struct parse *p = &pa;
|
||||
int i;
|
||||
size_t len;
|
||||
#ifdef MB
|
||||
pg_wchar *wcp;
|
||||
#endif
|
||||
|
||||
#ifdef REDEBUG
|
||||
#define GOODFLAGS(f) (f)
|
||||
@ -203,12 +210,31 @@ int cflags;
|
||||
|
||||
if (cflags & REG_PEND)
|
||||
{
|
||||
#ifdef MB
|
||||
wcp = preg->patsave;
|
||||
if (preg->re_endp < wcp)
|
||||
return (REG_INVARG);
|
||||
len = preg->re_endp - wcp;
|
||||
#else
|
||||
if (preg->re_endp < pattern)
|
||||
return (REG_INVARG);
|
||||
len = preg->re_endp - pattern;
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
#ifdef MB
|
||||
wcp = (pg_wchar *)malloc((strlen(pattern)+1) * sizeof(pg_wchar));
|
||||
if (wcp == NULL) {
|
||||
return (REG_ESPACE);
|
||||
}
|
||||
preg->patsave = wcp;
|
||||
(void)pg_mb2wchar((unsigned char *)pattern,wcp);
|
||||
len = pg_wchar_strlen(wcp);
|
||||
#else
|
||||
|
||||
len = strlen((char *) pattern);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
len = strlen((char *) pattern);
|
||||
|
||||
/* do the mallocs early so failure handling is easy */
|
||||
g = (struct re_guts *) malloc(sizeof(struct re_guts) +
|
||||
@ -227,7 +253,11 @@ int cflags;
|
||||
|
||||
/* set things up */
|
||||
p->g = g;
|
||||
p->next = (char *) pattern; /* convenience; we do not modify it */
|
||||
#ifdef MB
|
||||
p->next = wcp;
|
||||
#else
|
||||
p->next = pattern; /* convenience; we do not modify it */
|
||||
#endif
|
||||
p->end = p->next + len;
|
||||
p->error = 0;
|
||||
p->ncsalloc = 0;
|
||||
@ -342,7 +372,7 @@ static void
|
||||
p_ere_exp(p)
|
||||
struct parse *p;
|
||||
{
|
||||
char c;
|
||||
pg_wchar c;
|
||||
sopno pos;
|
||||
int count;
|
||||
int count2;
|
||||
@ -420,7 +450,7 @@ struct parse *p;
|
||||
break;
|
||||
case '{': /* okay as ordinary except if digit
|
||||
* follows */
|
||||
REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
|
||||
REQUIRE(!MORE() || !pg_isdigit(PEEK()), REG_BADRPT);
|
||||
/* FALLTHROUGH */
|
||||
default:
|
||||
ordinary(p, c);
|
||||
@ -432,7 +462,7 @@ struct parse *p;
|
||||
c = PEEK();
|
||||
/* we call { a repetition if followed by a digit */
|
||||
if (!(c == '*' || c == '+' || c == '?' ||
|
||||
(c == '{' && MORE2() && isdigit(PEEK2()))))
|
||||
(c == '{' && MORE2() && pg_isdigit(PEEK2()))))
|
||||
return; /* no repetition, we're done */
|
||||
NEXT();
|
||||
|
||||
@ -463,7 +493,7 @@ struct parse *p;
|
||||
count = p_count(p);
|
||||
if (EAT(','))
|
||||
{
|
||||
if (isdigit(PEEK()))
|
||||
if (pg_isdigit(PEEK()))
|
||||
{
|
||||
count2 = p_count(p);
|
||||
REQUIRE(count <= count2, REG_BADBR);
|
||||
@ -490,7 +520,7 @@ struct parse *p;
|
||||
return;
|
||||
c = PEEK();
|
||||
if (!(c == '*' || c == '+' || c == '?' ||
|
||||
(c == '{' && MORE2() && isdigit(PEEK2()))))
|
||||
(c == '{' && MORE2() && pg_isdigit(PEEK2()))))
|
||||
return;
|
||||
SETERROR(REG_BADRPT);
|
||||
}
|
||||
@ -568,7 +598,7 @@ int starordinary; /* is a leading * an ordinary character? */
|
||||
int i;
|
||||
sopno subno;
|
||||
|
||||
#define BACKSL (1<<CHAR_BIT)
|
||||
#define BACKSL (1<<24)
|
||||
|
||||
pos = HERE(); /* repetion op, if any, covers from here */
|
||||
|
||||
@ -577,7 +607,11 @@ int starordinary; /* is a leading * an ordinary character? */
|
||||
if (c == '\\')
|
||||
{
|
||||
REQUIRE(MORE(), REG_EESCAPE);
|
||||
#ifdef MB
|
||||
c = BACKSL | (pg_wchar) GETNEXT();
|
||||
#else
|
||||
c = BACKSL | (unsigned char) GETNEXT();
|
||||
#endif
|
||||
}
|
||||
switch (c)
|
||||
{
|
||||
@ -660,7 +694,7 @@ int starordinary; /* is a leading * an ordinary character? */
|
||||
count = p_count(p);
|
||||
if (EAT(','))
|
||||
{
|
||||
if (MORE() && isdigit(PEEK()))
|
||||
if (MORE() && pg_isdigit(PEEK()))
|
||||
{
|
||||
count2 = p_count(p);
|
||||
REQUIRE(count <= count2, REG_BADBR);
|
||||
@ -698,7 +732,7 @@ struct parse *p;
|
||||
int count = 0;
|
||||
int ndigits = 0;
|
||||
|
||||
while (MORE() && isdigit(PEEK()) && count <= DUPMAX)
|
||||
while (MORE() && pg_isdigit(PEEK()) && count <= DUPMAX)
|
||||
{
|
||||
count = count * 10 + (GETNEXT() - '0');
|
||||
ndigits++;
|
||||
@ -721,15 +755,27 @@ struct parse *p;
|
||||
{
|
||||
cset *cs = allocset(p);
|
||||
int invert = 0;
|
||||
#ifdef MB
|
||||
pg_wchar sp1[] = {'[', ':', '<', ':', ']', ']'};
|
||||
pg_wchar sp2[] = {'[', ':', '>', ':', ']', ']'};
|
||||
#endif
|
||||
|
||||
/* Dept of Truly Sickening Special-Case Kludges */
|
||||
#ifdef MB
|
||||
if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp1, 6) == 0)
|
||||
#else
|
||||
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0)
|
||||
#endif
|
||||
{
|
||||
EMIT(OBOW, 0);
|
||||
NEXTn(6);
|
||||
return;
|
||||
}
|
||||
#ifdef MB
|
||||
if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp2, 6) == 0)
|
||||
#else
|
||||
if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0)
|
||||
#endif
|
||||
{
|
||||
EMIT(OEOW, 0);
|
||||
NEXTn(6);
|
||||
@ -757,7 +803,7 @@ struct parse *p;
|
||||
int ci;
|
||||
|
||||
for (i = p->g->csetsize - 1; i >= 0; i--)
|
||||
if (CHIN(cs, i) && isalpha(i))
|
||||
if (CHIN(cs, i) && pg_isalpha(i))
|
||||
{
|
||||
ci = othercase(i);
|
||||
if (ci != i)
|
||||
@ -801,8 +847,8 @@ p_b_term(p, cs)
|
||||
struct parse *p;
|
||||
cset *cs;
|
||||
{
|
||||
char c;
|
||||
char start,
|
||||
pg_wchar c;
|
||||
pg_wchar start,
|
||||
finish;
|
||||
int i;
|
||||
|
||||
@ -857,6 +903,11 @@ cset *cs;
|
||||
finish = start;
|
||||
/* xxx what about signed chars here... */
|
||||
REQUIRE(start <= finish, REG_ERANGE);
|
||||
#ifdef MB
|
||||
if (CHlc(start) != CHlc(finish)) {
|
||||
SETERROR(REG_ERANGE);
|
||||
}
|
||||
#endif
|
||||
for (i = start; i <= finish; i++)
|
||||
CHadd(cs, i);
|
||||
break;
|
||||
@ -872,17 +923,21 @@ p_b_cclass(p, cs)
|
||||
struct parse *p;
|
||||
cset *cs;
|
||||
{
|
||||
char *sp = p->next;
|
||||
pg_wchar *sp = p->next;
|
||||
struct cclass *cp;
|
||||
size_t len;
|
||||
char *u;
|
||||
char c;
|
||||
|
||||
while (MORE() && isalpha(PEEK()))
|
||||
while (MORE() && pg_isalpha(PEEK()))
|
||||
NEXT();
|
||||
len = p->next - sp;
|
||||
for (cp = cclasses; cp->name != NULL; cp++)
|
||||
#ifdef MB
|
||||
if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
|
||||
#else
|
||||
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
|
||||
#endif
|
||||
break;
|
||||
if (cp->name == NULL)
|
||||
{
|
||||
@ -919,11 +974,11 @@ cset *cs;
|
||||
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
|
||||
== static char p_b_symbol(struct parse *p);
|
||||
*/
|
||||
static char /* value of symbol */
|
||||
static pg_wchar /* value of symbol */
|
||||
p_b_symbol(p)
|
||||
struct parse *p;
|
||||
{
|
||||
char value;
|
||||
pg_wchar value;
|
||||
|
||||
REQUIRE(MORE(), REG_EBRACK);
|
||||
if (!EATTWO('[', '.'))
|
||||
@ -944,7 +999,7 @@ p_b_coll_elem(p, endc)
|
||||
struct parse *p;
|
||||
int endc; /* name ended by endc,']' */
|
||||
{
|
||||
char *sp = p->next;
|
||||
pg_wchar *sp = p->next;
|
||||
struct cname *cp;
|
||||
int len;
|
||||
|
||||
@ -957,7 +1012,11 @@ int endc; /* name ended by endc,']' */
|
||||
}
|
||||
len = p->next - sp;
|
||||
for (cp = cnames; cp->name != NULL; cp++)
|
||||
#ifdef MB
|
||||
if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
|
||||
#else
|
||||
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
|
||||
#endif
|
||||
return (cp->code); /* known name */
|
||||
if (len == 1)
|
||||
return (*sp); /* single character */
|
||||
@ -973,10 +1032,10 @@ static char /* if no counterpart, return ch */
|
||||
othercase(ch)
|
||||
int ch;
|
||||
{
|
||||
assert(isalpha(ch));
|
||||
if (isupper(ch))
|
||||
assert(pg_isalpha(ch));
|
||||
if (pg_isupper(ch))
|
||||
return (tolower(ch));
|
||||
else if (islower(ch))
|
||||
else if (pg_islower(ch))
|
||||
return (toupper(ch));
|
||||
else
|
||||
/* peculiar, but could happen */
|
||||
@ -994,9 +1053,9 @@ bothcases(p, ch)
|
||||
struct parse *p;
|
||||
int ch;
|
||||
{
|
||||
char *oldnext = p->next;
|
||||
char *oldend = p->end;
|
||||
char bracket[3];
|
||||
pg_wchar *oldnext = p->next;
|
||||
pg_wchar *oldend = p->end;
|
||||
pg_wchar bracket[3];
|
||||
|
||||
assert(othercase(ch) != ch);/* p_bracket() would recurse */
|
||||
p->next = bracket;
|
||||
@ -1021,12 +1080,16 @@ int ch;
|
||||
{
|
||||
cat_t *cap = p->g->categories;
|
||||
|
||||
if ((p->g->cflags & REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
|
||||
if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
|
||||
bothcases(p, ch);
|
||||
else
|
||||
{
|
||||
#ifdef MB
|
||||
EMIT(OCHAR, (pg_wchar) ch);
|
||||
#else
|
||||
EMIT(OCHAR, (unsigned char) ch);
|
||||
if (cap[ch] == 0)
|
||||
#endif
|
||||
if (ch >= CHAR_MIN && ch <= CHAR_MAX && cap[ch] == 0)
|
||||
cap[ch] = p->g->ncategories++;
|
||||
}
|
||||
}
|
||||
@ -1041,9 +1104,9 @@ static void
|
||||
nonnewline(p)
|
||||
struct parse *p;
|
||||
{
|
||||
char *oldnext = p->next;
|
||||
char *oldend = p->end;
|
||||
char bracket[4];
|
||||
pg_wchar *oldnext = p->next;
|
||||
pg_wchar *oldend = p->end;
|
||||
pg_wchar bracket[4];
|
||||
|
||||
p->next = bracket;
|
||||
p->end = bracket + 3;
|
||||
@ -1674,7 +1737,7 @@ struct re_guts *g;
|
||||
sop *newstart = 0;
|
||||
sopno newlen;
|
||||
sop s;
|
||||
char *cp;
|
||||
pg_wchar *cp;
|
||||
sopno i;
|
||||
|
||||
/* avoid making error situations worse */
|
||||
@ -1729,7 +1792,11 @@ struct re_guts *g;
|
||||
return;
|
||||
|
||||
/* turn it into a character string */
|
||||
#ifdef MB
|
||||
g->must = (pg_wchar *)malloc((size_t) (g->mlen + 1)*sizeof(pg_wchar));
|
||||
#else
|
||||
g->must = malloc((size_t) g->mlen + 1);
|
||||
#endif
|
||||
if (g->must == NULL)
|
||||
{ /* argh; just forget it */
|
||||
g->mlen = 0;
|
||||
@ -1742,7 +1809,7 @@ struct re_guts *g;
|
||||
while (OP(s = *scan++) != OCHAR)
|
||||
continue;
|
||||
assert(cp < g->must + g->mlen);
|
||||
*cp++ = (char) OPND(s);
|
||||
*cp++ = (pg_wchar) OPND(s);
|
||||
}
|
||||
assert(cp == g->must + g->mlen);
|
||||
*cp++ = '\0'; /* just on general principles */
|
||||
@ -1785,3 +1852,42 @@ struct re_guts *g;
|
||||
g->iflags |= BAD;
|
||||
return (maxnest);
|
||||
}
|
||||
|
||||
/*
|
||||
 * some ctype functions with a non-ASCII-character guard
|
||||
*/
|
||||
/*
 * pg_isdigit - isdigit() wrapper usable on wide-character codes.
 *
 * In MB builds any code outside 0..UCHAR_MAX is reported as a non-digit
 * without consulting isdigit(); in-range codes yield 1 or 0.  In non-MB
 * builds the result of isdigit() is returned unchanged.
 */
static int pg_isdigit(int c)
{
#ifdef MB
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isdigit(c) != 0;
#else
	return isdigit(c);
#endif
}
|
||||
|
||||
/*
 * pg_isalpha - isalpha() wrapper usable on wide-character codes.
 *
 * In MB builds any code outside 0..UCHAR_MAX is reported as non-alphabetic
 * without consulting isalpha(); in-range codes yield 1 or 0.  In non-MB
 * builds the result of isalpha() is returned unchanged.
 */
static int pg_isalpha(int c)
{
#ifdef MB
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isalpha(c) != 0;
#else
	return isalpha(c);
#endif
}
|
||||
|
||||
/*
 * pg_isupper - isupper() wrapper usable on wide-character codes.
 *
 * In MB builds any code outside 0..UCHAR_MAX is reported as not upper case
 * without consulting isupper(); in-range codes yield 1 or 0.  In non-MB
 * builds the result of isupper() is returned unchanged.
 */
static int pg_isupper(int c)
{
#ifdef MB
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return isupper(c) != 0;
#else
	return isupper(c);
#endif
}
|
||||
|
||||
/*
 * pg_islower - islower() wrapper usable on wide-character codes.
 *
 * In MB builds any code outside 0..UCHAR_MAX is reported as not lower case
 * without consulting islower(); in-range codes yield 1 or 0.  In non-MB
 * builds the result of islower() is returned unchanged.
 */
static int pg_islower(int c)
{
#ifdef MB
	if (c < 0 || c > UCHAR_MAX)
		return 0;
	return islower(c) != 0;
#else
	return islower(c);
#endif
}
|
||||
|
@ -52,6 +52,7 @@ static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
|
||||
|
||||
#include <regex/regex.h>
|
||||
#include <regex/utils.h>
|
||||
#include <regex/regex2.h>
|
||||
|
||||
/* ========= begin header generated by ./mkh ========= */
|
||||
#ifdef __cplusplus
|
||||
@ -214,7 +215,11 @@ char *localbuf;
|
||||
struct rerr *r;
|
||||
|
||||
for (r = rerrs; r->code != 0; r++)
|
||||
#ifdef MB
|
||||
if (pg_char_and_wchar_strcmp(r->name, preg->re_endp) == 0)
|
||||
#else
|
||||
if (strcmp(r->name, preg->re_endp) == 0)
|
||||
#endif
|
||||
break;
|
||||
if (r->code == 0)
|
||||
return ("0");
|
||||
|
@ -164,6 +164,11 @@ int eflags;
|
||||
{
|
||||
struct re_guts *g = preg->re_g;
|
||||
|
||||
#ifdef MB
|
||||
pg_wchar *str;
|
||||
int sts;
|
||||
#endif
|
||||
|
||||
#ifdef REDEBUG
|
||||
#define GOODFLAGS(f) (f)
|
||||
#else
|
||||
@ -177,8 +182,24 @@ int eflags;
|
||||
return (REG_BADPAT);
|
||||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
#ifdef MB
|
||||
str = (pg_wchar *)malloc((strlen(string)+1) * sizeof(pg_wchar));
|
||||
if (!str) {
|
||||
return(REG_ESPACE);
|
||||
}
|
||||
(void)pg_mb2wchar((unsigned char *)string,str);
|
||||
if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & REG_LARGE))
|
||||
return (smatcher(g, (char *) string, nmatch, pmatch, eflags));
|
||||
sts = smatcher(g, str, nmatch, pmatch, eflags);
|
||||
else
|
||||
return (lmatcher(g, (char *) string, nmatch, pmatch, eflags));
|
||||
sts = lmatcher(g, str, nmatch, pmatch, eflags);
|
||||
free((char *)str);
|
||||
return(sts);
|
||||
|
||||
# else
|
||||
|
||||
if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & REG_LARGE))
|
||||
return (smatcher(g, (pg_wchar *) string, nmatch, pmatch, eflags));
|
||||
else
|
||||
return (lmatcher(g, (pg_wchar *) string, nmatch, pmatch, eflags));
|
||||
#endif
|
||||
}
|
||||
|
@ -68,7 +68,11 @@ regex_t *preg;
|
||||
return;
|
||||
preg->re_magic = 0; /* mark it invalid */
|
||||
g->magic = 0; /* mark it invalid */
|
||||
|
||||
#ifdef MB
|
||||
if (preg->patsave != NULL) {
|
||||
free((char *)preg->patsave);
|
||||
}
|
||||
#endif
|
||||
if (g->strip != NULL)
|
||||
free((char *) g->strip);
|
||||
if (g->sets != NULL)
|
||||
|
33
src/backend/regex/utftest.c
Normal file
33
src/backend/regex/utftest.c
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* testing of utf2wchar()
|
||||
* $Id: utftest.c,v 1.1 1998/03/15 07:38:37 scrappy Exp $
|
||||
*/
|
||||
#include <regex/regex.h>
|
||||
#include <regex/utils.h>
|
||||
#include <regex/regex2.h>
|
||||
|
||||
#include <regex/pg_wchar.h>
|
||||
|
||||
main()
|
||||
{
|
||||
/* Example 1 from RFC2044 */
|
||||
char utf1[] = {0x41,0xe2,0x89,0xa2,0xce,0x91,0x2e,0};
|
||||
/* Example 2 from RFC2044 */
|
||||
char utf2[] = {0x48,0x69,0x20,0x4d,0x6f,0x6d,0x20,0xe2,0x98,0xba,0x21,0};
|
||||
/* Example 3 from RFC2044 */
|
||||
char utf3[] = {0xe6,0x97,0xa5,0xe6,0x9c,0xac,0xe8,0xaa,0x9e,0};
|
||||
char *utf[] = {utf1,utf2,utf3};
|
||||
pg_wchar ucs[128];
|
||||
pg_wchar *p;
|
||||
int i;
|
||||
|
||||
for (i=0;i<sizeof(utf)/sizeof(char *);i++) {
|
||||
pg_utf2wchar(utf[i],ucs);
|
||||
p = ucs;
|
||||
while(*p) {
|
||||
printf("%04x ",*p);
|
||||
p++;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
348
src/backend/regex/utils.c
Normal file
348
src/backend/regex/utils.c
Normal file
@ -0,0 +1,348 @@
|
||||
/*
|
||||
* misc conversion functions between pg_wchar and other encodings.
|
||||
* Tatsuo Ishii
|
||||
* $Id: utils.c,v 1.1 1998/03/15 07:38:39 scrappy Exp $
|
||||
*/
|
||||
#include <regex/pg_wchar.h>
|
||||
/*
|
||||
* convert EUC to pg_wchar (EUC process code)
|
||||
* caller should allocate enough space for "to"
|
||||
*/
|
||||
static void pg_euc2wchar(const unsigned char *from, pg_wchar *to)
|
||||
{
|
||||
while (*from) {
|
||||
if (*from == SS2) {
|
||||
from++;
|
||||
*to = *from++;
|
||||
} else if (*from == SS3) {
|
||||
from++;
|
||||
*to = *from++ << 8;
|
||||
*to |= 0x3f & *from++;
|
||||
} else if (*from & 0x80) {
|
||||
*to = *from++ << 8;
|
||||
*to |= *from++;
|
||||
} else {
|
||||
*to = *from++;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
/*
 * pg_eucjp2wchar - EUC_JP to pg_wchar.
 * Delegates to the generic EUC converter; the caller supplies "to".
 */
static void pg_eucjp2wchar(const unsigned char *from, pg_wchar *to)
{
	pg_euc2wchar(from, to);
}
|
||||
|
||||
/*
 * pg_euckr2wchar - EUC_KR to pg_wchar.
 * Delegates to the generic EUC converter; the caller supplies "to".
 */
static void pg_euckr2wchar(const unsigned char *from, pg_wchar *to)
{
	pg_euc2wchar(from, to);
}
|
||||
|
||||
/*
 * pg_eucch2wchar - convert a NUL-terminated EUC string to pg_wchar.
 *
 * from: NUL-terminated input bytes
 * to:   output buffer allocated by the caller; one pg_wchar is written per
 *       decoded character, followed by a terminating 0.  strlen(from) + 1
 *       elements are always enough.
 *
 * Decoding rules:
 *   SS2 b1 b2                   -> ((b1 << 8) & 0x3f00) | b2
 *   SS3 b1 b2                   -> (b1 << 8) | (b2 & 0x3f)
 *   b1 b2 (b1 has bit 7 set)    -> (b1 << 8) | b2
 *   any other byte              -> the byte itself
 *
 * The SS2 form combines both bytes.  The previous code assigned the second
 * byte with "=" instead of "|=", which discarded the masked high byte that
 * had just been stored.
 *
 * A form is only used when all of its bytes are present before the
 * terminating NUL; otherwise the lead byte falls through to a shorter form
 * (ultimately a single byte), so the scan never steps over the terminator.
 */
static void pg_eucch2wchar(const unsigned char *from, pg_wchar *to)
{
	while (*from) {
		if (*from == SS2 && from[1] && from[2]) {
			*to = (0x3f00 & (from[1] << 8)) | from[2];
			from += 3;
		} else if (*from == SS3 && from[1] && from[2]) {
			*to = (from[1] << 8) | (0x3f & from[2]);
			from += 3;
		} else if ((*from & 0x80) && from[1]) {
			*to = (from[0] << 8) | from[1];
			from += 2;
		} else {
			*to = *from++;
		}
		to++;
	}
	*to = 0;
}
|
||||
|
||||
/*
 * pg_euccn2wchar - convert a NUL-terminated EUC string to pg_wchar.
 *
 * from: NUL-terminated input bytes
 * to:   output buffer allocated by the caller; one pg_wchar is written per
 *       decoded character, followed by a terminating 0.  strlen(from) + 1
 *       elements are always enough.
 *
 * Decoding rules:
 *   SS2 b1 b2 b3                -> (b1 << 16) | (b2 << 8) | b3
 *   SS3 b1 b2                   -> (b1 << 8) | (b2 & 0x3f)
 *   b1 b2 (b1 has bit 7 set)    -> (b1 << 8) | b2
 *   any other byte              -> the byte itself
 *
 * A form is only used when all of its bytes are present before the
 * terminating NUL; otherwise the lead byte falls through to a shorter form
 * (ultimately a single byte), so the scan never steps over the terminator.
 */
static void pg_euccn2wchar(const unsigned char *from, pg_wchar *to)
{
	while (*from) {
		if (*from == SS2 && from[1] && from[2] && from[3]) {
			*to = (from[1] << 16) | (from[2] << 8) | from[3];
			from += 4;
		} else if (*from == SS3 && from[1] && from[2]) {
			*to = (from[1] << 8) | (0x3f & from[2]);
			from += 3;
		} else if ((*from & 0x80) && from[1]) {
			*to = (from[0] << 8) | from[1];
			from += 2;
		} else {
			*to = *from++;
		}
		to++;
	}
	*to = 0;
}
|
||||
|
||||
/*
|
||||
* convert UTF-8 to pg_wchar (UCS-2)
|
||||
* caller should allocate enough space for "to"
|
||||
*/
|
||||
static void pg_utf2wchar(const unsigned char *from, pg_wchar *to)
|
||||
{
|
||||
unsigned char c1,c2,c3;
|
||||
while (*from) {
|
||||
if ((*from & 0x80) == 0) {
|
||||
*to = *from++;
|
||||
} else if ((*from & 0xe0) == 0xc0) {
|
||||
c1 = *from++ & 0x1f;
|
||||
c2 = *from++ & 0x3f;
|
||||
*to = c1 << 6;
|
||||
*to |= c2;
|
||||
} else if ((*from & 0xe0) == 0xe0) {
|
||||
c1 = *from++ & 0x0f;
|
||||
c2 = *from++ & 0x3f;
|
||||
c3 = *from++ & 0x3f;
|
||||
*to = c1 << 12;
|
||||
*to |= c2 << 6;
|
||||
*to |= c3;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* convert mule internal code to pg_wchar.
|
||||
* in this case pg_wchar consists of following 4 bytes:
|
||||
*
|
||||
* 0x00(unused)
|
||||
* 0x00(ASCII)|leading character (one of LC1, LC12, LC2 or LC22)
|
||||
* 0x00(ASCII,1 byte code)|other than 0x00(2 byte code)
|
||||
* the lowest byte of the code
|
||||
*
|
||||
* note that Type N (variable length byte encoding) cannot be represented by
|
||||
* this schema. sorry.
|
||||
* caller should allocate enough space for "to"
|
||||
*/
|
||||
static void pg_mule2wchar(const unsigned char *from, pg_wchar *to)
|
||||
{
|
||||
while (*from) {
|
||||
if (IS_LC1(*from)) {
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++;
|
||||
} else if (IS_LCPRV1(*from)) {
|
||||
from++;
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++;
|
||||
} else if (IS_LC2(*from)) {
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++ << 8;
|
||||
*to |= *from++;
|
||||
} else if (IS_LCPRV2(*from)) {
|
||||
from++;
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++ << 8;
|
||||
*to |= *from++;
|
||||
} else { /* assume ASCII */
|
||||
*to = *from++;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* convert EUC to pg_wchar (EUC process code)
|
||||
* caller should allocate enough space for "to"
|
||||
* len: length of from.
|
||||
* "from" not necessarily null terminated.
|
||||
*/
|
||||
static void pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
while (*from && len > 0) {
|
||||
if (*from == SS2) {
|
||||
from++;
|
||||
len--;
|
||||
*to = 0xff & *from++;
|
||||
len--;
|
||||
} else if (*from == SS3) {
|
||||
from++;
|
||||
*to = *from++ << 8;
|
||||
*to |= 0x3f & *from++;
|
||||
len -= 3;
|
||||
} else if (*from & 0x80) {
|
||||
*to = *from++ << 8;
|
||||
*to |= *from++;
|
||||
len -= 2;
|
||||
} else {
|
||||
*to = *from++;
|
||||
len--;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
static void pg_eucjp2wchar_with_len
|
||||
(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
pg_euc2wchar_with_len(from,to,len);
|
||||
}
|
||||
|
||||
static void pg_euckr2wchar_with_len
|
||||
(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
pg_euc2wchar_with_len(from,to,len);
|
||||
}
|
||||
|
||||
static void pg_eucch2wchar_with_len
|
||||
(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
while (*from && len > 0) {
|
||||
if (*from == SS2) {
|
||||
from++;
|
||||
len--;
|
||||
*to = 0x3f00 & (*from++ << 8);
|
||||
*to = *from++;
|
||||
len -= 2;
|
||||
} else if (*from == SS3) {
|
||||
from++;
|
||||
*to = *from++ << 8;
|
||||
*to |= 0x3f & *from++;
|
||||
len -= 3;
|
||||
} else if (*from & 0x80) {
|
||||
*to = *from++ << 8;
|
||||
*to |= *from++;
|
||||
len -= 2;
|
||||
} else {
|
||||
*to = *from++;
|
||||
len--;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
static void pg_euccn2wchar_with_len
|
||||
(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
while (*from && len > 0) {
|
||||
if (*from == SS2) {
|
||||
from++;
|
||||
len--;
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++ << 8;
|
||||
*to |= *from++;
|
||||
len -= 3;
|
||||
} else if (*from == SS3) {
|
||||
from++;
|
||||
*to = *from++ << 8;
|
||||
*to |= 0x3f & *from++;
|
||||
len -= 3;
|
||||
} else if (*from & 0x80) {
|
||||
*to = *from++ << 8;
|
||||
*to |= *from++;
|
||||
len -= 2;
|
||||
} else {
|
||||
*to = *from++;
|
||||
len--;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* convert UTF-8 to pg_wchar (UCS-2)
|
||||
* caller should allocate enough space for "to"
|
||||
* len: length of from.
|
||||
* "from" not necessarily null terminated.
|
||||
*/
|
||||
static void pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
unsigned char c1,c2,c3;
|
||||
while (*from && len > 0) {
|
||||
if ((*from & 0x80) == 0) {
|
||||
*to = *from++;
|
||||
len--;
|
||||
} else if ((*from & 0xe0) == 0xc0) {
|
||||
c1 = *from++ & 0x1f;
|
||||
c2 = *from++ & 0x3f;
|
||||
len -= 2;
|
||||
*to = c1 << 6;
|
||||
*to |= c2;
|
||||
} else if ((*from & 0xe0) == 0xe0) {
|
||||
c1 = *from++ & 0x0f;
|
||||
c2 = *from++ & 0x3f;
|
||||
c3 = *from++ & 0x3f;
|
||||
len -= 3;
|
||||
*to = c1 << 12;
|
||||
*to |= c2 << 6;
|
||||
*to |= c3;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* convert mule internal code to pg_wchar
|
||||
* caller should allocate enough space for "to"
|
||||
* len: length of from.
|
||||
* "from" not necessarily null terminated.
|
||||
*/
|
||||
static void pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
||||
{
|
||||
while (*from && len > 0) {
|
||||
if (IS_LC1(*from)) {
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++;
|
||||
len -= 2;
|
||||
} else if (IS_LCPRV1(*from)) {
|
||||
from++;
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++;
|
||||
len -= 3;
|
||||
} else if (IS_LC2(*from)) {
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++ << 8;
|
||||
*to |= *from++;
|
||||
len -= 3;
|
||||
} else if (IS_LCPRV2(*from)) {
|
||||
from++;
|
||||
*to = *from++ << 16;
|
||||
*to |= *from++ << 8;
|
||||
*to |= *from++;
|
||||
len -= 4;
|
||||
} else { /* assume ASCII */
|
||||
*to = (unsigned char)*from++;
|
||||
len--;
|
||||
}
|
||||
to++;
|
||||
}
|
||||
*to = 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
void (*mb2wchar)();
|
||||
void (*mb2wchar_with_len)();
|
||||
} pg_wchar_tbl;
|
||||
|
||||
static pg_wchar_tbl pg_wchar_table[] = {
|
||||
{pg_eucjp2wchar, pg_eucjp2wchar_with_len},
|
||||
{pg_eucch2wchar, pg_eucch2wchar_with_len},
|
||||
{pg_euckr2wchar, pg_euckr2wchar_with_len},
|
||||
{pg_euccn2wchar, pg_euccn2wchar_with_len},
|
||||
{pg_utf2wchar, pg_utf2wchar_with_len},
|
||||
{pg_mule2wchar, pg_mule2wchar_with_len}};
|
||||
|
||||
/*
 * Convert the NUL-terminated multi-byte string "from" into "to" by calling
 * the mb2wchar routine stored in row MB of pg_wchar_table.
 */
void
pg_mb2wchar(const unsigned char *from, pg_wchar *to)
{
    pg_wchar_table[MB].mb2wchar(from, to);
}
|
||||
|
||||
/*
 * Length-limited conversion: hands "from", "to" and "len" to the
 * mb2wchar_with_len routine stored in row MB of pg_wchar_table.
 */
void
pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
    pg_wchar_table[MB].mb2wchar_with_len(from, to, len);
}
|
48
src/backend/regex/wstrcmp.c
Normal file
48
src/backend/regex/wstrcmp.c
Normal file
@ -0,0 +1,48 @@
|
||||
/*-
|
||||
* Copyright (c) 1990, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Chris Torek.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <regex/pg_wchar.h>
|
||||
|
||||
int
|
||||
pg_char_and_wchar_strcmp(s1, s2)
|
||||
register const char *s1;
|
||||
register const pg_wchar *s2;
|
||||
{
|
||||
while ((pg_wchar)*s1 == *s2++)
|
||||
if (*s1++ == 0)
|
||||
return (0);
|
||||
return (*(const unsigned char *)s1 - *(const pg_wchar *)(s2 - 1));
|
||||
}
|
83
src/backend/regex/wstrncmp.c
Normal file
83
src/backend/regex/wstrncmp.c
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from FreeBSD 2.2.1-RELEASE software.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <regex/pg_wchar.h>
|
||||
|
||||
int
|
||||
pg_wchar_strncmp(s1, s2, n)
|
||||
register const pg_wchar *s1, *s2;
|
||||
register size_t n;
|
||||
{
|
||||
|
||||
if (n == 0)
|
||||
return (0);
|
||||
do {
|
||||
if (*s1 != *s2++)
|
||||
return (*(const pg_wchar *)s1 -
|
||||
*(const pg_wchar *)(s2 - 1));
|
||||
if (*s1++ == 0)
|
||||
break;
|
||||
} while (--n != 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
pg_char_and_wchar_strncmp(s1, s2, n)
|
||||
register const char *s1;
|
||||
register const pg_wchar *s2;
|
||||
register size_t n;
|
||||
{
|
||||
|
||||
if (n == 0)
|
||||
return (0);
|
||||
do {
|
||||
if ((pg_wchar )*s1 != *s2++)
|
||||
return (*(const pg_wchar *)s1 -
|
||||
*(const pg_wchar *)(s2 - 1));
|
||||
if (*s1++ == 0)
|
||||
break;
|
||||
} while (--n != 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
size_t
|
||||
pg_wchar_strlen(str)
|
||||
const pg_wchar *str;
|
||||
{
|
||||
register const pg_wchar *s;
|
||||
|
||||
for (s = str; *s; ++s);
|
||||
return(s - str);
|
||||
}
|
Reference in New Issue
Block a user