1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Fix localization support for multibyte encoding and C locale.

Slightly reworked patch from Tatsuo Ishii
This commit is contained in:
Teodor Sigaev
2007-01-15 15:16:28 +00:00
parent 7021d6f6c8
commit f2a01b0d5a
3 changed files with 144 additions and 52 deletions

View File

@ -1,4 +1,4 @@
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11 2006/10/04 00:29:47 momjian Exp $ */
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.12 2007/01/15 15:16:28 teodor Exp $ */
#include "postgres.h"
@ -40,16 +40,13 @@ TParserInit(char *str, int len)
#ifdef TS_USE_WIDE
/*
* Use wide char code only when max encoding length > 1 and ctype != C.
* Some operating systems fail with multi-byte encodings and a C locale.
* Also, for a C locale there is no need to process as multibyte. From
* backend/utils/adt/oracle_compat.c Teodor
* Use wide char code only when max encoding length > 1.
*/
if (prs->charmaxlen > 1 && !lc_ctype_is_c())
if (prs->charmaxlen > 1)
{
prs->usewide = true;
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr);
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1));
prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr);
}
else
@ -83,25 +80,99 @@ TParserClose(TParser * prs)
/*
* defining support function, equvalent is* macroses, but
* working with any possible encodings and locales
* working with any possible encodings and locales. Note,
* that with multibyte encoding and C-locale isw* function may fail
* or give wrong result. Note 2: multibyte encoding and C-locale
* often are used for Asian languages.
*/
#ifdef TS_USE_WIDE
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
if ( prs->usewide ) \
{ \
if ( lc_ctype_is_c() ) \
return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \
\
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
} \
\
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
}
static int
p_isalnum(TParser *prs)
{
Assert( prs->state );
if (prs->usewide)
{
if (lc_ctype_is_c())
{
unsigned int c = *(unsigned int*)(prs->wstr + prs->state->poschar);
/*
* any non-ascii symbol with multibyte encoding
* with C-locale is an alpha character
*/
if ( c > 0x7f )
return 1;
return isalnum(0xff & c);
}
return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar));
}
return isalnum( *(unsigned char*)( prs->str + prs->state->posbyte ));
}
static int
p_isnotalnum(TParser *prs)
{
return !p_isalnum(prs);
}
static int
p_isalpha(TParser *prs)
{
Assert( prs->state );
if (prs->usewide)
{
if (lc_ctype_is_c())
{
unsigned int c = *(prs->wstr + prs->state->poschar);
/*
* any non-ascii symbol with multibyte encoding
* with C-locale is an alpha character
*/
if ( c > 0x7f )
return 1;
return isalpha(0xff & c);
}
return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar));
}
return isalpha( *(unsigned char*)( prs->str + prs->state->posbyte ));
}
static int
p_isnotalpha(TParser *prs)
{
return !p_isalpha(prs);
}
/* p_iseq should be used only for ascii symbols */
@ -111,18 +182,19 @@ p_iseq(TParser * prs, char c)
Assert(prs->state);
return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
}
#else /* TS_USE_WIDE */
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
}
@ -132,10 +204,12 @@ p_iseq(TParser * prs, char c)
Assert(prs->state);
return (*(prs->str + prs->state->posbyte) == c) ? 1 : 0;
}
#endif /* TS_USE_WIDE */
p_iswhat(alnum)
p_iswhat(alpha)
#endif /* TS_USE_WIDE */
p_iswhat(digit)
p_iswhat(lower)
p_iswhat(print)