mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Mop-up for commit 85feb77aa09cda9ff3e12cf95c757c499dc25343.
Adjust commentary in regc_pg_locale.c to remove mention of the possibility of not having <wctype.h> functions, since we no longer consider that. Eliminate duplicate code in wparser_def.c by generalizing the p_iswhat macro to take a parameter saying what to return for non-ASCII chars in C locale. (That's not really a consequence of the USE_WIDE_UPPER_LOWER-ectomy, but I noticed it while doing that.)
This commit is contained in:
parent
85feb77aa0
commit
ed87e19807
@ -29,20 +29,20 @@
|
||||
*
|
||||
* 2. In the "default" collation (which is supposed to obey LC_CTYPE):
|
||||
*
|
||||
* 2a. When working in UTF8 encoding, we use the <wctype.h> functions if
|
||||
* available. This assumes that every platform uses Unicode codepoints
|
||||
* directly as the wchar_t representation of Unicode. On some platforms
|
||||
* 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
|
||||
* This assumes that every platform uses Unicode codepoints directly
|
||||
* as the wchar_t representation of Unicode. On some platforms
|
||||
* wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
|
||||
*
|
||||
* 2b. In all other encodings, or on machines that lack <wctype.h>, we use
|
||||
* the <ctype.h> functions for pg_wchar values up to 255, and punt for values
|
||||
* above that. This is only 100% correct in single-byte encodings such as
|
||||
* LATINn. However, non-Unicode multibyte encodings are mostly Far Eastern
|
||||
* character sets for which the properties being tested here aren't very
|
||||
* relevant for higher code values anyway. The difficulty with using the
|
||||
* <wctype.h> functions with non-Unicode multibyte encodings is that we can
|
||||
* have no certainty that the platform's wchar_t representation matches
|
||||
* what we do in pg_wchar conversions.
|
||||
* 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
|
||||
* values up to 255, and punt for values above that. This is 100% correct
|
||||
* only in single-byte encodings such as LATINn. However, non-Unicode
|
||||
* multibyte encodings are mostly Far Eastern character sets for which the
|
||||
* properties being tested here aren't very relevant for higher code values
|
||||
* anyway. The difficulty with using the <wctype.h> functions with
|
||||
* non-Unicode multibyte encodings is that we can have no certainty that
|
||||
* the platform's wchar_t representation matches what we do in pg_wchar
|
||||
* conversions.
|
||||
*
|
||||
* 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
|
||||
* Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
|
||||
|
@ -427,94 +427,45 @@ TParserCopyClose(TParser *prs)
|
||||
* - if locale is C then we use pgwstr instead of wstr.
|
||||
*/
|
||||
|
||||
#define p_iswhat(type) \
|
||||
#define p_iswhat(type, nonascii) \
|
||||
\
|
||||
static int \
|
||||
p_is##type(TParser *prs) { \
|
||||
Assert( prs->state ); \
|
||||
if ( prs->usewide ) \
|
||||
p_is##type(TParser *prs) \
|
||||
{ \
|
||||
Assert(prs->state); \
|
||||
if (prs->usewide) \
|
||||
{ \
|
||||
if ( prs->pgwstr ) \
|
||||
if (prs->pgwstr) \
|
||||
{ \
|
||||
unsigned int c = *(prs->pgwstr + prs->state->poschar); \
|
||||
if ( c > 0x7f ) \
|
||||
return 0; \
|
||||
return is##type( c ); \
|
||||
if (c > 0x7f) \
|
||||
return nonascii; \
|
||||
return is##type(c); \
|
||||
} \
|
||||
return isw##type( *( prs->wstr + prs->state->poschar ) ); \
|
||||
return isw##type(*(prs->wstr + prs->state->poschar)); \
|
||||
} \
|
||||
\
|
||||
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
|
||||
} \
|
||||
return is##type(*(unsigned char *) (prs->str + prs->state->posbyte)); \
|
||||
} \
|
||||
\
|
||||
static int \
|
||||
p_isnot##type(TParser *prs) { \
|
||||
p_isnot##type(TParser *prs) \
|
||||
{ \
|
||||
return !p_is##type(prs); \
|
||||
}
|
||||
|
||||
static int
|
||||
p_isalnum(TParser *prs)
|
||||
{
|
||||
Assert(prs->state);
|
||||
|
||||
if (prs->usewide)
|
||||
{
|
||||
if (prs->pgwstr)
|
||||
{
|
||||
unsigned int c = *(prs->pgwstr + prs->state->poschar);
|
||||
|
||||
/*
|
||||
* in C locale with a multibyte encoding, any non-ASCII symbol is
|
||||
* treated as an alpha character
|
||||
*/
|
||||
if (c > 0x7f)
|
||||
return 1;
|
||||
|
||||
return isalnum(c);
|
||||
}
|
||||
|
||||
return iswalnum(*(prs->wstr + prs->state->poschar));
|
||||
}
|
||||
|
||||
return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte));
|
||||
}
|
||||
static int
|
||||
p_isnotalnum(TParser *prs)
|
||||
{
|
||||
return !p_isalnum(prs);
|
||||
}
|
||||
|
||||
static int
|
||||
p_isalpha(TParser *prs)
|
||||
{
|
||||
Assert(prs->state);
|
||||
|
||||
if (prs->usewide)
|
||||
{
|
||||
if (prs->pgwstr)
|
||||
{
|
||||
unsigned int c = *(prs->pgwstr + prs->state->poschar);
|
||||
|
||||
/*
|
||||
* in C locale with a multibyte encoding, any non-ASCII symbol is
|
||||
* treated as an alpha character
|
||||
*/
|
||||
if (c > 0x7f)
|
||||
return 1;
|
||||
|
||||
return isalpha(c);
|
||||
}
|
||||
|
||||
return iswalpha(*(prs->wstr + prs->state->poschar));
|
||||
}
|
||||
|
||||
return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));
|
||||
}
|
||||
|
||||
static int
|
||||
p_isnotalpha(TParser *prs)
|
||||
{
|
||||
return !p_isalpha(prs);
|
||||
}
|
||||
/*
|
||||
* In C locale with a multibyte encoding, any non-ASCII symbol is considered
|
||||
* an alpha (and hence alnum) character, but not a member of other char classes.
|
||||
*/
|
||||
p_iswhat(alnum, 1)
|
||||
p_iswhat(alpha, 1)
|
||||
p_iswhat(digit, 0)
|
||||
p_iswhat(lower, 0)
|
||||
p_iswhat(print, 0)
|
||||
p_iswhat(punct, 0)
|
||||
p_iswhat(space, 0)
|
||||
p_iswhat(upper, 0)
|
||||
p_iswhat(xdigit, 0)
|
||||
|
||||
/* p_iseq should be used only for ASCII characters */
|
||||
|
||||
@ -525,14 +476,6 @@ p_iseq(TParser *prs, char c)
|
||||
return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
|
||||
}
|
||||
|
||||
p_iswhat(digit)
|
||||
p_iswhat(lower)
|
||||
p_iswhat(print)
|
||||
p_iswhat(punct)
|
||||
p_iswhat(space)
|
||||
p_iswhat(upper)
|
||||
p_iswhat(xdigit)
|
||||
|
||||
static int
|
||||
p_isEOF(TParser *prs)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user