1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-29 10:41:53 +03:00

Merge duplicate upper/lower/initcap() routines in oracle_compat.c and

formatting.c to use common code;  remove duplicate functions and support
routines that are no longer needed.
This commit is contained in:
Bruce Momjian
2008-06-23 19:27:19 +00:00
parent eeee06919f
commit f6ec7430f9
3 changed files with 165 additions and 496 deletions

View File

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.80 2008/06/17 16:09:06 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.81 2008/06/23 19:27:19 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@ -29,292 +29,16 @@
#endif
#include "utils/builtins.h"
#include "utils/formatting.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
* using wide-character functions. Otherwise we use the traditional <ctype.h>
* functions, which of course will not work as desired in multibyte character
* sets. Note that in either case we are effectively assuming that the
* database character encoding matches the encoding implied by LC_CTYPE.
*/
#ifdef USE_WIDE_UPPER_LOWER
/* Case-conversion helpers operating on C strings; defined later in this file */
char *wstring_lower(char *str);
char *wstring_upper(char *str);
/* Conversions between TEXT values and wchar_t strings; defined later in this file */
wchar_t *texttowcs(const text *txt);
text *wcstotext(const wchar_t *str, int ncodes);
#endif
/*
 * NOTE(review): dotrim() is not defined in the visible part of this file;
 * judging by the doltrim/dortrim flags it is presumably the shared worker
 * for the left/right trim functions -- confirm against the definition.
 */
static text *dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim);
#ifdef USE_WIDE_UPPER_LOWER
/*
 * Convert a TEXT value into a palloc'd, zero-terminated wchar_t string
 * using the C library's mbstowcs().
 *
 * txt: the TEXT value to convert.
 *
 * Returns a palloc'd wchar_t array that the caller should pfree when done.
 * Raises ERROR if the input is too long for the workspace size to be
 * computed safely, or if mbstowcs() rejects the input.
 */
wchar_t *
texttowcs(const text *txt)
{
	int			nbytes = VARSIZE_ANY_EXHDR(txt);
	char	   *workstr;
	wchar_t    *result;
	size_t		ncodes;

	/* Overflow paranoia */
	if (nbytes < 0 ||
		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Need a null-terminated version of the input */
	workstr = text_to_cstring(txt);

	/* Output workspace cannot have more codes than input bytes */
	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

	/* Do the conversion */
	ncodes = mbstowcs(result, workstr, nbytes + 1);

	if (ncodes == (size_t) -1)
	{
		/*
		 * Invalid multibyte character encountered.  We try to give a useful
		 * error message by letting pg_verifymbstr check the string.  But it's
		 * possible that the string is OK to us, and not OK to mbstowcs ---
		 * this suggests that the LC_CTYPE locale is different from the
		 * database encoding.  Give a generic error message if verifymbstr
		 * can't find anything wrong.
		 */
		pg_verifymbstr(workstr, nbytes, false);
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	Assert(ncodes <= (size_t) nbytes);

	/*
	 * The null-terminated copy was needed only as mbstowcs() input (and for
	 * the error report above); release it rather than leaving it allocated
	 * until the memory context is reset.
	 */
	pfree(workstr);

	return result;
}
/*
 * Build a palloc'd TEXT value from a zero-terminated wchar_t string.
 *
 * Even though the input is zero-terminated, the caller must also supply its
 * length in wide characters (ncodes); current callers already know it, and
 * it is what the output workspace is sized from.
 */
text *
wcstotext(const wchar_t *str, int ncodes)
{
	text	   *out;
	size_t		maxbytes;
	size_t		written;

	/* Reject lengths for which the workspace size computation could overflow */
	if (ncodes < 0 ||
		ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Worst case: each code, plus the terminator, takes MB_CUR_MAX bytes */
	maxbytes = (ncodes + 1) * MB_CUR_MAX;
	out = (text *) palloc(maxbytes + VARHDRSZ);

	/* Convert straight into the data area of the new value */
	written = wcstombs((char *) VARDATA(out), str, maxbytes);

	if (written == (size_t) -1)
	{
		/* Unconvertible wide character ... not expected to happen */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale")));
	}

	Assert(written <= (size_t) (ncodes * MB_CUR_MAX));

	SET_VARSIZE(out, written + VARHDRSZ);

	return out;
}
#endif /* USE_WIDE_UPPER_LOWER */
/*
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
* To make use of the upper/lower functionality, we need to map UTF8 to
* UTF16, which for some reason mbstowcs and wcstombs won't do for us.
* This conversion layer takes care of it.
*/
#ifdef WIN32
/*
 * UTF8-to-UTF16 flavor of texttowcs: converts a TEXT value into a palloc'd,
 * zero-terminated wchar_t string via MultiByteToWideChar().
 */
static wchar_t *
win32_utf8_texttowcs(const text *txt)
{
	int			srclen = VARSIZE_ANY_EXHDR(txt);
	wchar_t    *wbuf;
	int			nwide = 0;

	/* Reject lengths for which the workspace size computation could overflow */
	if (srclen < 0 ||
		srclen > (int) (INT_MAX / sizeof(wchar_t)) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* There can never be more output codes than input bytes */
	wbuf = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));

	/*
	 * The API does not work for zero-length input, so only call it when
	 * there is something to convert; nwide stays 0 otherwise.
	 */
	if (srclen != 0)
	{
		nwide = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), srclen,
									wbuf, srclen);

		if (nwide <= 0)			/* assume it's NO_UNICODE_TRANSLATION */
		{
			/*
			 * Let pg_verifymbstr check the string first so it can raise its
			 * own error; fall back to a generic message if it finds nothing
			 * wrong.
			 */
			pg_verifymbstr(VARDATA_ANY(txt), srclen, false);
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("invalid multibyte character for locale"),
					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
		}
	}

	/* MultiByteToWideChar does not add the terminating null wchar; do it here */
	Assert(nwide <= srclen);
	wbuf[nwide] = 0;

	return wbuf;
}
/*
 * UTF16-to-UTF8 flavor of wcstotext: converts a zero-terminated wchar_t
 * string into a palloc'd TEXT value via WideCharToMultiByte().
 */
static text *
win32_utf8_wcstotext(const wchar_t *str)
{
	text	   *out;
	int			needed;
	int			written;

	/* Sizing pass: the reported byte count includes the trailing null */
	needed = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
	if (needed <= 0)			/* shouldn't happen */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
						GetLastError())));

	out = palloc(needed + VARHDRSZ);

	/* Conversion pass: must produce exactly the size computed above */
	written = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(out), needed,
								  NULL, NULL);
	if (written != needed)		/* shouldn't happen */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
						GetLastError())));

	/* The stored length leaves out the trailing null, hence the -1 */
	SET_VARSIZE(out, needed + VARHDRSZ - 1);

	return out;
}
/*
 * Interface layer: pick the TEXT-to-wchar_t conversion according to the
 * database encoding.  UTF8 databases go through the UTF8/UTF16 routine,
 * everything else through the generic texttowcs.
 */
static wchar_t *
win32_texttowcs(const text *txt)
{
	return (GetDatabaseEncoding() == PG_UTF8)
		? win32_utf8_texttowcs(txt)
		: texttowcs(txt);
}
/*
 * Interface layer: pick the wchar_t-to-TEXT conversion according to the
 * database encoding.  The UTF8 routine does not use ncodes; the generic
 * wcstotext does.
 */
static text *
win32_wcstotext(const wchar_t *str, int ncodes)
{
	return (GetDatabaseEncoding() == PG_UTF8)
		? win32_utf8_wcstotext(str)
		: wcstotext(str, ncodes);
}
/* use macros to cause routines below to call interface layer */
#define texttowcs win32_texttowcs
#define wcstotext win32_wcstotext
#endif /* WIN32 */
#ifdef USE_WIDE_UPPER_LOWER
/*
* wstring_upper and wstring_lower perform multibyte-aware upper/lower case
* transformations of localized strings. Each returns a pointer to the
* transformed string.
*/
/* Upper-case variant: maps every wide character through towupper(). */
char *
wstring_upper(char *str)
{
	text	   *src = cstring_to_text(str);
	wchar_t    *wide = texttowcs(src);
	wchar_t    *wp;
	text	   *converted;
	char	   *upper;

	/* Convert in place, stopping at the terminating zero */
	for (wp = wide; *wp != 0; wp++)
		*wp = towupper(*wp);

	/* wp now sits on the terminator, so wp - wide is the code count */
	converted = wcstotext(wide, (int) (wp - wide));
	upper = text_to_cstring(converted);

	/* Release the intermediates; only the C string is handed back */
	pfree(wide);
	pfree(src);
	pfree(converted);

	return upper;
}
/*
 * Lower-case a C string using wide-character conversion: the input is
 * turned into a wchar_t string, each code is mapped through towlower(),
 * and the result is converted back to a C string, which is returned.
 */
char *
wstring_lower(char *str)
{
	text	   *src = cstring_to_text(str);
	wchar_t    *wide = texttowcs(src);
	wchar_t    *wp;
	text	   *converted;
	char	   *lower;

	/* Convert in place, stopping at the terminating zero */
	for (wp = wide; *wp != 0; wp++)
		*wp = towlower(*wp);

	/* wp now sits on the terminator, so wp - wide is the code count */
	converted = wcstotext(wide, (int) (wp - wide));
	lower = text_to_cstring(converted);

	/* Release the intermediates; only the C string is handed back */
	pfree(wide);
	pfree(src);
	pfree(converted);

	return lower;
}
#endif /* USE_WIDE_UPPER_LOWER */
/********************************************************************
*
* lower
@ -332,52 +56,15 @@ wstring_lower(char *str)
/*
 * lower -- SQL-callable function returning its text argument in lower case.
 *
 * NOTE(review): this span looks like a diff hunk whose +/- markers were
 * lost (the hunk header says 52 old lines became 15), so lines of two
 * versions are interleaved and it must not be read as one compilable
 * function -- note "result" is declared twice.  Presumably the new version
 * consists of the in_string/out_string/result declarations, the
 * str_tolower() call on the argument's bytes, cstring_to_text(), pfree()
 * and the final PG_RETURN_TEXT_P(result); the rest (the wide-character
 * branch and the byte-wise tolower() loop) is the removed code.  Confirm
 * against the real file.
 */
Datum
lower(PG_FUNCTION_ARGS)
{
#ifdef USE_WIDE_UPPER_LOWER
text *in_string = PG_GETARG_TEXT_PP(0);
char *out_string;
text *result;
/*
* Use wide char code only when max encoding length > 1 and ctype != C.
* Some operating systems fail with multi-byte encodings and a C locale.
* Also, for a C locale there is no need to process as multibyte.
*/
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
text *string = PG_GETARG_TEXT_PP(0);
text *result;
wchar_t *workspace;
int i;
out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
result = cstring_to_text(out_string);
pfree(out_string);
workspace = texttowcs(string);
for (i = 0; workspace[i] != 0; i++)
workspace[i] = towlower(workspace[i]);
result = wcstotext(workspace, i);
pfree(workspace);
PG_RETURN_TEXT_P(result);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
{
text *string = PG_GETARG_TEXT_P_COPY(0);
char *ptr;
int m;
/*
* Since we copied the string, we can scribble directly on the value
*/
ptr = VARDATA(string);
m = VARSIZE(string) - VARHDRSZ;
while (m-- > 0)
{
*ptr = tolower((unsigned char) *ptr);
ptr++;
}
PG_RETURN_TEXT_P(string);
}
PG_RETURN_TEXT_P(result);
}
@ -398,52 +85,15 @@ lower(PG_FUNCTION_ARGS)
/*
 * upper -- SQL-callable function returning its text argument in upper case.
 *
 * NOTE(review): this span looks like a diff hunk whose +/- markers were
 * lost (the hunk header says 52 old lines became 15), so lines of two
 * versions are interleaved and it must not be read as one compilable
 * function -- note "result" is declared twice.  Presumably the new version
 * consists of the in_string/out_string/result declarations, the
 * str_toupper() call on the argument's bytes, cstring_to_text(), pfree()
 * and the final PG_RETURN_TEXT_P(result); the rest (the wide-character
 * branch and the byte-wise toupper() loop) is the removed code.  Confirm
 * against the real file.
 */
Datum
upper(PG_FUNCTION_ARGS)
{
#ifdef USE_WIDE_UPPER_LOWER
text *in_string = PG_GETARG_TEXT_PP(0);
char *out_string;
text *result;
/*
* Use wide char code only when max encoding length > 1 and ctype != C.
* Some operating systems fail with multi-byte encodings and a C locale.
* Also, for a C locale there is no need to process as multibyte.
*/
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
text *string = PG_GETARG_TEXT_PP(0);
text *result;
wchar_t *workspace;
int i;
out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
result = cstring_to_text(out_string);
pfree(out_string);
workspace = texttowcs(string);
for (i = 0; workspace[i] != 0; i++)
workspace[i] = towupper(workspace[i]);
result = wcstotext(workspace, i);
pfree(workspace);
PG_RETURN_TEXT_P(result);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
{
text *string = PG_GETARG_TEXT_P_COPY(0);
char *ptr;
int m;
/*
* Since we copied the string, we can scribble directly on the value
*/
ptr = VARDATA(string);
m = VARSIZE(string) - VARHDRSZ;
while (m-- > 0)
{
*ptr = toupper((unsigned char) *ptr);
ptr++;
}
PG_RETURN_TEXT_P(string);
}
PG_RETURN_TEXT_P(result);
}
@ -467,64 +117,15 @@ upper(PG_FUNCTION_ARGS)
/*
 * initcap -- SQL-callable function returning its text argument with the
 * first character of each alphanumeric run upper-cased and the following
 * characters of the run lower-cased.
 *
 * NOTE(review): this span looks like a diff hunk whose +/- markers were
 * lost (the hunk header says 64 old lines became 15), so lines of two
 * versions are interleaved and it must not be read as one compilable
 * function -- note "result" is declared twice.  Presumably the new version
 * consists of the in_string/out_string/result declarations, the
 * str_initcap() call on the argument's bytes, cstring_to_text(), pfree()
 * and the final PG_RETURN_TEXT_P(result); the rest (the wide-character
 * branch and the byte-wise loop, both tracking "wasalnum") is the removed
 * code.  Confirm against the real file.
 */
Datum
initcap(PG_FUNCTION_ARGS)
{
#ifdef USE_WIDE_UPPER_LOWER
text *in_string = PG_GETARG_TEXT_PP(0);
char *out_string;
text *result;
/*
* Use wide char code only when max encoding length > 1 and ctype != C.
* Some operating systems fail with multi-byte encodings and a C locale.
* Also, for a C locale there is no need to process as multibyte.
*/
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
text *string = PG_GETARG_TEXT_PP(0);
text *result;
wchar_t *workspace;
int wasalnum = 0;
int i;
out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
result = cstring_to_text(out_string);
pfree(out_string);
workspace = texttowcs(string);
for (i = 0; workspace[i] != 0; i++)
{
if (wasalnum)
workspace[i] = towlower(workspace[i]);
else
workspace[i] = towupper(workspace[i]);
wasalnum = iswalnum(workspace[i]);
}
result = wcstotext(workspace, i);
pfree(workspace);
PG_RETURN_TEXT_P(result);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
{
text *string = PG_GETARG_TEXT_P_COPY(0);
int wasalnum = 0;
char *ptr;
int m;
/*
* Since we copied the string, we can scribble directly on the value
*/
ptr = VARDATA(string);
m = VARSIZE(string) - VARHDRSZ;
while (m-- > 0)
{
if (wasalnum)
*ptr = tolower((unsigned char) *ptr);
else
*ptr = toupper((unsigned char) *ptr);
wasalnum = isalnum((unsigned char) *ptr);
ptr++;
}
PG_RETURN_TEXT_P(string);
}
PG_RETURN_TEXT_P(result);
}