mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Add conver/convert2 functions. They are similar to the SQL99's convert.
This commit is contained in:
parent
872cd63d43
commit
ab9b6c45cf
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.75 2001/08/06 18:17:42 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.76 2001/08/15 07:07:40 ishii Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -209,6 +209,19 @@ PG_char_to_encoding(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_INT32(0);
|
||||
}
|
||||
|
||||
Datum
|
||||
pg_convert(PG_FUNCTION_ARGS)
|
||||
{
|
||||
elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability");
|
||||
return DirectFunctionCall1(textin, CStringGetDatum(""));
|
||||
}
|
||||
|
||||
Datum
|
||||
pg_convert2(PG_FUNCTION_ARGS)
|
||||
{
|
||||
elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability");
|
||||
return DirectFunctionCall1(textin, CStringGetDatum(""));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
|
@ -3,7 +3,7 @@
|
||||
* client encoding and server internal encoding.
|
||||
* (currently mule internal code (mic) is used)
|
||||
* Tatsuo Ishii
|
||||
* $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
|
||||
* $Id: mbutils.c,v 1.19 2001/08/15 07:07:40 ishii Exp $
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
@ -33,6 +33,67 @@ pg_get_enc_ent(int encoding)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find appropriate encoding conversion functions. If no such
|
||||
* functions found, returns -1.
|
||||
*
|
||||
* Arguments:
|
||||
*
|
||||
* src, dest (in): source and destination encoding ids
|
||||
*
|
||||
* src_to_mic (out): pointer to a function which converts src to
|
||||
* mic/unicode according to dest. if src == mic/unicode or no
|
||||
* appropriate function found, set to 0.
|
||||
*
|
||||
* dest_from_mic (out): pointer to a function which converts
|
||||
* mic/unicode to dest according to src. if dest == mic/unicode or no
|
||||
* appropriate function found, set to 0.
|
||||
*/
|
||||
int
|
||||
pg_find_encoding_converters(int src, int dest, void (**src_to_mic)(), void (**dest_from_mic)())
|
||||
{
|
||||
if (src == dest)
|
||||
{ /* src == dest? */
|
||||
*src_to_mic = *dest_from_mic = 0;
|
||||
}
|
||||
else if (src == MULE_INTERNAL)
|
||||
{ /* src == MULE_INETRNAL? */
|
||||
*dest_from_mic = pg_get_enc_ent(dest)->from_mic;
|
||||
if (*dest_from_mic == 0)
|
||||
return (-1);
|
||||
*src_to_mic = 0;
|
||||
}
|
||||
else if (dest == MULE_INTERNAL)
|
||||
{ /* dest == MULE_INETRNAL? */
|
||||
*src_to_mic = pg_get_enc_ent(src)->to_mic;
|
||||
if (*src_to_mic == 0)
|
||||
return (-1);
|
||||
*dest_from_mic = 0;
|
||||
}
|
||||
else if (src == UNICODE)
|
||||
{ /* src == UNICODE? */
|
||||
*dest_from_mic = pg_get_enc_ent(dest)->from_unicode;
|
||||
if (*dest_from_mic == 0)
|
||||
return (-1);
|
||||
*src_to_mic = 0;
|
||||
}
|
||||
else if (dest == UNICODE)
|
||||
{ /* dest == UNICODE? */
|
||||
*src_to_mic = pg_get_enc_ent(src)->to_unicode;
|
||||
if (*src_to_mic == 0)
|
||||
return (-1);
|
||||
*dest_from_mic = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*src_to_mic = pg_get_enc_ent(src)->to_mic;
|
||||
*dest_from_mic = pg_get_enc_ent(dest)->from_mic;
|
||||
if (*src_to_mic == 0 || *dest_from_mic == 0)
|
||||
return (-1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* set the client encoding. if encoding conversion between
|
||||
* client/server encoding is not supported, returns -1
|
||||
@ -40,59 +101,15 @@ pg_get_enc_ent(int encoding)
|
||||
int
|
||||
pg_set_client_encoding(int encoding)
|
||||
{
|
||||
int current_server_encoding = GetDatabaseEncoding();
|
||||
int current_server_encoding = GetDatabaseEncoding();
|
||||
|
||||
if (pg_find_encoding_converters(encoding, current_server_encoding, &client_to_mic, &server_from_mic) < 0)
|
||||
return (-1);
|
||||
client_encoding = encoding;
|
||||
|
||||
if (client_encoding == current_server_encoding)
|
||||
{ /* server == client? */
|
||||
client_to_mic = client_from_mic = 0;
|
||||
server_to_mic = server_from_mic = 0;
|
||||
}
|
||||
else if (current_server_encoding == MULE_INTERNAL)
|
||||
{ /* server == MULE_INETRNAL? */
|
||||
client_to_mic = pg_get_enc_ent(encoding)->to_mic;
|
||||
client_from_mic = pg_get_enc_ent(encoding)->from_mic;
|
||||
server_to_mic = server_from_mic = 0;
|
||||
if (client_to_mic == 0 || client_from_mic == 0)
|
||||
return (-1);
|
||||
}
|
||||
else if (encoding == MULE_INTERNAL)
|
||||
{ /* client == MULE_INETRNAL? */
|
||||
client_to_mic = client_from_mic = 0;
|
||||
server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
|
||||
server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
|
||||
if (server_to_mic == 0 || server_from_mic == 0)
|
||||
return (-1);
|
||||
}
|
||||
else if (current_server_encoding == UNICODE)
|
||||
{ /* server == UNICODE? */
|
||||
client_to_mic = pg_get_enc_ent(encoding)->to_unicode;
|
||||
client_from_mic = pg_get_enc_ent(encoding)->from_unicode;
|
||||
server_to_mic = server_from_mic = 0;
|
||||
if (client_to_mic == 0 || client_from_mic == 0)
|
||||
return (-1);
|
||||
}
|
||||
else if (encoding == UNICODE)
|
||||
{ /* client == UNICODE? */
|
||||
client_to_mic = client_from_mic = 0;
|
||||
server_to_mic = pg_get_enc_ent(current_server_encoding)->to_unicode;
|
||||
server_from_mic = pg_get_enc_ent(current_server_encoding)->from_unicode;
|
||||
if (server_to_mic == 0 || server_from_mic == 0)
|
||||
return (-1);
|
||||
}
|
||||
else
|
||||
{
|
||||
client_to_mic = pg_get_enc_ent(encoding)->to_mic;
|
||||
client_from_mic = pg_get_enc_ent(encoding)->from_mic;
|
||||
server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
|
||||
server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
|
||||
if (client_to_mic == 0 || client_from_mic == 0)
|
||||
return (-1);
|
||||
if (server_to_mic == 0 || server_from_mic == 0)
|
||||
return (-1);
|
||||
}
|
||||
return (0);
|
||||
if (pg_find_encoding_converters(current_server_encoding, encoding, &server_to_mic, &client_from_mic) < 0)
|
||||
return (-1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -109,6 +126,141 @@ pg_get_client_encoding()
|
||||
return (client_encoding);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert src encoding and returns it. Actual conversion is done by
|
||||
* src_to_mic and dest_from_mic, which can be obtained by
|
||||
* pg_find_encoding_converters(). The reason we require two conversion
|
||||
* functions is that we have an intermediate encoding: MULE_INTERNAL
|
||||
* Using intermediate encodings will reduce the number of functions
|
||||
* doing encoding conversions. Special case is either src or dest is
|
||||
* the intermediate encoding itself. In this case, you don't need src
|
||||
* or dest (setting 0 will indicate there's no conversion
|
||||
* function). Another case is you have direct-conversion function from
|
||||
* src to dest. In this case either src_to_mic or dest_from_mic could
|
||||
* be set to 0 also.
|
||||
*
|
||||
* Note that If src or dest is UNICODE, we have to do
|
||||
* direct-conversion, since we don't support conversion bwteen UNICODE
|
||||
* and MULE_INTERNAL, we cannot go through MULE_INTERNAL.
|
||||
*
|
||||
* CASE 1: if no conversion is required, then the given pointer s is returned.
|
||||
*
|
||||
* CASE 2: if conversion is required, a palloc'd string is returned.
|
||||
*
|
||||
* Callers must check whether return value differs from passed value
|
||||
* to determine whether to pfree the result or not!
|
||||
*
|
||||
* Note: we assume that conversion cannot cause more than a 4-to-1 growth
|
||||
* in the length of the string --- is this enough? */
|
||||
|
||||
unsigned char *
|
||||
pg_do_encoding_conversion(unsigned char *src, int len, void (*src_to_mic)(), void (*dest_from_mic)())
|
||||
{
|
||||
unsigned char *result = src;
|
||||
unsigned char *buf;
|
||||
|
||||
if (src_to_mic)
|
||||
{
|
||||
buf = (unsigned char *) palloc(len * 4 + 1);
|
||||
(*src_to_mic) (result, buf, len);
|
||||
result = buf;
|
||||
len = strlen(result);
|
||||
}
|
||||
if (dest_from_mic)
|
||||
{
|
||||
buf = (unsigned char *) palloc(len * 4 + 1);
|
||||
(*dest_from_mic) (result, buf, len);
|
||||
if (result != src)
|
||||
pfree(result); /* release first buffer */
|
||||
result = buf;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert string using encoding_nanme. We assume that string's
|
||||
* encoding is same as DB encoding.
|
||||
*
|
||||
* TEXT convert(TEXT string, NAME encoding_name)
|
||||
*/
|
||||
Datum
|
||||
pg_convert(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P(0);
|
||||
Name s = PG_GETARG_NAME(1);
|
||||
int encoding = pg_char_to_encoding(NameStr(*s));
|
||||
int db_encoding = GetDatabaseEncoding();
|
||||
void (*src)(), (*dest)();
|
||||
unsigned char *result;
|
||||
text *retval;
|
||||
|
||||
if (encoding < 0)
|
||||
elog(ERROR, "Invalid encoding name %s", NameStr(*s));
|
||||
|
||||
if (pg_find_encoding_converters(db_encoding, encoding, &src, &dest) < 0)
|
||||
{
|
||||
char *encoding_name = (char *)pg_encoding_to_char(db_encoding);
|
||||
elog(ERROR, "Conversion from %s to %s is not possible", NameStr(*s), encoding_name);
|
||||
}
|
||||
|
||||
result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ,
|
||||
src, dest);
|
||||
if (result == NULL)
|
||||
elog(ERROR, "Encoding conversion failed");
|
||||
|
||||
retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
|
||||
if (result != (unsigned char *)VARDATA(string))
|
||||
pfree(result);
|
||||
|
||||
/* free memory if allocated by the toaster */
|
||||
PG_FREE_IF_COPY(string, 0);
|
||||
|
||||
PG_RETURN_TEXT_P(retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert string using encoding_nanme.
|
||||
*
|
||||
* TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
|
||||
*/
|
||||
Datum
|
||||
pg_convert2(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *string = PG_GETARG_TEXT_P(0);
|
||||
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
|
||||
int src_encoding = pg_char_to_encoding(src_encoding_name);
|
||||
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
|
||||
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
|
||||
void (*src)(), (*dest)();
|
||||
unsigned char *result;
|
||||
text *retval;
|
||||
|
||||
if (src_encoding < 0)
|
||||
elog(ERROR, "Invalid source encoding name %s", src_encoding_name);
|
||||
if (dest_encoding < 0)
|
||||
elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name);
|
||||
|
||||
if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0)
|
||||
{
|
||||
elog(ERROR, "Conversion from %s to %s is not possible",
|
||||
src_encoding_name, dest_encoding_name);
|
||||
}
|
||||
|
||||
result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ,
|
||||
src, dest);
|
||||
if (result == NULL)
|
||||
elog(ERROR, "Encoding conversion failed");
|
||||
|
||||
retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
|
||||
if (result != (unsigned char *)VARDATA(string))
|
||||
pfree(result);
|
||||
|
||||
/* free memory if allocated by the toaster */
|
||||
PG_FREE_IF_COPY(string, 0);
|
||||
|
||||
PG_RETURN_TEXT_P(retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* convert client encoding to server encoding.
|
||||
*
|
||||
@ -125,27 +277,10 @@ pg_get_client_encoding()
|
||||
unsigned char *
|
||||
pg_client_to_server(unsigned char *s, int len)
|
||||
{
|
||||
unsigned char *result = s;
|
||||
unsigned char *buf;
|
||||
|
||||
if (client_encoding == GetDatabaseEncoding())
|
||||
return result;
|
||||
if (client_to_mic)
|
||||
{
|
||||
buf = (unsigned char *) palloc(len * 4 + 1);
|
||||
(*client_to_mic) (result, buf, len);
|
||||
result = buf;
|
||||
len = strlen(result);
|
||||
}
|
||||
if (server_from_mic)
|
||||
{
|
||||
buf = (unsigned char *) palloc(len * 4 + 1);
|
||||
(*server_from_mic) (result, buf, len);
|
||||
if (result != s)
|
||||
pfree(result); /* release first buffer */
|
||||
result = buf;
|
||||
}
|
||||
return result;
|
||||
return s;
|
||||
|
||||
return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -164,27 +299,10 @@ pg_client_to_server(unsigned char *s, int len)
|
||||
unsigned char *
|
||||
pg_server_to_client(unsigned char *s, int len)
|
||||
{
|
||||
unsigned char *result = s;
|
||||
unsigned char *buf;
|
||||
|
||||
if (client_encoding == GetDatabaseEncoding())
|
||||
return result;
|
||||
if (server_to_mic)
|
||||
{
|
||||
buf = (unsigned char *) palloc(len * 4 + 1);
|
||||
(*server_to_mic) (result, buf, len);
|
||||
result = buf;
|
||||
len = strlen(result);
|
||||
}
|
||||
if (client_from_mic)
|
||||
{
|
||||
buf = (unsigned char *) palloc(len * 4 + 1);
|
||||
(*client_from_mic) (result, buf, len);
|
||||
if (result != s)
|
||||
pfree(result); /* release first buffer */
|
||||
result = buf;
|
||||
}
|
||||
return result;
|
||||
return s;
|
||||
|
||||
return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic);
|
||||
}
|
||||
|
||||
/* convert a multi-byte string to a wchar */
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pg_proc.h,v 1.204 2001/08/14 22:21:58 tgl Exp $
|
||||
* $Id: pg_proc.h,v 1.205 2001/08/15 07:07:40 ishii Exp $
|
||||
*
|
||||
* NOTES
|
||||
* The script catalog/genbki.sh reads this file and generates .bki
|
||||
@ -2137,7 +2137,13 @@ DESCR("return portion of string");
|
||||
DATA(insert OID = 1039 ( getdatabaseencoding PGUID 12 f t f t 0 f 19 "0" 100 0 0 100 getdatabaseencoding - ));
|
||||
DESCR("encoding name of current database");
|
||||
|
||||
DATA(insert OID = 1295 ( pg_char_to_encoding PGUID 12 f t f t 1 f 23 "19" 100 0 0 100 PG_char_to_encoding - ));
|
||||
DATA(insert OID = 1717 ( convert PGUID 12 f t f t 2 f 25 "25 19" 100 0 0 100 pg_convert - ));
|
||||
DESCR("convert string with specified destination encoding name");
|
||||
|
||||
DATA(insert OID = 1813 ( convert PGUID 12 f t f t 3 f 25 "25 19 19" 100 0 0 100 pg_convert2 - ));
|
||||
DESCR("convert string with specified encoding names");
|
||||
|
||||
DATA(insert OID = 1264 ( pg_char_to_encoding PGUID 12 f t f t 1 f 23 "19" 100 0 0 100 PG_char_to_encoding - ));
|
||||
DESCR("convert encoding name to encoding id");
|
||||
|
||||
DATA(insert OID = 1597 ( pg_encoding_to_char PGUID 12 f t f t 1 f 19 "23" 100 0 0 100 PG_encoding_to_char - ));
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* $Id: pg_wchar.h,v 1.27 2001/07/15 11:07:37 ishii Exp $ */
|
||||
/* $Id: pg_wchar.h,v 1.28 2001/08/15 07:07:40 ishii Exp $ */
|
||||
|
||||
#ifndef PG_WCHAR_H
|
||||
#define PG_WCHAR_H
|
||||
@ -145,6 +145,8 @@ extern unsigned char *pg_server_to_client(unsigned char *, int);
|
||||
extern int pg_valid_client_encoding(const char *);
|
||||
extern pg_encoding_conv_tbl *pg_get_enc_ent(int);
|
||||
extern int pg_utf_mblen(const unsigned char *);
|
||||
extern int pg_find_encoding_converters(int, int, void (**)(), void (**)());
|
||||
extern unsigned char *pg_do_encoding_conversion(unsigned char *, int, void (*)(), void (*)());
|
||||
|
||||
/* internally-used versions of functions. The PG_xxx forms of these
|
||||
* functions have fmgr-compatible interfaves.
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: builtins.h,v 1.161 2001/08/14 22:21:59 tgl Exp $
|
||||
* $Id: builtins.h,v 1.162 2001/08/15 07:07:40 ishii Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -581,6 +581,8 @@ extern Datum RI_FKey_setdefault_upd(PG_FUNCTION_ARGS);
|
||||
extern Datum getdatabaseencoding(PG_FUNCTION_ARGS);
|
||||
extern Datum PG_encoding_to_char(PG_FUNCTION_ARGS);
|
||||
extern Datum PG_char_to_encoding(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_convert(PG_FUNCTION_ARGS);
|
||||
extern Datum pg_convert2(PG_FUNCTION_ARGS);
|
||||
|
||||
/* format_type.c */
|
||||
extern Datum format_type(PG_FUNCTION_ARGS);
|
||||
|
Loading…
x
Reference in New Issue
Block a user