mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Change initdb and CREATE DATABASE to actively reject attempts to create
databases with encodings that are incompatible with the server's LC_CTYPE locale, when we can determine that (which we can on most modern platforms, I believe). C/POSIX locale is compatible with all encodings, of course, so there is still some usefulness to CREATE DATABASE's ENCODING option, but this will insulate us against all sorts of recurring complaints caused by mismatched settings. I moved initdb's existing LC_CTYPE-to-encoding mapping knowledge into a new src/port/ file so it could be shared by CREATE DATABASE.
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
# -*-makefile-*-
|
||||
# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.238 2007/08/20 08:53:12 petere Exp $
|
||||
# $PostgreSQL: pgsql/src/Makefile.global.in,v 1.239 2007/09/28 22:25:49 tgl Exp $
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# All PostgreSQL makefiles include this file and use the variables it sets,
|
||||
@ -423,7 +423,7 @@ endif
|
||||
#
|
||||
# substitute implementations of C library routines (see src/port/)
|
||||
|
||||
LIBOBJS = @LIBOBJS@ copydir.o dirmod.o exec.o noblock.o path.o pipe.o pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o
|
||||
LIBOBJS = @LIBOBJS@
|
||||
|
||||
LIBS := -lpgport $(LIBS)
|
||||
# add location of libpgport.a to LDFLAGS
|
||||
|
@ -13,13 +13,14 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.198 2007/09/03 18:46:29 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.199 2007/09/28 22:25:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <locale.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
@ -96,6 +97,7 @@ createdb(const CreatedbStmt *stmt)
|
||||
const char *dbtemplate = NULL;
|
||||
int encoding = -1;
|
||||
int dbconnlimit = -1;
|
||||
int ctype_encoding;
|
||||
|
||||
/* Extract options from the statement node tree */
|
||||
foreach(option, stmt->options)
|
||||
@ -254,6 +256,32 @@ createdb(const CreatedbStmt *stmt)
|
||||
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
||||
errmsg("invalid server encoding %d", encoding)));
|
||||
|
||||
/*
|
||||
* Check whether encoding matches server locale settings. We allow
|
||||
* mismatch in two cases:
|
||||
*
|
||||
* 1. ctype_encoding = SQL_ASCII, which means either that the locale
|
||||
* is C/POSIX which works with any encoding, or that we couldn't determine
|
||||
* the locale's encoding and have to trust the user to get it right.
|
||||
*
|
||||
* 2. selected encoding is SQL_ASCII, but only if you're a superuser.
|
||||
* This is risky but we have historically allowed it --- notably, the
|
||||
* regression tests require it.
|
||||
*
|
||||
* Note: if you change this policy, fix initdb to match.
|
||||
*/
|
||||
ctype_encoding = pg_get_encoding_from_locale(NULL);
|
||||
|
||||
if (!(ctype_encoding == encoding ||
|
||||
ctype_encoding == PG_SQL_ASCII ||
|
||||
(encoding == PG_SQL_ASCII && superuser())))
|
||||
ereport(ERROR,
|
||||
(errmsg("encoding %s does not match server's locale %s",
|
||||
pg_encoding_to_char(encoding),
|
||||
setlocale(LC_CTYPE, NULL)),
|
||||
errdetail("The server's LC_CTYPE setting requires encoding %s.",
|
||||
pg_encoding_to_char(ctype_encoding))));
|
||||
|
||||
/* Resolve default tablespace for new database */
|
||||
if (dtablespacename && dtablespacename->arg)
|
||||
{
|
||||
|
@ -42,7 +42,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
* Portions taken from FreeBSD.
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.142 2007/09/28 15:25:44 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.143 2007/09/28 22:25:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -54,9 +54,6 @@
|
||||
#include <unistd.h>
|
||||
#include <locale.h>
|
||||
#include <signal.h>
|
||||
#ifdef HAVE_LANGINFO_H
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
#include <time.h>
|
||||
|
||||
#include "libpq/pqsignal.h"
|
||||
@ -720,197 +717,6 @@ get_encoding_id(char *encoding_name)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
/*
|
||||
* Checks whether the encoding selected for PostgreSQL and the
|
||||
* encoding used by the system locale match.
|
||||
*/
|
||||
|
||||
struct encoding_match
|
||||
{
|
||||
enum pg_enc pg_enc_code;
|
||||
const char *system_enc_name;
|
||||
};
|
||||
|
||||
static const struct encoding_match encoding_match_list[] = {
|
||||
{PG_EUC_JP, "EUC-JP"},
|
||||
{PG_EUC_JP, "eucJP"},
|
||||
{PG_EUC_JP, "IBM-eucJP"},
|
||||
{PG_EUC_JP, "sdeckanji"},
|
||||
|
||||
{PG_EUC_CN, "EUC-CN"},
|
||||
{PG_EUC_CN, "eucCN"},
|
||||
{PG_EUC_CN, "IBM-eucCN"},
|
||||
{PG_EUC_CN, "GB2312"},
|
||||
{PG_EUC_CN, "dechanzi"},
|
||||
|
||||
{PG_EUC_KR, "EUC-KR"},
|
||||
{PG_EUC_KR, "eucKR"},
|
||||
{PG_EUC_KR, "IBM-eucKR"},
|
||||
{PG_EUC_KR, "deckorean"},
|
||||
{PG_EUC_KR, "5601"},
|
||||
|
||||
{PG_EUC_TW, "EUC-TW"},
|
||||
{PG_EUC_TW, "eucTW"},
|
||||
{PG_EUC_TW, "IBM-eucTW"},
|
||||
{PG_EUC_TW, "cns11643"},
|
||||
|
||||
#ifdef NOT_VERIFIED
|
||||
{PG_JOHAB, "???"},
|
||||
#endif
|
||||
|
||||
{PG_UTF8, "UTF-8"},
|
||||
{PG_UTF8, "utf8"},
|
||||
|
||||
{PG_LATIN1, "ISO-8859-1"},
|
||||
{PG_LATIN1, "ISO8859-1"},
|
||||
{PG_LATIN1, "iso88591"},
|
||||
|
||||
{PG_LATIN2, "ISO-8859-2"},
|
||||
{PG_LATIN2, "ISO8859-2"},
|
||||
{PG_LATIN2, "iso88592"},
|
||||
|
||||
{PG_LATIN3, "ISO-8859-3"},
|
||||
{PG_LATIN3, "ISO8859-3"},
|
||||
{PG_LATIN3, "iso88593"},
|
||||
|
||||
{PG_LATIN4, "ISO-8859-4"},
|
||||
{PG_LATIN4, "ISO8859-4"},
|
||||
{PG_LATIN4, "iso88594"},
|
||||
|
||||
{PG_LATIN5, "ISO-8859-9"},
|
||||
{PG_LATIN5, "ISO8859-9"},
|
||||
{PG_LATIN5, "iso88599"},
|
||||
|
||||
{PG_LATIN6, "ISO-8859-10"},
|
||||
{PG_LATIN6, "ISO8859-10"},
|
||||
{PG_LATIN6, "iso885910"},
|
||||
|
||||
{PG_LATIN7, "ISO-8859-13"},
|
||||
{PG_LATIN7, "ISO8859-13"},
|
||||
{PG_LATIN7, "iso885913"},
|
||||
|
||||
{PG_LATIN8, "ISO-8859-14"},
|
||||
{PG_LATIN8, "ISO8859-14"},
|
||||
{PG_LATIN8, "iso885914"},
|
||||
|
||||
{PG_LATIN9, "ISO-8859-15"},
|
||||
{PG_LATIN9, "ISO8859-15"},
|
||||
{PG_LATIN9, "iso885915"},
|
||||
|
||||
{PG_LATIN10, "ISO-8859-16"},
|
||||
{PG_LATIN10, "ISO8859-16"},
|
||||
{PG_LATIN10, "iso885916"},
|
||||
|
||||
{PG_WIN1252, "CP1252"},
|
||||
{PG_WIN1253, "CP1253"},
|
||||
{PG_WIN1254, "CP1254"},
|
||||
{PG_WIN1255, "CP1255"},
|
||||
{PG_WIN1256, "CP1256"},
|
||||
{PG_WIN1257, "CP1257"},
|
||||
{PG_WIN1258, "CP1258"},
|
||||
#ifdef NOT_VERIFIED
|
||||
{PG_WIN874, "???"},
|
||||
#endif
|
||||
{PG_KOI8R, "KOI8-R"},
|
||||
{PG_WIN1251, "CP1251"},
|
||||
{PG_WIN866, "CP866"},
|
||||
|
||||
{PG_ISO_8859_5, "ISO-8859-5"},
|
||||
{PG_ISO_8859_5, "ISO8859-5"},
|
||||
{PG_ISO_8859_5, "iso88595"},
|
||||
|
||||
{PG_ISO_8859_6, "ISO-8859-6"},
|
||||
{PG_ISO_8859_6, "ISO8859-6"},
|
||||
{PG_ISO_8859_6, "iso88596"},
|
||||
|
||||
{PG_ISO_8859_7, "ISO-8859-7"},
|
||||
{PG_ISO_8859_7, "ISO8859-7"},
|
||||
{PG_ISO_8859_7, "iso88597"},
|
||||
|
||||
{PG_ISO_8859_8, "ISO-8859-8"},
|
||||
{PG_ISO_8859_8, "ISO8859-8"},
|
||||
{PG_ISO_8859_8, "iso88598"},
|
||||
|
||||
{PG_SQL_ASCII, NULL} /* end marker */
|
||||
};
|
||||
|
||||
static char *
|
||||
get_encoding_from_locale(const char *ctype)
|
||||
{
|
||||
char *save;
|
||||
char *sys;
|
||||
|
||||
save = setlocale(LC_CTYPE, NULL);
|
||||
if (!save)
|
||||
return NULL;
|
||||
save = xstrdup(save);
|
||||
|
||||
setlocale(LC_CTYPE, ctype);
|
||||
sys = nl_langinfo(CODESET);
|
||||
sys = xstrdup(sys);
|
||||
|
||||
setlocale(LC_CTYPE, save);
|
||||
free(save);
|
||||
|
||||
return sys;
|
||||
}
|
||||
|
||||
static void
|
||||
check_encodings_match(int pg_enc, const char *ctype)
|
||||
{
|
||||
char *sys;
|
||||
int i;
|
||||
|
||||
sys = get_encoding_from_locale(ctype);
|
||||
|
||||
for (i = 0; encoding_match_list[i].system_enc_name; i++)
|
||||
{
|
||||
if (pg_enc == encoding_match_list[i].pg_enc_code
|
||||
&& pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
|
||||
{
|
||||
free(sys);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
_("%s: warning: encoding mismatch\n"), progname);
|
||||
fprintf(stderr,
|
||||
_("The encoding you selected (%s) and the encoding that the selected\n"
|
||||
"locale uses (%s) are not known to match. This might lead to\n"
|
||||
"misbehavior in various character string processing functions. To fix\n"
|
||||
"this situation, rerun %s and either do not specify an encoding\n"
|
||||
"explicitly, or choose a matching combination.\n"),
|
||||
pg_encoding_to_char(pg_enc), sys, progname);
|
||||
|
||||
free(sys);
|
||||
return;
|
||||
}
|
||||
|
||||
static int
|
||||
find_matching_encoding(const char *ctype)
|
||||
{
|
||||
char *sys;
|
||||
int i;
|
||||
|
||||
sys = get_encoding_from_locale(ctype);
|
||||
|
||||
for (i = 0; encoding_match_list[i].system_enc_name; i++)
|
||||
{
|
||||
if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
|
||||
{
|
||||
free(sys);
|
||||
return encoding_match_list[i].pg_enc_code;
|
||||
}
|
||||
}
|
||||
|
||||
free(sys);
|
||||
return -1;
|
||||
}
|
||||
#endif /* HAVE_LANGINFO_H && CODESET */
|
||||
|
||||
|
||||
/*
|
||||
* Support for determining the best default text search configuration.
|
||||
* We key this off LC_CTYPE, after stripping its encoding indicator if any.
|
||||
@ -2909,10 +2715,6 @@ main(int argc, char *argv[])
|
||||
if (strlen(username) == 0)
|
||||
username = effective_user;
|
||||
|
||||
|
||||
if (strlen(encoding))
|
||||
encodingid = get_encoding_id(encoding);
|
||||
|
||||
set_input(&bki_file, "postgres.bki");
|
||||
set_input(&desc_file, "postgres.description");
|
||||
set_input(&shdesc_file, "postgres.shdescription");
|
||||
@ -2988,32 +2790,58 @@ main(int argc, char *argv[])
|
||||
lc_time);
|
||||
}
|
||||
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
if (strcmp(lc_ctype, "C") != 0 && strcmp(lc_ctype, "POSIX") != 0)
|
||||
if (strlen(encoding) == 0)
|
||||
{
|
||||
if (strlen(encoding) == 0)
|
||||
{
|
||||
int tmp;
|
||||
int ctype_enc;
|
||||
|
||||
tmp = find_matching_encoding(lc_ctype);
|
||||
if (tmp == -1)
|
||||
{
|
||||
fprintf(stderr, _("%s: could not find suitable encoding for locale \"%s\"\n"), progname, lc_ctype);
|
||||
fprintf(stderr, _("Rerun %s with the -E option.\n"), progname);
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||
exit(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
encodingid = encodingid_to_string(tmp);
|
||||
printf(_("The default database encoding has accordingly been set to %s.\n"),
|
||||
pg_encoding_to_char(tmp));
|
||||
}
|
||||
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
|
||||
|
||||
if (ctype_enc == PG_SQL_ASCII &&
|
||||
!(pg_strcasecmp(lc_ctype, "C") == 0 ||
|
||||
pg_strcasecmp(lc_ctype, "POSIX") == 0))
|
||||
{
|
||||
fprintf(stderr, _("%s: could not find suitable encoding for locale \"%s\"\n"),
|
||||
progname, lc_ctype);
|
||||
fprintf(stderr, _("Rerun %s with the -E option.\n"), progname);
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
else
|
||||
check_encodings_match(atoi(encodingid), lc_ctype);
|
||||
{
|
||||
encodingid = encodingid_to_string(ctype_enc);
|
||||
printf(_("The default database encoding has accordingly been set to %s.\n"),
|
||||
pg_encoding_to_char(ctype_enc));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int user_enc;
|
||||
int ctype_enc;
|
||||
|
||||
encodingid = get_encoding_id(encoding);
|
||||
user_enc = atoi(encodingid);
|
||||
|
||||
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
|
||||
|
||||
/* We allow selection of SQL_ASCII --- see notes in createdb() */
|
||||
if (!(ctype_enc == user_enc ||
|
||||
ctype_enc == PG_SQL_ASCII ||
|
||||
user_enc == PG_SQL_ASCII))
|
||||
{
|
||||
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
|
||||
fprintf(stderr,
|
||||
_("The encoding you selected (%s) and the encoding that the\n"
|
||||
"selected locale uses (%s) do not match. This would lead to\n"
|
||||
"misbehavior in various character string processing functions.\n"
|
||||
"Rerun %s and either do not specify an encoding explicitly,\n"
|
||||
"or choose a matching combination.\n"),
|
||||
pg_encoding_to_char(user_enc),
|
||||
pg_encoding_to_char(ctype_enc),
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_LANGINFO_H && CODESET */
|
||||
|
||||
if (strlen(default_text_search_config) == 0)
|
||||
{
|
||||
|
@ -6,7 +6,7 @@
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/port.h,v 1.112 2007/07/12 23:28:49 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/port.h,v 1.113 2007/09/28 22:25:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -391,4 +391,7 @@ typedef int (*qsort_arg_comparator) (const void *a, const void *b, void *arg);
|
||||
extern void qsort_arg(void *base, size_t nel, size_t elsize,
|
||||
qsort_arg_comparator cmp, void *arg);
|
||||
|
||||
/* port/chklocale.c */
|
||||
/*
 * Map an LC_CTYPE setting (NULL means the current locale) to a PostgreSQL
 * encoding ID.  Returns PG_SQL_ASCII when the locale is C/POSIX or the
 * encoding cannot be determined.
 */
extern int pg_get_encoding_from_locale(const char *ctype);
|
||||
|
||||
#endif /* PG_PORT_H */
|
||||
|
@ -14,8 +14,12 @@
|
||||
# libpgport_srv.a - contains object files without FRONTEND defined,
|
||||
# for use only by the backend binaries
|
||||
#
|
||||
# LIBOBJS is set by configure (via Makefile.global) to be the list of
|
||||
# object files that are conditionally needed depending on platform.
|
||||
# OBJS adds additional object files that are always compiled.
|
||||
#
|
||||
# IDENTIFICATION
|
||||
# $PostgreSQL: pgsql/src/port/Makefile,v 1.34 2007/02/09 15:56:00 petere Exp $
|
||||
# $PostgreSQL: pgsql/src/port/Makefile,v 1.35 2007/09/28 22:25:49 tgl Exp $
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
@ -26,8 +30,10 @@ include $(top_builddir)/src/Makefile.global
|
||||
override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS)
|
||||
LIBS += $(PTHREAD_LIBS)
|
||||
|
||||
# Replace all object files so they use FRONTEND define
|
||||
LIBOBJS_SRV = $(LIBOBJS:%.o=%_srv.o)
|
||||
OBJS = $(LIBOBJS) chklocale.o copydir.o dirmod.o exec.o noblock.o path.o pipe.o pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o
|
||||
|
||||
# foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
|
||||
OBJS_SRV = $(OBJS:%.o=%_srv.o)
|
||||
|
||||
all: libpgport.a libpgport_srv.a
|
||||
|
||||
@ -41,32 +47,29 @@ installdirs:
|
||||
uninstall:
|
||||
rm -f '$(DESTDIR)$(libdir)/libpgport.a'
|
||||
|
||||
libpgport.a: $(LIBOBJS)
|
||||
libpgport.a: $(OBJS)
|
||||
$(AR) $(AROPT) $@ $^
|
||||
|
||||
# thread.o needs PTHREAD_CFLAGS (but thread_srv.o does not)
|
||||
thread.o: thread.c
|
||||
$(CC) $(CFLAGS) $(CPPFLAGS) $(PTHREAD_CFLAGS) -c $<
|
||||
|
||||
path.o: path.c pg_config_paths.h
|
||||
|
||||
path_srv.o: path.c pg_config_paths.h
|
||||
|
||||
#
|
||||
# Server versions of object files
|
||||
#
|
||||
|
||||
libpgport_srv.a: $(LIBOBJS_SRV)
|
||||
libpgport_srv.a: $(OBJS_SRV)
|
||||
$(AR) $(AROPT) $@ $^
|
||||
|
||||
%_srv.o: %.c
|
||||
$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
|
||||
|
||||
# No thread flags for server version
|
||||
thread_srv.o: thread.c
|
||||
$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
|
||||
|
||||
# Dependency is to ensure that path changes propagate
|
||||
#
|
||||
|
||||
path.o: path.c pg_config_paths.h
|
||||
|
||||
path_srv.o: path.c pg_config_paths.h
|
||||
|
||||
# We create a separate file rather than put these in pg_config.h
|
||||
# because many of these values come from makefiles and are not
|
||||
# available to configure.
|
||||
@ -84,4 +87,4 @@ pg_config_paths.h: $(top_builddir)/src/Makefile.global
|
||||
echo "#define MANDIR \"$(mandir)\"" >>$@
|
||||
|
||||
clean distclean maintainer-clean:
|
||||
rm -f libpgport.a libpgport_srv.a $(LIBOBJS) $(LIBOBJS_SRV) pg_config_paths.h
|
||||
rm -f libpgport.a libpgport_srv.a $(OBJS) $(OBJS_SRV) pg_config_paths.h
|
||||
|
246
src/port/chklocale.c
Normal file
246
src/port/chklocale.c
Normal file
@ -0,0 +1,246 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* chklocale.c
|
||||
* Functions for handling locale-related info
|
||||
*
|
||||
*
|
||||
* Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.1 2007/09/28 22:25:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef FRONTEND
|
||||
#include "postgres.h"
|
||||
#else
|
||||
#include "postgres_fe.h"
|
||||
#endif
|
||||
|
||||
#include <locale.h>
|
||||
#ifdef HAVE_LANGINFO_H
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
|
||||
#include "mb/pg_wchar.h"
|
||||
|
||||
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
|
||||
/*
|
||||
* This table needs to recognize all the CODESET spellings for supported
|
||||
* backend encodings. We don't need to handle frontend-only encodings.
|
||||
* Note that we search the table with pg_strcasecmp(), so variant
|
||||
* capitalizations don't need their own entries.
|
||||
*/
|
||||
struct encoding_match
|
||||
{
|
||||
enum pg_enc pg_enc_code;
|
||||
const char *system_enc_name;
|
||||
};
|
||||
|
||||
static const struct encoding_match encoding_match_list[] = {
|
||||
{PG_EUC_JP, "EUC-JP"},
|
||||
{PG_EUC_JP, "eucJP"},
|
||||
{PG_EUC_JP, "IBM-eucJP"},
|
||||
{PG_EUC_JP, "sdeckanji"},
|
||||
|
||||
{PG_EUC_CN, "EUC-CN"},
|
||||
{PG_EUC_CN, "eucCN"},
|
||||
{PG_EUC_CN, "IBM-eucCN"},
|
||||
{PG_EUC_CN, "GB2312"},
|
||||
{PG_EUC_CN, "dechanzi"},
|
||||
|
||||
{PG_EUC_KR, "EUC-KR"},
|
||||
{PG_EUC_KR, "eucKR"},
|
||||
{PG_EUC_KR, "IBM-eucKR"},
|
||||
{PG_EUC_KR, "deckorean"},
|
||||
{PG_EUC_KR, "5601"},
|
||||
|
||||
{PG_EUC_TW, "EUC-TW"},
|
||||
{PG_EUC_TW, "eucTW"},
|
||||
{PG_EUC_TW, "IBM-eucTW"},
|
||||
{PG_EUC_TW, "cns11643"},
|
||||
|
||||
{PG_UTF8, "UTF-8"},
|
||||
{PG_UTF8, "utf8"},
|
||||
|
||||
{PG_LATIN1, "ISO-8859-1"},
|
||||
{PG_LATIN1, "ISO8859-1"},
|
||||
{PG_LATIN1, "iso88591"},
|
||||
|
||||
{PG_LATIN2, "ISO-8859-2"},
|
||||
{PG_LATIN2, "ISO8859-2"},
|
||||
{PG_LATIN2, "iso88592"},
|
||||
|
||||
{PG_LATIN3, "ISO-8859-3"},
|
||||
{PG_LATIN3, "ISO8859-3"},
|
||||
{PG_LATIN3, "iso88593"},
|
||||
|
||||
{PG_LATIN4, "ISO-8859-4"},
|
||||
{PG_LATIN4, "ISO8859-4"},
|
||||
{PG_LATIN4, "iso88594"},
|
||||
|
||||
{PG_LATIN5, "ISO-8859-9"},
|
||||
{PG_LATIN5, "ISO8859-9"},
|
||||
{PG_LATIN5, "iso88599"},
|
||||
|
||||
{PG_LATIN6, "ISO-8859-10"},
|
||||
{PG_LATIN6, "ISO8859-10"},
|
||||
{PG_LATIN6, "iso885910"},
|
||||
|
||||
{PG_LATIN7, "ISO-8859-13"},
|
||||
{PG_LATIN7, "ISO8859-13"},
|
||||
{PG_LATIN7, "iso885913"},
|
||||
|
||||
{PG_LATIN8, "ISO-8859-14"},
|
||||
{PG_LATIN8, "ISO8859-14"},
|
||||
{PG_LATIN8, "iso885914"},
|
||||
|
||||
{PG_LATIN9, "ISO-8859-15"},
|
||||
{PG_LATIN9, "ISO8859-15"},
|
||||
{PG_LATIN9, "iso885915"},
|
||||
|
||||
{PG_LATIN10, "ISO-8859-16"},
|
||||
{PG_LATIN10, "ISO8859-16"},
|
||||
{PG_LATIN10, "iso885916"},
|
||||
|
||||
{PG_KOI8R, "KOI8-R"},
|
||||
|
||||
{PG_WIN1252, "CP1252"},
|
||||
{PG_WIN1253, "CP1253"},
|
||||
{PG_WIN1254, "CP1254"},
|
||||
{PG_WIN1255, "CP1255"},
|
||||
{PG_WIN1256, "CP1256"},
|
||||
{PG_WIN1257, "CP1257"},
|
||||
{PG_WIN1258, "CP1258"},
|
||||
#ifdef NOT_VERIFIED
|
||||
{PG_WIN874, "???"},
|
||||
#endif
|
||||
{PG_WIN1251, "CP1251"},
|
||||
{PG_WIN866, "CP866"},
|
||||
|
||||
{PG_ISO_8859_5, "ISO-8859-5"},
|
||||
{PG_ISO_8859_5, "ISO8859-5"},
|
||||
{PG_ISO_8859_5, "iso88595"},
|
||||
|
||||
{PG_ISO_8859_6, "ISO-8859-6"},
|
||||
{PG_ISO_8859_6, "ISO8859-6"},
|
||||
{PG_ISO_8859_6, "iso88596"},
|
||||
|
||||
{PG_ISO_8859_7, "ISO-8859-7"},
|
||||
{PG_ISO_8859_7, "ISO8859-7"},
|
||||
{PG_ISO_8859_7, "iso88597"},
|
||||
|
||||
{PG_ISO_8859_8, "ISO-8859-8"},
|
||||
{PG_ISO_8859_8, "ISO8859-8"},
|
||||
{PG_ISO_8859_8, "iso88598"},
|
||||
|
||||
{PG_SQL_ASCII, NULL} /* end marker */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Given a setting for LC_CTYPE, return the Postgres ID of the associated
|
||||
* encoding, if we can determine it.
|
||||
*
|
||||
* Pass in NULL to get the encoding for the current locale setting.
|
||||
*
|
||||
* If the result is PG_SQL_ASCII, callers should treat it as being compatible
|
||||
* with any desired encoding. We return this if the locale is C/POSIX or we
|
||||
* can't determine the encoding.
|
||||
*/
|
||||
int
|
||||
pg_get_encoding_from_locale(const char *ctype)
|
||||
{
|
||||
char *sys;
|
||||
int i;
|
||||
|
||||
if (ctype)
|
||||
{
|
||||
char *save;
|
||||
|
||||
save = setlocale(LC_CTYPE, NULL);
|
||||
if (!save)
|
||||
return PG_SQL_ASCII; /* setlocale() broken? */
|
||||
/* must copy result, or it might change after setlocale */
|
||||
save = strdup(save);
|
||||
if (!save)
|
||||
return PG_SQL_ASCII; /* out of memory; unlikely */
|
||||
|
||||
if (!setlocale(LC_CTYPE, ctype))
|
||||
{
|
||||
free(save);
|
||||
return PG_SQL_ASCII; /* bogus ctype passed in? */
|
||||
}
|
||||
|
||||
sys = nl_langinfo(CODESET);
|
||||
if (sys)
|
||||
sys = strdup(sys);
|
||||
|
||||
setlocale(LC_CTYPE, save);
|
||||
free(save);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* much easier... */
|
||||
ctype = setlocale(LC_CTYPE, NULL);
|
||||
if (!ctype)
|
||||
return PG_SQL_ASCII; /* setlocale() broken? */
|
||||
sys = nl_langinfo(CODESET);
|
||||
if (sys)
|
||||
sys = strdup(sys);
|
||||
}
|
||||
|
||||
if (!sys)
|
||||
return PG_SQL_ASCII; /* out of memory; unlikely */
|
||||
|
||||
if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
|
||||
{
|
||||
free(sys);
|
||||
return PG_SQL_ASCII;
|
||||
}
|
||||
|
||||
for (i = 0; encoding_match_list[i].system_enc_name; i++)
|
||||
{
|
||||
if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
|
||||
{
|
||||
free(sys);
|
||||
return encoding_match_list[i].pg_enc_code;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We print a warning if we got a CODESET string but couldn't recognize
|
||||
* it. This means we need another entry in the table.
|
||||
*/
|
||||
#ifdef FRONTEND
|
||||
fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
|
||||
ctype, sys);
|
||||
/* keep newline separate so there's only one translatable string */
|
||||
fputc('\n', stderr);
|
||||
#else
|
||||
ereport(WARNING,
|
||||
(errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
|
||||
ctype, sys),
|
||||
errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
|
||||
#endif
|
||||
|
||||
free(sys);
|
||||
return PG_SQL_ASCII;
|
||||
}
|
||||
|
||||
#else /* !(HAVE_LANGINFO_H && CODESET) */
|
||||
|
||||
/*
|
||||
* stub if no platform support
|
||||
*/
|
||||
int
|
||||
pg_get_encoding_from_locale(const char *ctype)
|
||||
{
|
||||
/*
 * This build has no <langinfo.h>/CODESET support, so the locale's encoding
 * can't be looked up.  PG_SQL_ASCII is the "can't determine" answer that
 * callers treat as compatible with any encoding; "ctype" is ignored.
 */
return PG_SQL_ASCII;
|
||||
}
|
||||
|
||||
#endif /* HAVE_LANGINFO_H && CODESET */
|
Reference in New Issue
Block a user