mirror of
https://github.com/postgres/postgres.git
synced 2025-05-01 01:04:50 +03:00
Add function to import operating system collations
Move this logic out of initdb into a user-callable function. This simplifies the code and makes it possible to update the standard collations later on if additional operating system collations appear. Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Euler Taveira <euler@timbira.com.br>
This commit is contained in:
parent
193a7d791e
commit
aa17c06fb5
@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<sect2 id="collation-managing">
|
||||
<title>Managing Collations</title>
|
||||
|
||||
<para>
|
||||
|
@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
|
||||
in the database's default tablespace, the tablespace can be specified as 0.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<xref linkend="functions-admin-collation"> lists functions used to manage
|
||||
collations.
|
||||
</para>
|
||||
|
||||
<table id="functions-admin-collation">
|
||||
<title>Collation Management Functions</title>
|
||||
<tgroup cols="3">
|
||||
<thead>
|
||||
<row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>
|
||||
<indexterm><primary>pg_import_system_collations</primary></indexterm>
|
||||
<literal><function>pg_import_system_collations(<parameter>if_not_exists</> <type>boolean</>, <parameter>schema</> <type>regnamespace</>)</function></literal>
|
||||
</entry>
|
||||
<entry><type>void</type></entry>
|
||||
<entry>Import operating system collations</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>
|
||||
<function>pg_import_system_collations</> populates the system
|
||||
catalog <literal>pg_collation</literal> with collations based on all the
|
||||
locales it finds on the operating system. This is
|
||||
what <command>initdb</command> uses;
|
||||
see <xref linkend="collation-managing"> for more details. If additional
|
||||
locales are installed into the operating system later on, this function
|
||||
can be run again to add collations for the new locales. In that case, the
|
||||
parameter <parameter>if_not_exists</parameter> should be set to true to
|
||||
skip over existing collations. The <parameter>schema</parameter>
|
||||
parameter would typically be <literal>pg_catalog</literal>, but that is
|
||||
not a requirement. (Collation objects based on locales that are no longer
|
||||
present on the operating system are never removed by this function.)
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="functions-admin-index">
|
||||
|
@ -41,7 +41,8 @@ Oid
|
||||
CollationCreate(const char *collname, Oid collnamespace,
|
||||
Oid collowner,
|
||||
int32 collencoding,
|
||||
const char *collcollate, const char *collctype)
|
||||
const char *collcollate, const char *collctype,
|
||||
bool if_not_exists)
|
||||
{
|
||||
Relation rel;
|
||||
TupleDesc tupDesc;
|
||||
@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
|
||||
PointerGetDatum(collname),
|
||||
Int32GetDatum(collencoding),
|
||||
ObjectIdGetDatum(collnamespace)))
|
||||
ereport(ERROR,
|
||||
{
|
||||
if (if_not_exists)
|
||||
{
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_DUPLICATE_OBJECT),
|
||||
errmsg("collation \"%s\" for encoding \"%s\" already exists",
|
||||
errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
|
||||
collname, pg_encoding_to_char(collencoding))));
|
||||
return InvalidOid;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DUPLICATE_OBJECT),
|
||||
errmsg("collation \"%s\" for encoding \"%s\" already exists",
|
||||
collname, pg_encoding_to_char(collencoding))));
|
||||
}
|
||||
|
||||
/*
|
||||
* Also forbid matching an any-encoding entry. This test of course is not
|
||||
@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
|
||||
PointerGetDatum(collname),
|
||||
Int32GetDatum(-1),
|
||||
ObjectIdGetDatum(collnamespace)))
|
||||
ereport(ERROR,
|
||||
{
|
||||
if (if_not_exists)
|
||||
{
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_DUPLICATE_OBJECT),
|
||||
errmsg("collation \"%s\" already exists, skipping",
|
||||
collname)));
|
||||
return InvalidOid;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DUPLICATE_OBJECT),
|
||||
errmsg("collation \"%s\" already exists",
|
||||
collname)));
|
||||
}
|
||||
|
||||
/* open pg_collation */
|
||||
rel = heap_open(CollationRelationId, RowExclusiveLock);
|
||||
|
@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
|
||||
GetUserId(),
|
||||
GetDatabaseEncoding(),
|
||||
collcollate,
|
||||
collctype);
|
||||
collctype,
|
||||
false);
|
||||
|
||||
if (!OidIsValid(newoid))
|
||||
return InvalidObjectAddress;
|
||||
|
||||
ObjectAddressSet(address, CollationRelationId, newoid);
|
||||
|
||||
@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
|
||||
errmsg("collation \"%s\" already exists in schema \"%s\"",
|
||||
collname, get_namespace_name(nspOid))));
|
||||
}
|
||||
|
||||
|
||||
/*
 * Produce a "normalized" copy of a locale name in which every encoding tag
 * has been removed.  An encoding tag is a '.' together with the run of
 * ASCII letters, digits and '-' characters that follows it, so for example
 * "en_US.utf8" becomes "en_US" and "br_FR.iso885915@euro" becomes
 * "br_FR@euro".
 *
 * new: output buffer; the result is never longer than the input, so a
 *		buffer as large as the one holding "old" is sufficient.
 * old: NUL-terminated locale name to normalize.
 *
 * Returns true if at least one '.' was encountered (i.e. the output differs
 * from the input), false if the name was copied through unchanged.
 */
pg_attribute_unused()
static bool
normalize_locale_name(char *new, const char *old)
{
	const char *src = old;
	char	   *dst = new;
	bool		stripped = false;

	for (;;)
	{
		char		c = *src;

		if (c == '\0')
			break;

		if (c != '.')
		{
			/* ordinary character: copy it through */
			*dst++ = c;
			src++;
			continue;
		}

		/*
		 * Found the start of an encoding tag such as ".utf8" or ".UTF-8":
		 * drop the dot, then drop everything that can belong to the tag.
		 */
		stripped = true;
		for (src++;; src++)
		{
			bool		is_upper = (*src >= 'A' && *src <= 'Z');
			bool		is_lower = (*src >= 'a' && *src <= 'z');
			bool		is_digit = (*src >= '0' && *src <= '9');

			if (!(is_upper || is_lower || is_digit || *src == '-'))
				break;
		}
	}

	*dst = '\0';

	return stripped;
}
|
||||
|
||||
|
||||
Datum
|
||||
pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
|
||||
bool if_not_exists = PG_GETARG_BOOL(0);
|
||||
Oid nspid = PG_GETARG_OID(1);
|
||||
|
||||
FILE *locale_a_handle;
|
||||
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
|
||||
int count = 0;
|
||||
#endif
|
||||
|
||||
if (!superuser())
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
||||
(errmsg("must be superuser to import system collations"))));
|
||||
|
||||
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
|
||||
locale_a_handle = OpenPipeStream("locale -a", "r");
|
||||
if (locale_a_handle == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not execute command \"%s\": %m",
|
||||
"locale -a")));
|
||||
|
||||
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
|
||||
{
|
||||
int i;
|
||||
size_t len;
|
||||
int enc;
|
||||
bool skip;
|
||||
char alias[NAMEDATALEN];
|
||||
|
||||
len = strlen(localebuf);
|
||||
|
||||
if (len == 0 || localebuf[len - 1] != '\n')
|
||||
{
|
||||
elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
|
||||
continue;
|
||||
}
|
||||
localebuf[len - 1] = '\0';
|
||||
|
||||
/*
|
||||
* Some systems have locale names that don't consist entirely of ASCII
|
||||
* letters (such as "bokmål" or "français"). This is
|
||||
* pretty silly, since we need the locale itself to interpret the
|
||||
* non-ASCII characters. We can't do much with those, so we filter
|
||||
* them out.
|
||||
*/
|
||||
skip = false;
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
if (IS_HIGHBIT_SET(localebuf[i]))
|
||||
{
|
||||
skip = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (skip)
|
||||
{
|
||||
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
|
||||
continue;
|
||||
}
|
||||
|
||||
enc = pg_get_encoding_from_locale(localebuf, false);
|
||||
if (enc < 0)
|
||||
{
|
||||
/* error message printed by pg_get_encoding_from_locale() */
|
||||
continue;
|
||||
}
|
||||
if (!PG_VALID_BE_ENCODING(enc))
|
||||
continue; /* ignore locales for client-only encodings */
|
||||
if (enc == PG_SQL_ASCII)
|
||||
continue; /* C/POSIX are already in the catalog */
|
||||
|
||||
count++;
|
||||
|
||||
CollationCreate(localebuf, nspid, GetUserId(), enc,
|
||||
localebuf, localebuf, if_not_exists);
|
||||
|
||||
CommandCounterIncrement();
|
||||
|
||||
/*
|
||||
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
|
||||
* ease of use. Note that collation names are unique per encoding
|
||||
* only, so this doesn't clash with "en_US" for LATIN1, say.
|
||||
*
|
||||
* This always runs in "if not exists" mode, to skip aliases that
|
||||
* conflict with an existing locale name for the same encoding. For
|
||||
* example, "br_FR.iso88591" is normalized to "br_FR", both for
|
||||
* encoding LATIN1. But the unnormalized locale "br_FR" already
|
||||
* exists for LATIN1.
|
||||
*/
|
||||
if (normalize_locale_name(alias, localebuf))
|
||||
{
|
||||
CollationCreate(alias, nspid, GetUserId(), enc,
|
||||
localebuf, localebuf, true);
|
||||
CommandCounterIncrement();
|
||||
}
|
||||
}
|
||||
|
||||
ClosePipeStream(locale_a_handle);
|
||||
|
||||
if (count == 0)
|
||||
ereport(ERROR,
|
||||
(errmsg("no usable system locales were found")));
|
||||
#endif /* not HAVE_LOCALE_T && not WIN32 */
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -1608,178 +1608,16 @@ setup_description(FILE *cmdfd)
|
||||
PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
|
||||
}
|
||||
|
||||
#ifdef HAVE_LOCALE_T
|
||||
/*
|
||||
* "Normalize" a locale name, stripping off encoding tags such as
|
||||
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
|
||||
* -> "br_FR@euro"). Return true if a new, different name was
|
||||
* generated.
|
||||
*/
|
||||
static bool
|
||||
normalize_locale_name(char *new, const char *old)
|
||||
{
|
||||
char *n = new;
|
||||
const char *o = old;
|
||||
bool changed = false;
|
||||
|
||||
while (*o)
|
||||
{
|
||||
if (*o == '.')
|
||||
{
|
||||
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
|
||||
o++;
|
||||
while ((*o >= 'A' && *o <= 'Z')
|
||||
|| (*o >= 'a' && *o <= 'z')
|
||||
|| (*o >= '0' && *o <= '9')
|
||||
|| (*o == '-'))
|
||||
o++;
|
||||
changed = true;
|
||||
}
|
||||
else
|
||||
*n++ = *o++;
|
||||
}
|
||||
*n = '\0';
|
||||
|
||||
return changed;
|
||||
}
|
||||
#endif /* HAVE_LOCALE_T */
|
||||
|
||||
/*
|
||||
* populate pg_collation
|
||||
*/
|
||||
static void
|
||||
setup_collation(FILE *cmdfd)
|
||||
{
|
||||
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
|
||||
int i;
|
||||
FILE *locale_a_handle;
|
||||
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
|
||||
int count = 0;
|
||||
|
||||
locale_a_handle = popen_check("locale -a", "r");
|
||||
if (!locale_a_handle)
|
||||
return; /* complaint already printed */
|
||||
|
||||
PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
|
||||
" collname name, "
|
||||
" locale name, "
|
||||
" encoding int) WITHOUT OIDS;\n\n");
|
||||
|
||||
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
|
||||
{
|
||||
size_t len;
|
||||
int enc;
|
||||
bool skip;
|
||||
char *quoted_locale;
|
||||
char alias[NAMEDATALEN];
|
||||
|
||||
len = strlen(localebuf);
|
||||
|
||||
if (len == 0 || localebuf[len - 1] != '\n')
|
||||
{
|
||||
if (debug)
|
||||
fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
|
||||
progname, localebuf);
|
||||
continue;
|
||||
}
|
||||
localebuf[len - 1] = '\0';
|
||||
|
||||
/*
|
||||
* Some systems have locale names that don't consist entirely of ASCII
|
||||
* letters (such as "bokmål" or "français"). This is
|
||||
* pretty silly, since we need the locale itself to interpret the
|
||||
* non-ASCII characters. We can't do much with those, so we filter
|
||||
* them out.
|
||||
*/
|
||||
skip = false;
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
if (IS_HIGHBIT_SET(localebuf[i]))
|
||||
{
|
||||
skip = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (skip)
|
||||
{
|
||||
if (debug)
|
||||
fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
|
||||
progname, localebuf);
|
||||
continue;
|
||||
}
|
||||
|
||||
enc = pg_get_encoding_from_locale(localebuf, debug);
|
||||
if (enc < 0)
|
||||
{
|
||||
/* error message printed by pg_get_encoding_from_locale() */
|
||||
continue;
|
||||
}
|
||||
if (!PG_VALID_BE_ENCODING(enc))
|
||||
continue; /* ignore locales for client-only encodings */
|
||||
if (enc == PG_SQL_ASCII)
|
||||
continue; /* C/POSIX are already in the catalog */
|
||||
|
||||
count++;
|
||||
|
||||
quoted_locale = escape_quotes(localebuf);
|
||||
|
||||
PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
|
||||
quoted_locale, quoted_locale, enc);
|
||||
|
||||
/*
|
||||
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
|
||||
* ease of use. Note that collation names are unique per encoding
|
||||
* only, so this doesn't clash with "en_US" for LATIN1, say.
|
||||
*/
|
||||
if (normalize_locale_name(alias, localebuf))
|
||||
{
|
||||
char *quoted_alias = escape_quotes(alias);
|
||||
|
||||
PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
|
||||
quoted_alias, quoted_locale, enc);
|
||||
free(quoted_alias);
|
||||
}
|
||||
free(quoted_locale);
|
||||
}
|
||||
PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
|
||||
|
||||
/* Add an SQL-standard name */
|
||||
PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
|
||||
|
||||
/*
|
||||
* When copying collations to the final location, eliminate aliases that
|
||||
* conflict with an existing locale name for the same encoding. For
|
||||
* example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
|
||||
* LATIN1. But the unnormalized locale "br_FR" already exists for LATIN1.
|
||||
* Prefer the alias that matches the OS locale name, else the first locale
|
||||
* name by sort order (arbitrary choice to be deterministic).
|
||||
*
|
||||
* Also, eliminate any aliases that conflict with pg_collation's
|
||||
* hard-wired entries for "C" etc.
|
||||
*/
|
||||
PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
|
||||
" SELECT DISTINCT ON (collname, encoding)"
|
||||
" collname, "
|
||||
" (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
|
||||
" (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
|
||||
" encoding, locale, locale "
|
||||
" FROM tmp_pg_collation"
|
||||
" WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
|
||||
" ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
|
||||
|
||||
/*
|
||||
* Even though the table is temp, drop it explicitly so it doesn't get
|
||||
* copied into template0/postgres databases.
|
||||
*/
|
||||
PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
|
||||
|
||||
pclose(locale_a_handle);
|
||||
|
||||
if (count == 0 && !debug)
|
||||
{
|
||||
printf(_("No usable system locales were found.\n"));
|
||||
printf(_("Use the option \"--debug\" to see details.\n"));
|
||||
}
|
||||
#endif /* not HAVE_LOCALE_T && not WIN32 */
|
||||
PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201701172
|
||||
#define CATALOG_VERSION_NO 201701181
|
||||
|
||||
#endif
|
||||
|
@ -17,7 +17,8 @@
|
||||
extern Oid CollationCreate(const char *collname, Oid collnamespace,
|
||||
Oid collowner,
|
||||
int32 collencoding,
|
||||
const char *collcollate, const char *collctype);
|
||||
const char *collcollate, const char *collctype,
|
||||
bool if_not_exists);
|
||||
extern void RemoveCollationById(Oid collationOid);
|
||||
|
||||
#endif /* PG_COLLATION_FN_H */
|
||||
|
@ -5349,6 +5349,9 @@ DESCR("pg_controldata recovery state information as a function");
|
||||
DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
|
||||
DESCR("pg_controldata init state information as a function");
|
||||
|
||||
DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
|
||||
DESCR("import collations from operating system");
|
||||
|
||||
/*
|
||||
* Symbolic values for provolatile column: these indicate whether the result
|
||||
* of a function is dependent *only* on the values of its explicit arguments,
|
||||
|
Loading…
x
Reference in New Issue
Block a user