mirror of
https://github.com/postgres/postgres.git
synced 2025-10-29 22:49:41 +03:00
Make collation-aware system catalog columns use "C" collation.
Up to now we allowed text columns in system catalogs to use collation "default", but that isn't really safe because it might mean something different in template0 than it means in a database cloned from template0. In particular, this could mean that cloned pg_statistic entries for such columns weren't entirely valid, possibly leading to bogus planner estimates, though (probably) not any outright failures. In the wake of commit 5e0928005, a better solution is available: if we label such columns with "C" collation, then their pg_statistic entries will also use that collation and hence will be valid independently of the database collation. This also provides a cleaner solution for indexes on such columns than the hack added by commit 0b28ea79c: the indexes will naturally inherit "C" collation and don't have to be forced to use text_pattern_ops. Also, with the planned improvement of type "name" to be collation-aware, this policy will apply cleanly to both text and name columns. Because of the pg_statistic angle, we should also apply this policy to the tables in information_schema. This patch does that by adjusting information_schema's textual domain types to specify "C" collation. That has the user-visible effect that order-sensitive comparisons to textual information_schema view columns will now use "C" collation by default. The SQL standard says that the collation of those view columns is implementation-defined, so I think this is legal per spec. At some point this might allow for translation of such comparisons into indexable conditions on the underlying "name" columns, although additional work will be needed before that can happen. Discussion: https://postgr.es/m/19346.1544895309@sss.pgh.pa.us
This commit is contained in:
@@ -64,9 +64,9 @@ ScanKeyEntryInitialize(ScanKey entry,
|
||||
* It cannot handle NULL arguments, unary operators, or nondefault operators,
|
||||
* but we need none of those features for most hardwired lookups.
|
||||
*
|
||||
* We set collation to DEFAULT_COLLATION_OID always. This is appropriate
|
||||
* for textual columns in system catalogs, and it will be ignored for
|
||||
* non-textual columns, so it's not worth trying to be more finicky.
|
||||
* We set collation to C_COLLATION_OID always. This is the correct value
|
||||
* for all collation-aware columns in system catalogs, and it will be ignored
|
||||
* for other column types, so it's not worth trying to be more finicky.
|
||||
*
|
||||
* Note: CurrentMemoryContext at call should be as long-lived as the ScanKey
|
||||
* itself, because that's what will be used for any subsidiary info attached
|
||||
@@ -83,7 +83,7 @@ ScanKeyInit(ScanKey entry,
|
||||
entry->sk_attno = attributeNumber;
|
||||
entry->sk_strategy = strategy;
|
||||
entry->sk_subtype = InvalidOid;
|
||||
entry->sk_collation = DEFAULT_COLLATION_OID;
|
||||
entry->sk_collation = C_COLLATION_OID;
|
||||
entry->sk_argument = argument;
|
||||
fmgr_info(procedure, &entry->sk_func);
|
||||
}
|
||||
|
||||
@@ -744,6 +744,15 @@ DefineAttr(char *name, char *type, int attnum, int nullness)
|
||||
attrtypes[attnum]->attndims = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a system catalog column is collation-aware, force it to use C
|
||||
* collation, so that its behavior is independent of the database's
|
||||
* collation. This is essential to allow template0 to be cloned with a
|
||||
* different database collation.
|
||||
*/
|
||||
if (OidIsValid(attrtypes[attnum]->attcollation))
|
||||
attrtypes[attnum]->attcollation = C_COLLATION_OID;
|
||||
|
||||
attrtypes[attnum]->attstattarget = -1;
|
||||
attrtypes[attnum]->attcacheoff = -1;
|
||||
attrtypes[attnum]->atttypmod = -1;
|
||||
|
||||
@@ -167,6 +167,9 @@ my $GenbkiNextOid = $FirstGenbkiObjectId;
|
||||
my $BOOTSTRAP_SUPERUSERID =
|
||||
Catalog::FindDefinedSymbolFromData($catalog_data{pg_authid},
|
||||
'BOOTSTRAP_SUPERUSERID');
|
||||
my $C_COLLATION_OID =
|
||||
Catalog::FindDefinedSymbolFromData($catalog_data{pg_collation},
|
||||
'C_COLLATION_OID');
|
||||
my $PG_CATALOG_NAMESPACE =
|
||||
Catalog::FindDefinedSymbolFromData($catalog_data{pg_namespace},
|
||||
'PG_CATALOG_NAMESPACE');
|
||||
@@ -693,7 +696,10 @@ sub morph_row_for_pgattr
|
||||
|
||||
# set attndims if it's an array type
|
||||
$row->{attndims} = $type->{typcategory} eq 'A' ? '1' : '0';
|
||||
$row->{attcollation} = $type->{typcollation};
|
||||
|
||||
# collation-aware catalog columns must use C collation
|
||||
$row->{attcollation} = $type->{typcollation} != 0 ?
|
||||
$C_COLLATION_OID : 0;
|
||||
|
||||
if (defined $attr->{forcenotnull})
|
||||
{
|
||||
|
||||
@@ -208,7 +208,7 @@ CREATE DOMAIN cardinal_number AS integer
|
||||
* CHARACTER_DATA domain
|
||||
*/
|
||||
|
||||
CREATE DOMAIN character_data AS character varying;
|
||||
CREATE DOMAIN character_data AS character varying COLLATE "C";
|
||||
|
||||
|
||||
/*
|
||||
@@ -216,7 +216,7 @@ CREATE DOMAIN character_data AS character varying;
|
||||
* SQL_IDENTIFIER domain
|
||||
*/
|
||||
|
||||
CREATE DOMAIN sql_identifier AS character varying;
|
||||
CREATE DOMAIN sql_identifier AS character varying COLLATE "C";
|
||||
|
||||
|
||||
/*
|
||||
@@ -243,7 +243,7 @@ CREATE DOMAIN time_stamp AS timestamp(2) with time zone
|
||||
* YES_OR_NO domain
|
||||
*/
|
||||
|
||||
CREATE DOMAIN yes_or_no AS character varying(3)
|
||||
CREATE DOMAIN yes_or_no AS character varying(3) COLLATE "C"
|
||||
CONSTRAINT yes_or_no_check CHECK (value IN ('YES', 'NO'));
|
||||
|
||||
|
||||
|
||||
13
src/backend/utils/cache/catcache.c
vendored
13
src/backend/utils/cache/catcache.c
vendored
@@ -22,6 +22,7 @@
|
||||
#include "access/tuptoaster.h"
|
||||
#include "access/valid.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "catalog/pg_operator.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "miscadmin.h"
|
||||
@@ -1014,8 +1015,8 @@ CatalogCacheInitializeCache(CatCache *cache)
|
||||
/* Fill in sk_strategy as well --- always standard equality */
|
||||
cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber;
|
||||
cache->cc_skey[i].sk_subtype = InvalidOid;
|
||||
/* Currently, there are no catcaches on collation-aware data types */
|
||||
cache->cc_skey[i].sk_collation = InvalidOid;
|
||||
/* If a catcache key requires a collation, it must be C collation */
|
||||
cache->cc_skey[i].sk_collation = C_COLLATION_OID;
|
||||
|
||||
CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p",
|
||||
cache->cc_relname,
|
||||
@@ -1961,10 +1962,10 @@ CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos,
|
||||
NameData srcname;
|
||||
|
||||
/*
|
||||
* Must be careful in case the caller passed a C string where a
|
||||
* NAME is wanted: convert the given argument to a correctly
|
||||
* padded NAME. Otherwise the memcpy() done by datumCopy() could
|
||||
* fall off the end of memory.
|
||||
* Must be careful in case the caller passed a C string where a NAME
|
||||
* is wanted: convert the given argument to a correctly padded NAME.
|
||||
* Otherwise the memcpy() done by datumCopy() could fall off the end
|
||||
* of memory.
|
||||
*/
|
||||
if (att->atttypid == NAMEOID)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user