From 2810396312664bdb941e549df7dfa75218d73a1c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 21 Sep 2019 16:29:17 -0400 Subject: [PATCH] Fix up handling of nondeterministic collations with pattern_ops opclasses. text_pattern_ops and its siblings can't be used with nondeterministic collations, because they use the text_eq operator which will not behave as bitwise equality if applied with a nondeterministic collation. The initial implementation of that restriction was to insert a run-time test in the related comparison functions, but that is inefficient, may throw misleading errors, and will throw errors in some cases that would work. It seems sufficient to just prevent the combination during CREATE INDEX, so do that instead. Lacking any better way to identify the opclasses involved, we need to hard-wire tests for them, which requires hand-assigned values for their OIDs, which forces a catversion bump because they previously had OIDs that would be assigned automatically. That's slightly annoying in the v12 branch, but fortunately we're not at rc1 yet, so just do it. Back-patch to v12 where nondeterministic collations were added. In passing, run make reformat-dat-files, which found some unrelated whitespace issues (slightly different ones in HEAD and v12). Peter Eisentraut, with small corrections by me Discussion: https://postgr.es/m/22566.1568675619@sss.pgh.pa.us --- src/backend/catalog/index.c | 45 +++++++++++++++++++++++++++++ src/backend/utils/adt/varchar.c | 32 ++++---------------- src/backend/utils/adt/varlena.c | 43 ++++----------------------- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_opclass.dat | 9 ++++-- src/include/catalog/pg_operator.dat | 4 +-- 6 files changed, 66 insertions(+), 69 deletions(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 54288a498c1..098732cc4a8 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -762,6 +762,51 @@ index_create(Relation heapRelation, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("user-defined indexes on system catalog tables are not supported"))); + /* + * Btree text_pattern_ops uses text_eq as the equality operator, which is + * fine as long as the collation is deterministic; text_eq then reduces to + * bitwise equality and so it is semantically compatible with the other + * operators and functions in that opclass. But with a nondeterministic + * collation, text_eq could yield results that are incompatible with the + * actual behavior of the index (which is determined by the opclass's + * comparison function). We prevent such problems by refusing creation of + * an index with that opclass and a nondeterministic collation. + * + * The same applies to varchar_pattern_ops and bpchar_pattern_ops. If we + * find more cases, we might decide to create a real mechanism for marking + * opclasses as incompatible with nondeterminism; but for now, this small + * hack suffices. + * + * Another solution is to use a special operator, not text_eq, as the + * equality opclass member; but that is undesirable because it would + * prevent index usage in many queries that work fine today. + */ + for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++) + { + Oid collation = collationObjectId[i]; + Oid opclass = classObjectId[i]; + + if (collation) + { + if ((opclass == TEXT_BTREE_PATTERN_OPS_OID || + opclass == VARCHAR_BTREE_PATTERN_OPS_OID || + opclass == BPCHAR_BTREE_PATTERN_OPS_OID) && + !get_collation_isdeterministic(collation)) + { + HeapTuple classtup; + + classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(classtup)) + elog(ERROR, "cache lookup failed for operator class %u", opclass); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname)))); + ReleaseSysCache(classtup); + } + } + } + /* * Concurrent index build on a system catalog is unsafe because we tend to * release locks before committing in catalogs. diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 9d943993230..e63a4e553b2 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -1105,23 +1105,12 @@ hashbpcharextended(PG_FUNCTION_ARGS) */ static int -internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2, Oid collid) +internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) { int result; int len1, len2; - check_collation_set(collid); - - /* - * see internal_text_pattern_compare() - */ - if (!get_collation_isdeterministic(collid)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondeterministic collations are not supported for operator class \"%s\"", - "bpchar_pattern_ops"))); - len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); @@ -1144,7 +1133,7 @@ bpchar_pattern_lt(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_bpchar_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1160,7 +1149,7 @@ bpchar_pattern_le(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_bpchar_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1176,7 +1165,7 @@ bpchar_pattern_ge(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_bpchar_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1192,7 +1181,7 @@ bpchar_pattern_gt(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_bpchar_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1208,7 +1197,7 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_bpchar_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1221,17 +1210,8 @@ Datum btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - Oid collid = ssup->ssup_collation; MemoryContext oldcontext; - check_collation_set(collid); - - if (!get_collation_isdeterministic(collid)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondeterministic collations are not supported for operator class \"%s\"", - "bpchar_pattern_ops"))); - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); /* Use generic string SortSupport, forcing "C" collation */ diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d36156f4e4d..2480074813d 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2996,34 +2996,12 @@ textgename(PG_FUNCTION_ARGS) */ static int -internal_text_pattern_compare(text *arg1, text *arg2, Oid collid) +internal_text_pattern_compare(text *arg1, text *arg2) { int result; int len1, len2; - check_collation_set(collid); - - /* - * XXX We cannot use a text_pattern_ops index for nondeterministic - * collations, because these operators intentionally ignore the collation. - * However, the planner has no way to know that, so it might choose such - * an index for an "=" clause, which would lead to wrong results. This - * check here doesn't prevent choosing the index, but it will at least - * error out if the index is chosen. A text_pattern_ops index on a column - * with nondeterministic collation is pretty useless anyway, since LIKE - * etc. won't work there either. A future possibility would be to - * annotate the operator class or its members in the catalog to avoid the - * index. Another alternative is to stay away from the *_pattern_ops - * operator classes and prefer creating LIKE-supporting indexes with - * COLLATE "C". - */ - if (!get_collation_isdeterministic(collid)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondeterministic collations are not supported for operator class \"%s\"", - "text_pattern_ops"))); - len1 = VARSIZE_ANY_EXHDR(arg1); len2 = VARSIZE_ANY_EXHDR(arg2); @@ -3046,7 +3024,7 @@ text_pattern_lt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_text_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3062,7 +3040,7 @@ text_pattern_le(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_text_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3078,7 +3056,7 @@ text_pattern_ge(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_text_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3094,7 +3072,7 @@ text_pattern_gt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_text_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3110,7 +3088,7 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); + result = internal_text_pattern_compare(arg1, arg2); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3123,17 +3101,8 @@ Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - Oid collid = ssup->ssup_collation; MemoryContext oldcontext; - check_collation_set(collid); - - if (!get_collation_isdeterministic(collid)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondeterministic collations are not supported for operator class \"%s\"", - "text_pattern_ops"))); - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); /* Use generic string SortSupport, forcing "C" collation */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 00cc71dcd12..318d99ce63b 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201909071 +#define CATALOG_VERSION_NO 201909213 #endif diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index fdfea85efed..2d575102efa 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -146,13 +146,16 @@ opcfamily => 'btree/datetime_ops', opcintype => 'timestamp' }, { opcmethod => 'hash', opcname => 'timestamp_ops', opcfamily => 'hash/timestamp_ops', opcintype => 'timestamp' }, -{ opcmethod => 'btree', opcname => 'text_pattern_ops', +{ oid => '4217', oid_symbol => 'TEXT_BTREE_PATTERN_OPS_OID', + opcmethod => 'btree', opcname => 'text_pattern_ops', opcfamily => 'btree/text_pattern_ops', opcintype => 'text', opcdefault => 'f' }, -{ opcmethod => 'btree', opcname => 'varchar_pattern_ops', +{ oid => '4218', oid_symbol => 'VARCHAR_BTREE_PATTERN_OPS_OID', + opcmethod => 'btree', opcname => 'varchar_pattern_ops', opcfamily => 'btree/text_pattern_ops', opcintype => 'text', opcdefault => 'f' }, -{ opcmethod => 'btree', opcname => 'bpchar_pattern_ops', +{ oid => '4219', oid_symbol => 'BPCHAR_BTREE_PATTERN_OPS_OID', + opcmethod => 'btree', opcname => 'bpchar_pattern_ops', opcfamily => 'btree/bpchar_pattern_ops', opcintype => 'bpchar', opcdefault => 'f' }, { opcmethod => 'btree', opcname => 'money_ops', opcfamily => 'btree/money_ops', diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 96823cd59b4..fa7dc96ece6 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -660,13 +660,13 @@ { oid => '613', descr => 'distance between', oprname => '<->', oprleft => 'point', oprright => 'line', - oprresult => 'float8', oprcom => '<->(line,point)',oprcode => 'dist_pl' }, + oprresult => 'float8', oprcom => '<->(line,point)', oprcode => 'dist_pl' }, { oid => '760', descr => 'distance between', oprname => '<->', oprleft => 'line', oprright => 'point', oprresult => 'float8', oprcom => '<->(point,line)', oprcode => 'dist_lp' }, { oid => '614', descr => 'distance between', oprname => '<->', oprleft => 'point', oprright => 'lseg', - oprresult => 'float8', oprcom => '<->(lseg,point)',oprcode => 'dist_ps' }, + oprresult => 'float8', oprcom => '<->(lseg,point)', oprcode => 'dist_ps' }, { oid => '761', descr => 'distance between', oprname => '<->', oprleft => 'lseg', oprright => 'point', oprresult => 'float8', oprcom => '<->(point,lseg)', oprcode => 'dist_sp' },