Collations with nondeterministic comparison

This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms.

This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider.

The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison).

This patch makes changes in three areas:

- CREATE COLLATION DDL changes and system catalog changes to support this new flag.

- Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information.

- String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm().

Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com
2025-07-28 23:42:10 +03:00 · 2019-03-22 12:09:32 +01:00
parent 2ab6d28d23
commit 5e1963fb76
69 changed files with 2090 additions and 242 deletions
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1149,6 +1149,716 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
 t        | t
 (1 row)

+-- nondeterministic collations
+CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true);
+CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false);
+CREATE TABLE test6 (a int, b text);
+-- same string in different normal forms
+INSERT INTO test6 VALUES (1, U&'\00E4bc');
+INSERT INTO test6 VALUES (2, U&'\0061\0308bc');
+SELECT * FROM test6;
+ a |  b  
+---+-----
+ 1 | äbc
+ 2 | äbc
+(2 rows)
+
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det;
+ a |  b  
+---+-----
+ 1 | äbc
+(1 row)
+
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet;
+ a |  b  
+---+-----
+ 1 | äbc
+ 2 | äbc
+(2 rows)
+
+CREATE COLLATION case_sensitive (provider = icu, locale = 'und');
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
+ ?column? | ?column? 
+----------+----------
+ t        | f
+(1 row)
+
+SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+ ?column? | ?column? 
+----------+----------
+ t        | t
+(1 row)
+
+CREATE TABLE test1cs (x text COLLATE case_sensitive);
+CREATE TABLE test2cs (x text COLLATE case_sensitive);
+CREATE TABLE test3cs (x text COLLATE case_sensitive);
+INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2cs VALUES ('ABC'), ('ghi');
+INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+SELECT x FROM test3cs WHERE x = 'abc';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test3cs WHERE x <> 'abc';
+  x  
+-----
+ ABC
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test3cs WHERE x LIKE 'a%';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test3cs WHERE x ILIKE 'a%';
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test3cs WHERE x ~ 'a';
+  x  
+-----
+ abc
+(1 row)
+
+SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
+SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
+SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
+  x  
+-----
+ ghi
+(1 row)
+
+SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
+  x  
+-----
+ ghi
+(1 row)
+
+SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
+  x  
+-----
+ abc
+ def
+(2 rows)
+
+SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
+  x  
+-----
+ ABC
+(1 row)
+
+SELECT DISTINCT x FROM test3cs ORDER BY x;
+  x  
+-----
+ abc
+ ABC
+ def
+ ghi
+(4 rows)
+
+SELECT count(DISTINCT x) FROM test3cs;
+ count 
+-------
+     4
+(1 row)
+
+SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
+  x  | count 
+-----+-------
+ abc |     1
+ ABC |     1
+ def |     1
+ ghi |     1
+(4 rows)
+
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
+  x  | row_number | rank 
+-----+------------+------
+ abc |          1 |    1
+ ABC |          2 |    2
+ def |          3 |    3
+ ghi |          4 |    4
+(4 rows)
+
+CREATE UNIQUE INDEX ON test1cs (x);  -- ok
+INSERT INTO test1cs VALUES ('ABC');  -- ok
+CREATE UNIQUE INDEX ON test3cs (x);  -- ok
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc');
+ string_to_array 
+-----------------
+ {ABC,DEF,GHI}
+(1 row)
+
+SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b');
+   string_to_array   
+---------------------
+ {A,B,C,D,E,F,G,H,I}
+(1 row)
+
+CREATE TABLE test1ci (x text COLLATE case_insensitive);
+CREATE TABLE test2ci (x text COLLATE case_insensitive);
+CREATE TABLE test3ci (x text COLLATE case_insensitive);
+CREATE INDEX ON test3ci (x text_pattern_ops);  -- error
+ERROR:  nondeterministic collations are not supported for operator class "text_pattern_ops"
+INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2ci VALUES ('ABC'), ('ghi');
+INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+SELECT x FROM test3ci WHERE x = 'abc';
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+SELECT x FROM test3ci WHERE x <> 'abc';
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT x FROM test3ci WHERE x LIKE 'a%';
+ERROR:  nondeterministic collations are not supported for LIKE
+SELECT x FROM test3ci WHERE x ILIKE 'a%';
+ERROR:  nondeterministic collations are not supported for ILIKE
+SELECT x FROM test3ci WHERE x SIMILAR TO 'a%';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test3ci WHERE x ~ 'a';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x;
+  x  
+-----
+ ABC
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci;
+  x  
+-----
+ ghi
+ abc
+(2 rows)
+
+SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci;
+  x  
+-----
+ ghi
+ ABC
+(2 rows)
+
+SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci;
+  x  
+-----
+ def
+(1 row)
+
+SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci;
+ x 
+---
+(0 rows)
+
+SELECT DISTINCT x FROM test3ci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT count(DISTINCT x) FROM test3ci;
+ count 
+-------
+     3
+(1 row)
+
+SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x;
+  x  | count 
+-----+-------
+ abc |     2
+ def |     1
+ ghi |     1
+(3 rows)
+
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x;
+  x  | row_number | rank 
+-----+------------+------
+ abc |          1 |    1
+ ABC |          2 |    1
+ def |          3 |    3
+ ghi |          4 |    4
+(4 rows)
+
+CREATE UNIQUE INDEX ON test1ci (x);  -- ok
+INSERT INTO test1ci VALUES ('ABC');  -- error
+ERROR:  duplicate key value violates unique constraint "test1ci_x_idx"
+DETAIL:  Key (x)=(ABC) already exists.
+CREATE UNIQUE INDEX ON test3ci (x);  -- error
+ERROR:  could not create unique index "test3ci_x_idx"
+DETAIL:  Key (x)=(abc) is duplicated.
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc');
+ERROR:  nondeterministic collations are not supported for substring searches
+SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b');
+    string_to_array     
+------------------------
+ {A,NULL,C,D,E,F,G,H,I}
+(1 row)
+
+-- bpchar
+CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive);
+CREATE INDEX ON test3bpci (x bpchar_pattern_ops);  -- error
+ERROR:  nondeterministic collations are not supported for operator class "bpchar_pattern_ops"
+INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2bpci VALUES ('ABC'), ('ghi');
+INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+SELECT x FROM test3bpci WHERE x = 'abc';
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+SELECT x FROM test3bpci WHERE x <> 'abc';
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT x FROM test3bpci WHERE x LIKE 'a%';
+ERROR:  nondeterministic collations are not supported for LIKE
+SELECT x FROM test3bpci WHERE x ILIKE 'a%';
+ERROR:  nondeterministic collations are not supported for ILIKE
+SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test3bpci WHERE x ~ 'a';
+ERROR:  nondeterministic collations are not supported for regular expressions
+SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x;
+  x  
+-----
+ ABC
+ def
+ ghi
+(3 rows)
+
+SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci;
+  x  
+-----
+ ghi
+ abc
+(2 rows)
+
+SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci;
+  x  
+-----
+ ghi
+ ABC
+(2 rows)
+
+SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci;
+  x  
+-----
+ def
+(1 row)
+
+SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci;
+ x 
+---
+(0 rows)
+
+SELECT DISTINCT x FROM test3bpci ORDER BY x;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT count(DISTINCT x) FROM test3bpci;
+ count 
+-------
+     3
+(1 row)
+
+SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x;
+  x  | count 
+-----+-------
+ abc |     2
+ def |     1
+ ghi |     1
+(3 rows)
+
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x;
+  x  | row_number | rank 
+-----+------------+------
+ abc |          1 |    1
+ ABC |          2 |    1
+ def |          3 |    3
+ ghi |          4 |    4
+(4 rows)
+
+CREATE UNIQUE INDEX ON test1bpci (x);  -- ok
+INSERT INTO test1bpci VALUES ('ABC');  -- error
+ERROR:  duplicate key value violates unique constraint "test1bpci_x_idx"
+DETAIL:  Key (x)=(ABC) already exists.
+CREATE UNIQUE INDEX ON test3bpci (x);  -- error
+ERROR:  could not create unique index "test3bpci_x_idx"
+DETAIL:  Key (x)=(abc) is duplicated.
+SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc');
+ERROR:  nondeterministic collations are not supported for substring searches
+SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b');
+    string_to_array     
+------------------------
+ {A,NULL,C,D,E,F,G,H,I}
+(1 row)
+
+-- This tests the issue described in match_pattern_prefix().  In the
+-- absence of that check, the case_insensitive tests below would
+-- return no rows where they should logically return one.
+CREATE TABLE test4c (x text COLLATE "C");
+INSERT INTO test4c VALUES ('abc');
+CREATE INDEX ON test4c (x);
+SET enable_seqscan = off;
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive;  -- ok, no rows
+ x 
+---
+(0 rows)
+
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive;  -- ok, no rows
+ x 
+---
+(0 rows)
+
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive;  -- error
+ERROR:  nondeterministic collations are not supported for LIKE
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive;  -- error
+ERROR:  nondeterministic collations are not supported for LIKE
+RESET enable_seqscan;
+-- Unicode special case: different variants of Greek lower case sigma.
+-- A naive implementation like citext that just does lower(x) =
+-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ'
+-- but upper('ς') is 'Σ'.
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive;
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive;
+ ?column? 
+----------
+ t
+(1 row)
+
+-- name vs. text comparison operators
+SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive;
+ relname  
+----------
+ pg_class
+(1 row)
+
+SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive;
+ relname  
+----------
+ pg_class
+(1 row)
+
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive;
+ typname 
+---------
+ int4
+ int8
+(2 rows)
+
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;;
+ typname 
+---------
+ int4
+ int8
+(2 rows)
+
+-- test case adapted from subselect.sql
+CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text);
+INSERT INTO outer_text VALUES ('a', 'a');
+INSERT INTO outer_text VALUES ('b', 'a');
+INSERT INTO outer_text VALUES ('A', NULL);
+INSERT INTO outer_text VALUES ('B', NULL);
+CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text);
+INSERT INTO inner_text VALUES ('a', NULL);
+SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
+ f1 | f2 
+----+----
+ b  | a
+ B  | 
+(2 rows)
+
+-- accents
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+CREATE TABLE test4 (a int, b text);
+INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
+SELECT * FROM test4 WHERE b = 'cote';
+ a |  b   
+---+------
+ 1 | cote
+(1 row)
+
+SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents;
+ a |  b   
+---+------
+ 1 | cote
+ 2 | côte
+ 3 | coté
+ 4 | côté
+(4 rows)
+
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents;  -- still case-sensitive
+ a | b 
+---+---
+(0 rows)
+
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive;
+ a |  b   
+---+------
+ 1 | cote
+(1 row)
+
+-- foreign keys (should use collation of primary key)
+-- PK is case-sensitive, FK is case-insensitive
+CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY);
+INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test10fk VALUES ('abc');  -- ok
+INSERT INTO test10fk VALUES ('ABC');  -- error
+ERROR:  insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey"
+DETAIL:  Key (x)=(ABC) is not present in table "test10pk".
+INSERT INTO test10fk VALUES ('xyz');  -- error
+ERROR:  insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey"
+DETAIL:  Key (x)=(xyz) is not present in table "test10pk".
+SELECT * FROM test10pk;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT * FROM test10fk;
+  x  
+-----
+ abc
+(1 row)
+
+-- restrict update even though the values are "equal" in the FK table
+UPDATE test10fk SET x = 'ABC' WHERE x = 'abc';  -- error
+ERROR:  insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey"
+DETAIL:  Key (x)=(ABC) is not present in table "test10pk".
+SELECT * FROM test10fk;
+  x  
+-----
+ abc
+(1 row)
+
+DELETE FROM test10pk WHERE x = 'abc';
+SELECT * FROM test10pk;
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT * FROM test10fk;
+ x 
+---
+(0 rows)
+
+-- PK is case-insensitive, FK is case-sensitive
+CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY);
+INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test11fk VALUES ('abc');  -- ok
+INSERT INTO test11fk VALUES ('ABC');  -- ok
+INSERT INTO test11fk VALUES ('xyz');  -- error
+ERROR:  insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey"
+DETAIL:  Key (x)=(xyz) is not present in table "test11pk".
+SELECT * FROM test11pk;
+  x  
+-----
+ abc
+ def
+ ghi
+(3 rows)
+
+SELECT * FROM test11fk;
+  x  
+-----
+ abc
+ ABC
+(2 rows)
+
+-- cascade update even though the values are "equal" in the PK table
+UPDATE test11pk SET x = 'ABC' WHERE x = 'abc';
+SELECT * FROM test11fk;
+  x  
+-----
+ ABC
+ ABC
+(2 rows)
+
+DELETE FROM test11pk WHERE x = 'abc';
+SELECT * FROM test11pk;
+  x  
+-----
+ def
+ ghi
+(2 rows)
+
+SELECT * FROM test11fk;
+ x 
+---
+(0 rows)
+
+-- partitioning
+CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc');
+INSERT INTO test20 VALUES (1, 'abc');
+INSERT INTO test20 VALUES (2, 'ABC');
+SELECT * FROM test20_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test21 VALUES (1, 'abc');
+INSERT INTO test21 VALUES (2, 'ABC');
+SELECT * FROM test21_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test22 VALUES (1, 'def');
+INSERT INTO test22 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1);
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test23 VALUES (1, 'def');
+INSERT INTO test23 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1);
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc');
+INSERT INTO test30 VALUES (1, 'abc');
+INSERT INTO test30 VALUES (2, 'ABC');
+SELECT * FROM test30_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test31 VALUES (1, 'abc');
+INSERT INTO test31 VALUES (2, 'ABC');
+SELECT * FROM test31_1;
+ a |  b  
+---+-----
+ 1 | abc
+ 2 | ABC
+(2 rows)
+
+CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test32 VALUES (1, 'def');
+INSERT INTO test32 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1);
+ ?column? 
+----------
+ t
+(1 row)
+
+CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test33 VALUES (1, 'def');
+INSERT INTO test33 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
+ ?column? 
+----------
+ t
+(1 row)
+
 -- cleanup
 SET client_min_messages TO warning;
 DROP SCHEMA collate_tests CASCADE;
--- a/src/test/regress/expected/collate.linux.utf8.out
+++ b/src/test/regress/expected/collate.linux.utf8.out
@@ -1117,6 +1117,11 @@ select textrange_en_us('A','Z') @> 'b'::text;

 drop type textrange_c;
 drop type textrange_en_us;
+-- nondeterministic collations
+-- (not supported with libc provider)
+CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true);
+CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false);
+ERROR:  nondeterministic collations not supported with this provider
 -- cleanup
 SET client_min_messages TO warning;
 DROP SCHEMA collate_tests CASCADE;
--- a/src/test/regress/expected/collate.out
+++ b/src/test/regress/expected/collate.out
@@ -498,6 +498,21 @@ SELECT a, b, a < b as lt FROM
 A | b | t
 (2 rows)

+-- collation mismatch in subselects
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10);
+ERROR:  could not determine which collation to use for string hashing
+HINT:  Use the COLLATE clause to set the collation explicitly.
+-- now it works with overrides
+SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10);
+ a | x | y 
+---+---+---
+(0 rows)
+
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10);
+ a | x | y 
+---+---+---
+(0 rows)
+
 -- casting
 SELECT CAST('42' AS text COLLATE "C");
 ERROR:  syntax error at or near "COLLATE"
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -745,6 +745,25 @@ select * from outer_7597 where (f1, f2) not in (select * from inner_7597);
  1 |   
 (2 rows)

+--
+-- Similar test case using text that verifies that collation
+-- information is passed through by execTuplesEqual() in nodeSubplan.c
+-- (otherwise it would error in texteq())
+--
+create temp table outer_text (f1 text, f2 text);
+insert into outer_text values ('a', 'a');
+insert into outer_text values ('b', 'a');
+insert into outer_text values ('a', null);
+insert into outer_text values ('b', null);
+create temp table inner_text (c1 text, c2 text);
+insert into inner_text values ('a', null);
+select * from outer_text where (f1, f2) not in (select * from inner_text);
+ f1 | f2 
+----+----
+ b  | a
+ b  | 
+(2 rows)
+
 --
 -- Test case for premature memory release during hashing of subplan output
 --
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -453,6 +453,256 @@ CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=p
 SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;


+-- nondeterministic collations
+
+CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true);
+CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false);
+
+CREATE TABLE test6 (a int, b text);
+-- same string in different normal forms
+INSERT INTO test6 VALUES (1, U&'\00E4bc');
+INSERT INTO test6 VALUES (2, U&'\0061\0308bc');
+SELECT * FROM test6;
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det;
+SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet;
+
+CREATE COLLATION case_sensitive (provider = icu, locale = 'und');
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+
+SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
+SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
+
+CREATE TABLE test1cs (x text COLLATE case_sensitive);
+CREATE TABLE test2cs (x text COLLATE case_sensitive);
+CREATE TABLE test3cs (x text COLLATE case_sensitive);
+INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2cs VALUES ('ABC'), ('ghi');
+INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+
+SELECT x FROM test3cs WHERE x = 'abc';
+SELECT x FROM test3cs WHERE x <> 'abc';
+SELECT x FROM test3cs WHERE x LIKE 'a%';
+SELECT x FROM test3cs WHERE x ILIKE 'a%';
+SELECT x FROM test3cs WHERE x SIMILAR TO 'a%';
+SELECT x FROM test3cs WHERE x ~ 'a';
+SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x;
+SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x;
+SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs;
+SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs;
+SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs;
+SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs;
+SELECT DISTINCT x FROM test3cs ORDER BY x;
+SELECT count(DISTINCT x) FROM test3cs;
+SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x;
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x;
+CREATE UNIQUE INDEX ON test1cs (x);  -- ok
+INSERT INTO test1cs VALUES ('ABC');  -- ok
+CREATE UNIQUE INDEX ON test3cs (x);  -- ok
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc');
+SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b');
+
+CREATE TABLE test1ci (x text COLLATE case_insensitive);
+CREATE TABLE test2ci (x text COLLATE case_insensitive);
+CREATE TABLE test3ci (x text COLLATE case_insensitive);
+CREATE INDEX ON test3ci (x text_pattern_ops);  -- error
+INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2ci VALUES ('ABC'), ('ghi');
+INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+
+SELECT x FROM test3ci WHERE x = 'abc';
+SELECT x FROM test3ci WHERE x <> 'abc';
+SELECT x FROM test3ci WHERE x LIKE 'a%';
+SELECT x FROM test3ci WHERE x ILIKE 'a%';
+SELECT x FROM test3ci WHERE x SIMILAR TO 'a%';
+SELECT x FROM test3ci WHERE x ~ 'a';
+SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x;
+SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x;
+SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci;
+SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci;
+SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci;
+SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci;
+SELECT DISTINCT x FROM test3ci ORDER BY x;
+SELECT count(DISTINCT x) FROM test3ci;
+SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x;
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x;
+CREATE UNIQUE INDEX ON test1ci (x);  -- ok
+INSERT INTO test1ci VALUES ('ABC');  -- error
+CREATE UNIQUE INDEX ON test3ci (x);  -- error
+SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc');
+SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b');
+
+-- bpchar
+CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive);
+CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive);
+CREATE INDEX ON test3bpci (x bpchar_pattern_ops);  -- error
+INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi');
+INSERT INTO test2bpci VALUES ('ABC'), ('ghi');
+INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi');
+
+SELECT x FROM test3bpci WHERE x = 'abc';
+SELECT x FROM test3bpci WHERE x <> 'abc';
+SELECT x FROM test3bpci WHERE x LIKE 'a%';
+SELECT x FROM test3bpci WHERE x ILIKE 'a%';
+SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%';
+SELECT x FROM test3bpci WHERE x ~ 'a';
+SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x;
+SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x;
+SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci;
+SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci;
+SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci;
+SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci;
+SELECT DISTINCT x FROM test3bpci ORDER BY x;
+SELECT count(DISTINCT x) FROM test3bpci;
+SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x;
+SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x;
+CREATE UNIQUE INDEX ON test1bpci (x);  -- ok
+INSERT INTO test1bpci VALUES ('ABC');  -- error
+CREATE UNIQUE INDEX ON test3bpci (x);  -- error
+SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc');
+SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b');
+
+-- This tests the issue described in match_pattern_prefix().  In the
+-- absence of that check, the case_insensitive tests below would
+-- return no rows where they should logically return one.
+CREATE TABLE test4c (x text COLLATE "C");
+INSERT INTO test4c VALUES ('abc');
+CREATE INDEX ON test4c (x);
+SET enable_seqscan = off;
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive;  -- ok, no rows
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive;  -- ok, no rows
+SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive;  -- error
+SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive;  -- error
+RESET enable_seqscan;
+
+-- Unicode special case: different variants of Greek lower case sigma.
+-- A naive implementation like citext that just does lower(x) =
+-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ'
+-- but upper('ς') is 'Σ'.
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive;
+SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive;
+
+-- name vs. text comparison operators
+SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive;
+SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive;
+
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive;
+SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;;
+
+-- test case adapted from subselect.sql
+CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text);
+INSERT INTO outer_text VALUES ('a', 'a');
+INSERT INTO outer_text VALUES ('b', 'a');
+INSERT INTO outer_text VALUES ('A', NULL);
+INSERT INTO outer_text VALUES ('B', NULL);
+
+CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text);
+INSERT INTO inner_text VALUES ('a', NULL);
+
+SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
+
+-- accents
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+
+CREATE TABLE test4 (a int, b text);
+INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
+SELECT * FROM test4 WHERE b = 'cote';
+SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents;
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents;  -- still case-sensitive
+SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive;
+
+-- foreign keys (should use collation of primary key)
+
+-- PK is case-sensitive, FK is case-insensitive
+CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY);
+INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test10fk VALUES ('abc');  -- ok
+INSERT INTO test10fk VALUES ('ABC');  -- error
+INSERT INTO test10fk VALUES ('xyz');  -- error
+SELECT * FROM test10pk;
+SELECT * FROM test10fk;
+-- restrict update even though the values are "equal" in the FK table
+UPDATE test10fk SET x = 'ABC' WHERE x = 'abc';  -- error
+SELECT * FROM test10fk;
+DELETE FROM test10pk WHERE x = 'abc';
+SELECT * FROM test10pk;
+SELECT * FROM test10fk;
+
+-- PK is case-insensitive, FK is case-sensitive
+CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY);
+INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi');
+CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE);
+INSERT INTO test11fk VALUES ('abc');  -- ok
+INSERT INTO test11fk VALUES ('ABC');  -- ok
+INSERT INTO test11fk VALUES ('xyz');  -- error
+SELECT * FROM test11pk;
+SELECT * FROM test11fk;
+-- cascade update even though the values are "equal" in the PK table
+UPDATE test11pk SET x = 'ABC' WHERE x = 'abc';
+SELECT * FROM test11fk;
+DELETE FROM test11pk WHERE x = 'abc';
+SELECT * FROM test11pk;
+SELECT * FROM test11fk;
+
+-- partitioning
+CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc');
+INSERT INTO test20 VALUES (1, 'abc');
+INSERT INTO test20 VALUES (2, 'ABC');
+SELECT * FROM test20_1;
+
+CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test21 VALUES (1, 'abc');
+INSERT INTO test21 VALUES (2, 'ABC');
+SELECT * FROM test21_1;
+
+CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test22 VALUES (1, 'def');
+INSERT INTO test22 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1);
+
+CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test23 VALUES (1, 'def');
+INSERT INTO test23 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1);
+
+CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b);
+CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc');
+INSERT INTO test30 VALUES (1, 'abc');
+INSERT INTO test30 VALUES (2, 'ABC');
+SELECT * FROM test30_1;
+
+CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b);
+CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF');
+INSERT INTO test31 VALUES (1, 'abc');
+INSERT INTO test31 VALUES (2, 'ABC');
+SELECT * FROM test31_1;
+
+CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b);
+CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test32 VALUES (1, 'def');
+INSERT INTO test32 VALUES (2, 'DEF');
+-- they end up in different partitions
+SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1);
+
+CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b);
+CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0);
+CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1);
+INSERT INTO test33 VALUES (1, 'def');
+INSERT INTO test33 VALUES (2, 'DEF');
+-- they end up in the same partition (but it's platform-dependent which one)
+SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
+
+
 -- cleanup
 SET client_min_messages TO warning;
 DROP SCHEMA collate_tests CASCADE;
--- a/src/test/regress/sql/collate.linux.utf8.sql
+++ b/src/test/regress/sql/collate.linux.utf8.sql
@ -428,6 +428,13 @@ drop type textrange_c;
 drop type textrange_en_us;


+-- nondeterministic collations
+-- (not supported with libc provider)
+
+CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true);
+CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false);
+
+
 -- cleanup
 SET client_min_messages TO warning;
 DROP SCHEMA collate_tests CASCADE;
--- a/src/test/regress/sql/collate.sql
+++ b/src/test/regress/sql/collate.sql
@ -163,6 +163,11 @@ SELECT * FROM foo;
 SELECT a, b, a < b as lt FROM
  (VALUES ('a', 'B'), ('A', 'b' COLLATE "C")) v(a,b);

+-- collation mismatch in subselects
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10);
+-- now it works with overrides
+SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10);
+SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10);

 -- casting

--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@ -435,6 +435,23 @@ insert into inner_7597 values(0, null);

 select * from outer_7597 where (f1, f2) not in (select * from inner_7597);

+--
+-- Similar test case using text that verifies that collation
+-- information is passed through by execTuplesEqual() in nodeSubplan.c
+-- (otherwise it would error in texteq())
+--
+
+create temp table outer_text (f1 text, f2 text);
+insert into outer_text values ('a', 'a');
+insert into outer_text values ('b', 'a');
+insert into outer_text values ('a', null);
+insert into outer_text values ('b', null);
+
+create temp table inner_text (c1 text, c2 text);
+insert into inner_text values ('a', null);
+
+select * from outer_text where (f1, f2) not in (select * from inner_text);
+
 --
 -- Test case for premature memory release during hashing of subplan output
 --
--- a/src/test/subscription/Makefile
+++ b/src/test/subscription/Makefile
@ -15,6 +15,8 @@ include $(top_builddir)/src/Makefile.global

 EXTRA_INSTALL = contrib/hstore

+export with_icu
+
 check:
 	$(prove_check)

--- a/src/test/subscription/t/012_collation.pl
+++ b/src/test/subscription/t/012_collation.pl
@ -0,0 +1,103 @@
+# Test collations, in particular nondeterministic ones
+# (only works with ICU)
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+
+# The Makefile exports with_icu.  Treat an unset value (for example
+# when the script is run by hand) as "no ICU" instead of comparing
+# undef, which would raise an "uninitialized value" warning.
+if (!defined $ENV{with_icu} || $ENV{with_icu} ne 'yes')
+{
+	plan skip_all => 'ICU not supported by this build';
+}
+plan tests => 2;
+
+my $node_publisher = get_new_node('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+my $node_subscriber = get_new_node('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+
+# Test plan: Create a table with a nondeterministic collation in the
+# primary key column.  Pre-insert rows on the publisher and subscriber
+# that are collation-wise equal but byte-wise different.  (We use a
+# string in different normal forms for that.)  Set up publication and
+# subscription.  Update the row on the publisher, but don't change the
+# primary key column.  The subscriber needs to find the row to be
+# updated using the nondeterministic collation semantics.  We need to
+# test for both a replica identity index and for replica identity
+# full, since those have different code paths internally.
+
+$node_subscriber->safe_psql('postgres',
+	q{CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false)});
+
+# table with replica identity index
+
+$node_publisher->safe_psql('postgres',
+	q{CREATE TABLE tab1 (a text PRIMARY KEY, b text)});
+
+$node_publisher->safe_psql('postgres',
+	q{INSERT INTO tab1 VALUES (U&'\00E4bc', 'foo')});
+
+$node_subscriber->safe_psql('postgres',
+	q{CREATE TABLE tab1 (a text COLLATE ctest_nondet PRIMARY KEY, b text)});
+
+$node_subscriber->safe_psql('postgres',
+	q{INSERT INTO tab1 VALUES (U&'\0061\0308bc', 'foo')});
+
+# table with replica identity full
+
+$node_publisher->safe_psql('postgres',
+	q{CREATE TABLE tab2 (a text, b text)});
+$node_publisher->safe_psql('postgres',
+	q{ALTER TABLE tab2 REPLICA IDENTITY FULL});
+
+$node_publisher->safe_psql('postgres',
+	q{INSERT INTO tab2 VALUES (U&'\00E4bc', 'foo')});
+
+$node_subscriber->safe_psql('postgres',
+	q{CREATE TABLE tab2 (a text COLLATE ctest_nondet, b text)});
+$node_subscriber->safe_psql('postgres',
+	q{ALTER TABLE tab2 REPLICA IDENTITY FULL});
+
+$node_subscriber->safe_psql('postgres',
+	q{INSERT INTO tab2 VALUES (U&'\0061\0308bc', 'foo')});
+
+# set up publication, subscription (no initial copy: rows already exist)
+
+$node_publisher->safe_psql('postgres',
+	q{CREATE PUBLICATION pub1 FOR ALL TABLES});
+
+$node_subscriber->safe_psql('postgres',
+	qq{CREATE SUBSCRIPTION sub1 CONNECTION '$publisher_connstr' PUBLICATION pub1 WITH (copy_data = false)});
+
+$node_publisher->wait_for_catchup('sub1');
+
+# test with replica identity index
+
+$node_publisher->safe_psql('postgres',
+	q{UPDATE tab1 SET b = 'bar' WHERE b = 'foo'});
+
+$node_publisher->wait_for_catchup('sub1');
+
+is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab1}),
+   qq(bar),
+  'update with primary key with nondeterministic collation');
+
+# test with replica identity full
+
+$node_publisher->safe_psql('postgres',
+	q{UPDATE tab2 SET b = 'bar' WHERE b = 'foo'});
+
+$node_publisher->wait_for_catchup('sub1');
+
+is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab2}),
+   qq(bar),
+  'update with replica identity full with nondeterministic collation');