-- Patch overview: turn citext's previously commented-out multibyte (UTF-8)
-- regression statements into a separate, conditionally skipped test.
--
-- * contrib/citext/Makefile: REGRESS gains a second test, citext_utf8, listed
--   after citext.
-- * sql/citext.sql, expected/citext.out, expected/citext_1.out: the
--   commented-out multibyte statements and their "Uncomment to run" notes are
--   removed.
-- * sql/citext_utf8.sql (new): contains those statements uncommented. The
--   citext_cmp() checks are rewritten from "AS zero / positive / negative" to
--   boolean comparisons ("= 0", "> 0", "< 0") aliased AS t.
-- * expected/citext_utf8.out (new): expected output with every statement run.
-- * expected/citext_utf8_1.out (new): expected output that ends at \quit;
--   presumably the variant matched when the script bails out early.
--
-- Skip logic in sql/citext_utf8.sql: skip_test is stored via \gset from
--   getdatabaseencoding() <> 'UTF8' OR current_setting('lc_ctype') = 'C'
-- and "\if :skip_test" / "\quit" ends the script before any multibyte
-- statement. Otherwise the script sets client_encoding to utf8 and continues.
--
-- NOTE(review): only lc_ctype = 'C' triggers the skip; presumably a database
--   reporting lc_ctype 'POSIX' would still run the tests -- confirm whether
--   that case needs covering.
-- NOTE(review): "CREATE EXTENSION IF NOT EXISTS citext" is left commented out
--   in the new script, so it appears to rely on the citext test having created
--   the extension first -- confirm.
-- NOTE(review): the "combining characters" statements expect the two 'Ä'
--   literals to compare unequal (expected t for <>), so the literals must stay
--   byte-distinct; verify no tool has Unicode-normalized them.
-- NOTE(review): the Turkish dotted I check expects 'i' = 'İ' under citext;
--   presumably this depends on the locale's case mapping -- confirm on the
--   platforms that run this test.
-- NOTE(review): in this copy the patch's line breaks appear collapsed into
--   spaces; the patch text below is kept verbatim.
diff --git a/contrib/citext/Makefile b/contrib/citext/Makefile index a7de52928d7..789932fe366 100644 --- a/contrib/citext/Makefile +++ b/contrib/citext/Makefile @@ -11,7 +11,7 @@ DATA = citext--1.4.sql \ citext--1.0--1.1.sql PGFILEDESC = "citext - case-insensitive character string data type" -REGRESS = citext +REGRESS = citext citext_utf8 ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out index 3bac0534fb8..5afcc50920e 100644 --- a/contrib/citext/expected/citext.out +++ b/contrib/citext/expected/citext.out @@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t; t (1 row) --- Multibyte sanity tests. Uncomment to run. --- SELECT 'À'::citext = 'À'::citext AS t; --- SELECT 'À'::citext = 'à'::citext AS t; --- SELECT 'À'::text = 'à'::text AS f; -- text wins. --- SELECT 'À'::citext <> 'B'::citext AS t; --- Test combining characters making up canonically equivalent strings. --- SELECT 'Ä'::text <> 'Ä'::text AS t; --- SELECT 'Ä'::citext <> 'Ä'::citext AS t; --- Test the Turkish dotted I. The lowercase is a single byte while the --- uppercase is multibyte. This is why the comparison code can't be optimized --- to compare string lengths. --- SELECT 'i'::citext = 'İ'::citext AS t; --- Regression. 
--- SELECT 'láska'::citext <> 'laská'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive; --- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative; -- Test > and >= SELECT 'B'::citext > 'a'::citext AS t; t @@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true; t (1 row) --- Multi-byte tests below are disabled like the sanity tests above. --- Uncomment to run them. -- Test ~<~ and ~<=~ SELECT 'a'::citext ~<~ 'B'::citext AS t; t @@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f; f (1 row) --- SELECT 'à'::citext ~<~ 'À'::citext AS f; SELECT 'a'::citext ~<=~ 'B'::citext AS t; t --- @@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t; t (1 row) --- SELECT 'à'::citext ~<=~ 'À'::citext AS t; -- Test ~>~ and ~>=~ SELECT 'B'::citext ~>~ 'a'::citext AS t; t @@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t; t (1 row) --- SELECT 'à'::citext ~>~ 'À'::citext AS f; SELECT 'B'::citext ~>~ 'b'::citext AS f; f --- @@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t; t (1 row) --- SELECT 'à'::citext ~>=~ 'À'::citext AS t; -- Test implicit casting. citext casts to text, but not vice-versa. SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins. 
 t diff --git a/contrib/citext/expected/citext_1.out b/contrib/citext/expected/citext_1.out index 57fc863f7a5..8aa2b9e1dbc 100644 --- a/contrib/citext/expected/citext_1.out +++ b/contrib/citext/expected/citext_1.out @@ -48,29 +48,6 @@ SELECT 'a'::citext <> 'ab'::citext AS t; t (1 row) --- Multibyte sanity tests. Uncomment to run. --- SELECT 'À'::citext = 'À'::citext AS t; --- SELECT 'À'::citext = 'à'::citext AS t; --- SELECT 'À'::text = 'à'::text AS f; -- text wins. --- SELECT 'À'::citext <> 'B'::citext AS t; --- Test combining characters making up canonically equivalent strings. --- SELECT 'Ä'::text <> 'Ä'::text AS t; --- SELECT 'Ä'::citext <> 'Ä'::citext AS t; --- Test the Turkish dotted I. The lowercase is a single byte while the --- uppercase is multibyte. This is why the comparison code can't be optimized --- to compare string lengths. --- SELECT 'i'::citext = 'İ'::citext AS t; --- Regression. --- SELECT 'láska'::citext <> 'laská'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive; --- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative; -- Test > and >= SELECT 'B'::citext > 'a'::citext AS t; t @@ -2614,8 +2591,6 @@ SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true; t (1 row) --- Multi-byte tests below are disabled like the sanity tests above. --- Uncomment to run them. 
-- Test ~<~ and ~<=~ SELECT 'a'::citext ~<~ 'B'::citext AS t; t @@ -2629,7 +2604,6 @@ SELECT 'b'::citext ~<~ 'A'::citext AS f; f (1 row) --- SELECT 'à'::citext ~<~ 'À'::citext AS f; SELECT 'a'::citext ~<=~ 'B'::citext AS t; t --- @@ -2642,7 +2616,6 @@ SELECT 'a'::citext ~<=~ 'A'::citext AS t; t (1 row) --- SELECT 'à'::citext ~<=~ 'À'::citext AS t; -- Test ~>~ and ~>=~ SELECT 'B'::citext ~>~ 'a'::citext AS t; t @@ -2656,7 +2629,6 @@ SELECT 'b'::citext ~>~ 'A'::citext AS t; t (1 row) --- SELECT 'à'::citext ~>~ 'À'::citext AS f; SELECT 'B'::citext ~>~ 'b'::citext AS f; f --- @@ -2669,7 +2641,6 @@ SELECT 'B'::citext ~>=~ 'b'::citext AS t; t (1 row) --- SELECT 'à'::citext ~>=~ 'À'::citext AS t; -- Test implicit casting. citext casts to text, but not vice-versa. SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins. t diff --git a/contrib/citext/expected/citext_utf8.out b/contrib/citext/expected/citext_utf8.out new file mode 100644 index 00000000000..666b07ccec4 --- /dev/null +++ b/contrib/citext/expected/citext_utf8.out @@ -0,0 +1,146 @@ +/* + * This test must be run in a database with UTF-8 encoding + * and a Unicode-aware locale. + */ +SELECT getdatabaseencoding() <> 'UTF8' OR + current_setting('lc_ctype') = 'C' + AS skip_test \gset +\if :skip_test +\quit +\endif +set client_encoding = utf8; +-- CREATE EXTENSION IF NOT EXISTS citext; +-- Multibyte sanity tests. +SELECT 'À'::citext = 'À'::citext AS t; + t +--- + t +(1 row) + +SELECT 'À'::citext = 'à'::citext AS t; + t +--- + t +(1 row) + +SELECT 'À'::text = 'à'::text AS f; -- text wins. + f +--- + f +(1 row) + +SELECT 'À'::citext <> 'B'::citext AS t; + t +--- + t +(1 row) + +-- Test combining characters making up canonically equivalent strings. +SELECT 'Ä'::text <> 'Ä'::text AS t; + t +--- + t +(1 row) + +SELECT 'Ä'::citext <> 'Ä'::citext AS t; + t +--- + t +(1 row) + +-- Test the Turkish dotted I. The lowercase is a single byte while the +-- uppercase is multibyte. 
This is why the comparison code can't be optimized +-- to compare string lengths. +SELECT 'i'::citext = 'İ'::citext AS t; + t +--- + t +(1 row) + +-- Regression. +SELECT 'láska'::citext <> 'laská'::citext AS t; + t +--- + t +(1 row) + +SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; + t +--- + t +(1 row) + +SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; + t +--- + t +(1 row) + +SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; + t +--- + t +(1 row) + +SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; + t +--- + t +(1 row) + +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t; + t +--- + t +(1 row) + +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t; + t +--- + t +(1 row) + +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t; + t +--- + t +(1 row) + +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t; + t +--- + t +(1 row) + +SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t; + t +--- + t +(1 row) + +-- Test ~<~ and ~<=~ +SELECT 'à'::citext ~<~ 'À'::citext AS f; + f +--- + f +(1 row) + +SELECT 'à'::citext ~<=~ 'À'::citext AS t; + t +--- + t +(1 row) + +-- Test ~>~ and ~>=~ +SELECT 'à'::citext ~>~ 'À'::citext AS f; + f +--- + f +(1 row) + +SELECT 'à'::citext ~>=~ 'À'::citext AS t; + t +--- + t +(1 row) + diff --git a/contrib/citext/expected/citext_utf8_1.out b/contrib/citext/expected/citext_utf8_1.out new file mode 100644 index 00000000000..433e9853497 --- /dev/null +++ b/contrib/citext/expected/citext_utf8_1.out @@ -0,0 +1,9 @@ +/* + * This test must be run in a database with UTF-8 encoding + * and a Unicode-aware locale. 
+ */ +SELECT getdatabaseencoding() <> 'UTF8' OR + current_setting('lc_ctype') = 'C' + AS skip_test \gset +\if :skip_test +\quit diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql index 55fb1d11a6f..8c87be6b1d2 100644 --- a/contrib/citext/sql/citext.sql +++ b/contrib/citext/sql/citext.sql @@ -19,34 +19,6 @@ SELECT 'a'::citext = 'b'::citext AS f; SELECT 'a'::citext = 'ab'::citext AS f; SELECT 'a'::citext <> 'ab'::citext AS t; --- Multibyte sanity tests. Uncomment to run. --- SELECT 'À'::citext = 'À'::citext AS t; --- SELECT 'À'::citext = 'à'::citext AS t; --- SELECT 'À'::text = 'à'::text AS f; -- text wins. --- SELECT 'À'::citext <> 'B'::citext AS t; - --- Test combining characters making up canonically equivalent strings. --- SELECT 'Ä'::text <> 'Ä'::text AS t; --- SELECT 'Ä'::citext <> 'Ä'::citext AS t; - --- Test the Turkish dotted I. The lowercase is a single byte while the --- uppercase is multibyte. This is why the comparison code can't be optimized --- to compare string lengths. --- SELECT 'i'::citext = 'İ'::citext AS t; - --- Regression. 
--- SELECT 'láska'::citext <> 'laská'::citext AS t; - --- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; --- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero; --- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive; --- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative; - -- Test > and >= SELECT 'B'::citext > 'a'::citext AS t; SELECT 'b'::citext > 'A'::citext AS t; @@ -811,24 +783,17 @@ SELECT citext_pattern_ge('b'::citext, 'a'::citext) AS true; SELECT citext_pattern_ge('B'::citext, 'a'::citext) AS true; SELECT citext_pattern_ge('b'::citext, 'A'::citext) AS true; --- Multi-byte tests below are disabled like the sanity tests above. --- Uncomment to run them. - -- Test ~<~ and ~<=~ SELECT 'a'::citext ~<~ 'B'::citext AS t; SELECT 'b'::citext ~<~ 'A'::citext AS f; --- SELECT 'à'::citext ~<~ 'À'::citext AS f; SELECT 'a'::citext ~<=~ 'B'::citext AS t; SELECT 'a'::citext ~<=~ 'A'::citext AS t; --- SELECT 'à'::citext ~<=~ 'À'::citext AS t; -- Test ~>~ and ~>=~ SELECT 'B'::citext ~>~ 'a'::citext AS t; SELECT 'b'::citext ~>~ 'A'::citext AS t; --- SELECT 'à'::citext ~>~ 'À'::citext AS f; SELECT 'B'::citext ~>~ 'b'::citext AS f; SELECT 'B'::citext ~>=~ 'b'::citext AS t; --- SELECT 'à'::citext ~>=~ 'À'::citext AS t; -- Test implicit casting. citext casts to text, but not vice-versa. SELECT 'B'::citext ~<~ 'a'::text AS t; -- text wins. 
diff --git a/contrib/citext/sql/citext_utf8.sql b/contrib/citext/sql/citext_utf8.sql new file mode 100644 index 00000000000..d068000b423 --- /dev/null +++ b/contrib/citext/sql/citext_utf8.sql @@ -0,0 +1,51 @@ +/* + * This test must be run in a database with UTF-8 encoding + * and a Unicode-aware locale. + */ + +SELECT getdatabaseencoding() <> 'UTF8' OR + current_setting('lc_ctype') = 'C' + AS skip_test \gset +\if :skip_test +\quit +\endif + +set client_encoding = utf8; + +-- CREATE EXTENSION IF NOT EXISTS citext; + +-- Multibyte sanity tests. +SELECT 'À'::citext = 'À'::citext AS t; +SELECT 'À'::citext = 'à'::citext AS t; +SELECT 'À'::text = 'à'::text AS f; -- text wins. +SELECT 'À'::citext <> 'B'::citext AS t; + +-- Test combining characters making up canonically equivalent strings. +SELECT 'Ä'::text <> 'Ä'::text AS t; +SELECT 'Ä'::citext <> 'Ä'::citext AS t; + +-- Test the Turkish dotted I. The lowercase is a single byte while the +-- uppercase is multibyte. This is why the comparison code can't be optimized +-- to compare string lengths. +SELECT 'i'::citext = 'İ'::citext AS t; + +-- Regression. 
+SELECT 'láska'::citext <> 'laská'::citext AS t; + +SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; +SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; +SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; +SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) = 0 AS t; +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) = 0 AS t; +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) = 0 AS t; +SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) > 0 AS t; +SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) < 0 AS t; + +-- Test ~<~ and ~<=~ +SELECT 'à'::citext ~<~ 'À'::citext AS f; +SELECT 'à'::citext ~<=~ 'À'::citext AS t; + +-- Test ~>~ and ~>=~ +SELECT 'à'::citext ~>~ 'À'::citext AS f; +SELECT 'à'::citext ~>=~ 'À'::citext AS t;