diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index 293d6da027c..4a0de4fe95a 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -1980,6 +1980,73 @@ executeComparison(JsonPathItem *cmp, JsonbValue *lv, JsonbValue *rv, void *p) return compareItems(cmp->type, lv, rv); } +/* + * Perform per-byte comparison of two strings. + */ +static int +binaryCompareStrings(const char *s1, int len1, + const char *s2, int len2) +{ + int cmp; + + cmp = memcmp(s1, s2, Min(len1, len2)); + + if (cmp != 0) + return cmp; + + if (len1 == len2) + return 0; + + return len1 < len2 ? -1 : 1; +} + +/* + * Compare two strings in the current server encoding using Unicode codepoint + * collation. + */ +static int +compareStrings(const char *mbstr1, int mblen1, + const char *mbstr2, int mblen2) +{ + if (GetDatabaseEncoding() == PG_SQL_ASCII || + GetDatabaseEncoding() == PG_UTF8) + { + /* + * It's known property of UTF-8 strings that their per-byte comparison + * result matches codepoints comparison result. ASCII can be + * considered as special case of UTF-8. + */ + return binaryCompareStrings(mbstr1, mblen1, mbstr2, mblen2); + } + else + { + /* We have to convert other encodings to UTF-8 first, then compare. */ + char *utf8str1 = pg_server_to_any(mbstr1, mblen1, PG_UTF8), + *utf8str2 = pg_server_to_any(mbstr2, mblen2, PG_UTF8); + int cmp; + + cmp = binaryCompareStrings(utf8str1, strlen(utf8str1), + utf8str2, strlen(utf8str2)); + + pfree(utf8str1); + pfree(utf8str2); + + /* + * When all Unicode codepoints are equal, return result of binary + * comparison. In some edge cases, same characters may have different + * representations in encoding. Then our behavior could diverge from + * standard. However, that allow us to do simple binary comparison + * for "==" operator, which is performance critical in typical cases. + * In future to implement strict standard conformance, we can do + * normalization of input JSON strings. + */ + if (cmp == 0) + return binaryCompareStrings(mbstr1, mblen1, mbstr2, mblen2); + else + return cmp; + } +} + /* * Compare two SQL/JSON items using comparison operation 'op'. */ @@ -2022,9 +2089,8 @@ compareItems(int32 op, JsonbValue *jb1, JsonbValue *jb2) jb2->val.string.val, jb1->val.string.len) ? jpbFalse : jpbTrue; - cmp = varstr_cmp(jb1->val.string.val, jb1->val.string.len, - jb2->val.string.val, jb2->val.string.len, - DEFAULT_COLLATION_OID); + cmp = compareStrings(jb1->val.string.val, jb1->val.string.len, + jb2->val.string.val, jb2->val.string.len); break; case jbvBinary: diff --git a/src/test/regress/expected/jsonb_jsonpath.out b/src/test/regress/expected/jsonb_jsonpath.out index 31a871af028..0202667a1f7 100644 --- a/src/test/regress/expected/jsonb_jsonpath.out +++ b/src/test/regress/expected/jsonb_jsonpath.out @@ -1833,3 +1833,166 @@ SELECT jsonb_path_match('[{"a": 1}, {"a": 2}]', '$[*].a > 1'); t (1 row) +-- test string comparison (Unicode codepoint collation) +WITH str(j, num) AS +( + SELECT jsonb_build_object('s', s), num + FROM unnest('{"", "a", "ab", "abc", "abcd", "b", "A", "AB", "ABC", "ABc", "ABcD", "B"}'::text[]) WITH ORDINALITY AS a(s, num) +) +SELECT + s1.j, s2.j, + jsonb_path_query_first(s1.j, '$.s < $s', vars => s2.j) lt, + jsonb_path_query_first(s1.j, '$.s <= $s', vars => s2.j) le, + jsonb_path_query_first(s1.j, '$.s == $s', vars => s2.j) eq, + jsonb_path_query_first(s1.j, '$.s >= $s', vars => s2.j) ge, + jsonb_path_query_first(s1.j, '$.s > $s', vars => s2.j) gt +FROM str s1, str s2 +ORDER BY s1.num, s2.num; + j | j | lt | le | eq | ge | gt +---------------+---------------+-------+-------+-------+-------+------- + {"s": ""} | {"s": ""} | false | true | true | true | false + {"s": ""} | {"s": "a"} | true | true | false | false | false + {"s": ""} | {"s": "ab"} | true | true | false | false | false + {"s": ""} | {"s": "abc"} | true | true | false | false | false + {"s": ""} | {"s": "abcd"} | true | true | false | false | false + {"s": ""} | {"s": "b"} | true | true | false | false | false + {"s": ""} | {"s": "A"} | true | true | false | false | false + {"s": ""} | {"s": "AB"} | true | true | false | false | false + {"s": ""} | {"s": "ABC"} | true | true | false | false | false + {"s": ""} | {"s": "ABc"} | true | true | false | false | false + {"s": ""} | {"s": "ABcD"} | true | true | false | false | false + {"s": ""} | {"s": "B"} | true | true | false | false | false + {"s": "a"} | {"s": ""} | false | false | false | true | true + {"s": "a"} | {"s": "a"} | false | true | true | true | false + {"s": "a"} | {"s": "ab"} | true | true | false | false | false + {"s": "a"} | {"s": "abc"} | true | true | false | false | false + {"s": "a"} | {"s": "abcd"} | true | true | false | false | false + {"s": "a"} | {"s": "b"} | true | true | false | false | false + {"s": "a"} | {"s": "A"} | false | false | false | true | true + {"s": "a"} | {"s": "AB"} | false | false | false | true | true + {"s": "a"} | {"s": "ABC"} | false | false | false | true | true + {"s": "a"} | {"s": "ABc"} | false | false | false | true | true + {"s": "a"} | {"s": "ABcD"} | false | false | false | true | true + {"s": "a"} | {"s": "B"} | false | false | false | true | true + {"s": "ab"} | {"s": ""} | false | false | false | true | true + {"s": "ab"} | {"s": "a"} | false | false | false | true | true + {"s": "ab"} | {"s": "ab"} | false | true | true | true | false + {"s": "ab"} | {"s": "abc"} | true | true | false | false | false + {"s": "ab"} | {"s": "abcd"} | true | true | false | false | false + {"s": "ab"} | {"s": "b"} | true | true | false | false | false + {"s": "ab"} | {"s": "A"} | false | false | false | true | true + {"s": "ab"} | {"s": "AB"} | false | false | false | true | true + {"s": "ab"} | {"s": "ABC"} | false | false | false | true | true + {"s": "ab"} | {"s": "ABc"} | false | false | false | true | true + {"s": "ab"} | {"s": "ABcD"} | false | false | false | true | true + {"s": "ab"} | {"s": "B"} | false | false | false | true | true + {"s": "abc"} | {"s": ""} | false | false | false | true | true + {"s": "abc"} | {"s": "a"} | false | false | false | true | true + {"s": "abc"} | {"s": "ab"} | false | false | false | true | true + {"s": "abc"} | {"s": "abc"} | false | true | true | true | false + {"s": "abc"} | {"s": "abcd"} | true | true | false | false | false + {"s": "abc"} | {"s": "b"} | true | true | false | false | false + {"s": "abc"} | {"s": "A"} | false | false | false | true | true + {"s": "abc"} | {"s": "AB"} | false | false | false | true | true + {"s": "abc"} | {"s": "ABC"} | false | false | false | true | true + {"s": "abc"} | {"s": "ABc"} | false | false | false | true | true + {"s": "abc"} | {"s": "ABcD"} | false | false | false | true | true + {"s": "abc"} | {"s": "B"} | false | false | false | true | true + {"s": "abcd"} | {"s": ""} | false | false | false | true | true + {"s": "abcd"} | {"s": "a"} | false | false | false | true | true + {"s": "abcd"} | {"s": "ab"} | false | false | false | true | true + {"s": "abcd"} | {"s": "abc"} | false | false | false | true | true + {"s": "abcd"} | {"s": "abcd"} | false | true | true | true | false + {"s": "abcd"} | {"s": "b"} | true | true | false | false | false + {"s": "abcd"} | {"s": "A"} | false | false | false | true | true + {"s": "abcd"} | {"s": "AB"} | false | false | false | true | true + {"s": "abcd"} | {"s": "ABC"} | false | false | false | true | true + {"s": "abcd"} | {"s": "ABc"} | false | false | false | true | true + {"s": "abcd"} | {"s": "ABcD"} | false | false | false | true | true + {"s": "abcd"} | {"s": "B"} | false | false | false | true | true + {"s": "b"} | {"s": ""} | false | false | false | true | true + {"s": "b"} | {"s": "a"} | false | false | false | true | true + {"s": "b"} | {"s": "ab"} | false | false | false | true | true + {"s": "b"} | {"s": "abc"} | false | false | false | true | true + {"s": "b"} | {"s": "abcd"} | false | false | false | true | true + {"s": "b"} | {"s": "b"} | false | true | true | true | false + {"s": "b"} | {"s": "A"} | false | false | false | true | true + {"s": "b"} | {"s": "AB"} | false | false | false | true | true + {"s": "b"} | {"s": "ABC"} | false | false | false | true | true + {"s": "b"} | {"s": "ABc"} | false | false | false | true | true + {"s": "b"} | {"s": "ABcD"} | false | false | false | true | true + {"s": "b"} | {"s": "B"} | false | false | false | true | true + {"s": "A"} | {"s": ""} | false | false | false | true | true + {"s": "A"} | {"s": "a"} | true | true | false | false | false + {"s": "A"} | {"s": "ab"} | true | true | false | false | false + {"s": "A"} | {"s": "abc"} | true | true | false | false | false + {"s": "A"} | {"s": "abcd"} | true | true | false | false | false + {"s": "A"} | {"s": "b"} | true | true | false | false | false + {"s": "A"} | {"s": "A"} | false | true | true | true | false + {"s": "A"} | {"s": "AB"} | true | true | false | false | false + {"s": "A"} | {"s": "ABC"} | true | true | false | false | false + {"s": "A"} | {"s": "ABc"} | true | true | false | false | false + {"s": "A"} | {"s": "ABcD"} | true | true | false | false | false + {"s": "A"} | {"s": "B"} | true | true | false | false | false + {"s": "AB"} | {"s": ""} | false | false | false | true | true + {"s": "AB"} | {"s": "a"} | true | true | false | false | false + {"s": "AB"} | {"s": "ab"} | true | true | false | false | false + {"s": "AB"} | {"s": "abc"} | true | true | false | false | false + {"s": "AB"} | {"s": "abcd"} | true | true | false | false | false + {"s": "AB"} | {"s": "b"} | true | true | false | false | false + {"s": "AB"} | {"s": "A"} | false | false | false | true | true + {"s": "AB"} | {"s": "AB"} | false | true | true | true | false + {"s": "AB"} | {"s": "ABC"} | true | true | false | false | false + {"s": "AB"} | {"s": "ABc"} | true | true | false | false | false + {"s": "AB"} | {"s": "ABcD"} | true | true | false | false | false + {"s": "AB"} | {"s": "B"} | true | true | false | false | false + {"s": "ABC"} | {"s": ""} | false | false | false | true | true + {"s": "ABC"} | {"s": "a"} | true | true | false | false | false + {"s": "ABC"} | {"s": "ab"} | true | true | false | false | false + {"s": "ABC"} | {"s": "abc"} | true | true | false | false | false + {"s": "ABC"} | {"s": "abcd"} | true | true | false | false | false + {"s": "ABC"} | {"s": "b"} | true | true | false | false | false + {"s": "ABC"} | {"s": "A"} | false | false | false | true | true + {"s": "ABC"} | {"s": "AB"} | false | false | false | true | true + {"s": "ABC"} | {"s": "ABC"} | false | true | true | true | false + {"s": "ABC"} | {"s": "ABc"} | true | true | false | false | false + {"s": "ABC"} | {"s": "ABcD"} | true | true | false | false | false + {"s": "ABC"} | {"s": "B"} | true | true | false | false | false + {"s": "ABc"} | {"s": ""} | false | false | false | true | true + {"s": "ABc"} | {"s": "a"} | true | true | false | false | false + {"s": "ABc"} | {"s": "ab"} | true | true | false | false | false + {"s": "ABc"} | {"s": "abc"} | true | true | false | false | false + {"s": "ABc"} | {"s": "abcd"} | true | true | false | false | false + {"s": "ABc"} | {"s": "b"} | true | true | false | false | false + {"s": "ABc"} | {"s": "A"} | false | false | false | true | true + {"s": "ABc"} | {"s": "AB"} | false | false | false | true | true + {"s": "ABc"} | {"s": "ABC"} | false | false | false | true | true + {"s": "ABc"} | {"s": "ABc"} | false | true | true | true | false + {"s": "ABc"} | {"s": "ABcD"} | true | true | false | false | false + {"s": "ABc"} | {"s": "B"} | true | true | false | false | false + {"s": "ABcD"} | {"s": ""} | false | false | false | true | true + {"s": "ABcD"} | {"s": "a"} | true | true | false | false | false + {"s": "ABcD"} | {"s": "ab"} | true | true | false | false | false + {"s": "ABcD"} | {"s": "abc"} | true | true | false | false | false + {"s": "ABcD"} | {"s": "abcd"} | true | true | false | false | false + {"s": "ABcD"} | {"s": "b"} | true | true | false | false | false + {"s": "ABcD"} | {"s": "A"} | false | false | false | true | true + {"s": "ABcD"} | {"s": "AB"} | false | false | false | true | true + {"s": "ABcD"} | {"s": "ABC"} | false | false | false | true | true + {"s": "ABcD"} | {"s": "ABc"} | false | false | false | true | true + {"s": "ABcD"} | {"s": "ABcD"} | false | true | true | true | false + {"s": "ABcD"} | {"s": "B"} | true | true | false | false | false + {"s": "B"} | {"s": ""} | false | false | false | true | true + {"s": "B"} | {"s": "a"} | true | true | false | false | false + {"s": "B"} | {"s": "ab"} | true | true | false | false | false + {"s": "B"} | {"s": "abc"} | true | true | false | false | false + {"s": "B"} | {"s": "abcd"} | true | true | false | false | false + {"s": "B"} | {"s": "b"} | true | true | false | false | false + {"s": "B"} | {"s": "A"} | false | false | false | true | true + {"s": "B"} | {"s": "AB"} | false | false | false | true | true + {"s": "B"} | {"s": "ABC"} | false | false | false | true | true + {"s": "B"} | {"s": "ABc"} | false | false | false | true | true + {"s": "B"} | {"s": "ABcD"} | false | false | false | true | true + {"s": "B"} | {"s": "B"} | false | true | true | true | false +(144 rows) + diff --git a/src/test/regress/sql/jsonb_jsonpath.sql b/src/test/regress/sql/jsonb_jsonpath.sql index 733fbd4e0d0..e7629fb7f9d 100644 --- a/src/test/regress/sql/jsonb_jsonpath.sql +++ b/src/test/regress/sql/jsonb_jsonpath.sql @@ -387,3 +387,19 @@ SELECT jsonb_path_match('[true, true]', '$[*]', silent => false); SELECT jsonb '[{"a": 1}, {"a": 2}]' @@ '$[*].a > 1'; SELECT jsonb '[{"a": 1}, {"a": 2}]' @@ '$[*].a > 2'; SELECT jsonb_path_match('[{"a": 1}, {"a": 2}]', '$[*].a > 1'); + +-- test string comparison (Unicode codepoint collation) +WITH str(j, num) AS +( + SELECT jsonb_build_object('s', s), num + FROM unnest('{"", "a", "ab", "abc", "abcd", "b", "A", "AB", "ABC", "ABc", "ABcD", "B"}'::text[]) WITH ORDINALITY AS a(s, num) +) +SELECT + s1.j, s2.j, + jsonb_path_query_first(s1.j, '$.s < $s', vars => s2.j) lt, + jsonb_path_query_first(s1.j, '$.s <= $s', vars => s2.j) le, + jsonb_path_query_first(s1.j, '$.s == $s', vars => s2.j) eq, + jsonb_path_query_first(s1.j, '$.s >= $s', vars => s2.j) ge, + jsonb_path_query_first(s1.j, '$.s > $s', vars => s2.j) gt +FROM str s1, str s2 +ORDER BY s1.num, s2.num;