1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Adjust string comparison in jsonpath

We have implemented jsonpath string comparison using default database locale.
However, standard requires us to compare Unicode codepoints.  This commit
implements that, but for performance reasons we still use per-byte comparison
for "==" operator.  Thus, for consistency other comparison operators do per-byte
comparison if Unicode codepoints appear to be equal.

In some edge cases, when same Unicode codepoints have different binary
representations in database encoding, we diverge standard to achieve better
performance of "==" operator.  In future to implement strict standard
conformance, we can do normalization of input JSON strings.

Original patch was written by Nikita Glukhov, rewritten by me.

Reported-by: Markus Winand
Discussion: https://postgr.es/m/8B7FA3B4-328D-43D7-95A8-37B8891B8C78%40winand.at
Author: Nikita Glukhov, Alexander Korotkov
Backpatch-through: 12
This commit is contained in:
Alexander Korotkov
2019-08-11 22:54:53 +03:00
parent 6ce0366be2
commit 3218ff5c6a
3 changed files with 248 additions and 3 deletions

View File

@ -1833,3 +1833,166 @@ SELECT jsonb_path_match('[{"a": 1}, {"a": 2}]', '$[*].a > 1');
t
(1 row)
-- test string comparison (Unicode codepoint collation)
WITH str(j, num) AS
(
SELECT jsonb_build_object('s', s), num
FROM unnest('{"", "a", "ab", "abc", "abcd", "b", "A", "AB", "ABC", "ABc", "ABcD", "B"}'::text[]) WITH ORDINALITY AS a(s, num)
)
SELECT
s1.j, s2.j,
jsonb_path_query_first(s1.j, '$.s < $s', vars => s2.j) lt,
jsonb_path_query_first(s1.j, '$.s <= $s', vars => s2.j) le,
jsonb_path_query_first(s1.j, '$.s == $s', vars => s2.j) eq,
jsonb_path_query_first(s1.j, '$.s >= $s', vars => s2.j) ge,
jsonb_path_query_first(s1.j, '$.s > $s', vars => s2.j) gt
FROM str s1, str s2
ORDER BY s1.num, s2.num;
j | j | lt | le | eq | ge | gt
---------------+---------------+-------+-------+-------+-------+-------
{"s": ""} | {"s": ""} | false | true | true | true | false
{"s": ""} | {"s": "a"} | true | true | false | false | false
{"s": ""} | {"s": "ab"} | true | true | false | false | false
{"s": ""} | {"s": "abc"} | true | true | false | false | false
{"s": ""} | {"s": "abcd"} | true | true | false | false | false
{"s": ""} | {"s": "b"} | true | true | false | false | false
{"s": ""} | {"s": "A"} | true | true | false | false | false
{"s": ""} | {"s": "AB"} | true | true | false | false | false
{"s": ""} | {"s": "ABC"} | true | true | false | false | false
{"s": ""} | {"s": "ABc"} | true | true | false | false | false
{"s": ""} | {"s": "ABcD"} | true | true | false | false | false
{"s": ""} | {"s": "B"} | true | true | false | false | false
{"s": "a"} | {"s": ""} | false | false | false | true | true
{"s": "a"} | {"s": "a"} | false | true | true | true | false
{"s": "a"} | {"s": "ab"} | true | true | false | false | false
{"s": "a"} | {"s": "abc"} | true | true | false | false | false
{"s": "a"} | {"s": "abcd"} | true | true | false | false | false
{"s": "a"} | {"s": "b"} | true | true | false | false | false
{"s": "a"} | {"s": "A"} | false | false | false | true | true
{"s": "a"} | {"s": "AB"} | false | false | false | true | true
{"s": "a"} | {"s": "ABC"} | false | false | false | true | true
{"s": "a"} | {"s": "ABc"} | false | false | false | true | true
{"s": "a"} | {"s": "ABcD"} | false | false | false | true | true
{"s": "a"} | {"s": "B"} | false | false | false | true | true
{"s": "ab"} | {"s": ""} | false | false | false | true | true
{"s": "ab"} | {"s": "a"} | false | false | false | true | true
{"s": "ab"} | {"s": "ab"} | false | true | true | true | false
{"s": "ab"} | {"s": "abc"} | true | true | false | false | false
{"s": "ab"} | {"s": "abcd"} | true | true | false | false | false
{"s": "ab"} | {"s": "b"} | true | true | false | false | false
{"s": "ab"} | {"s": "A"} | false | false | false | true | true
{"s": "ab"} | {"s": "AB"} | false | false | false | true | true
{"s": "ab"} | {"s": "ABC"} | false | false | false | true | true
{"s": "ab"} | {"s": "ABc"} | false | false | false | true | true
{"s": "ab"} | {"s": "ABcD"} | false | false | false | true | true
{"s": "ab"} | {"s": "B"} | false | false | false | true | true
{"s": "abc"} | {"s": ""} | false | false | false | true | true
{"s": "abc"} | {"s": "a"} | false | false | false | true | true
{"s": "abc"} | {"s": "ab"} | false | false | false | true | true
{"s": "abc"} | {"s": "abc"} | false | true | true | true | false
{"s": "abc"} | {"s": "abcd"} | true | true | false | false | false
{"s": "abc"} | {"s": "b"} | true | true | false | false | false
{"s": "abc"} | {"s": "A"} | false | false | false | true | true
{"s": "abc"} | {"s": "AB"} | false | false | false | true | true
{"s": "abc"} | {"s": "ABC"} | false | false | false | true | true
{"s": "abc"} | {"s": "ABc"} | false | false | false | true | true
{"s": "abc"} | {"s": "ABcD"} | false | false | false | true | true
{"s": "abc"} | {"s": "B"} | false | false | false | true | true
{"s": "abcd"} | {"s": ""} | false | false | false | true | true
{"s": "abcd"} | {"s": "a"} | false | false | false | true | true
{"s": "abcd"} | {"s": "ab"} | false | false | false | true | true
{"s": "abcd"} | {"s": "abc"} | false | false | false | true | true
{"s": "abcd"} | {"s": "abcd"} | false | true | true | true | false
{"s": "abcd"} | {"s": "b"} | true | true | false | false | false
{"s": "abcd"} | {"s": "A"} | false | false | false | true | true
{"s": "abcd"} | {"s": "AB"} | false | false | false | true | true
{"s": "abcd"} | {"s": "ABC"} | false | false | false | true | true
{"s": "abcd"} | {"s": "ABc"} | false | false | false | true | true
{"s": "abcd"} | {"s": "ABcD"} | false | false | false | true | true
{"s": "abcd"} | {"s": "B"} | false | false | false | true | true
{"s": "b"} | {"s": ""} | false | false | false | true | true
{"s": "b"} | {"s": "a"} | false | false | false | true | true
{"s": "b"} | {"s": "ab"} | false | false | false | true | true
{"s": "b"} | {"s": "abc"} | false | false | false | true | true
{"s": "b"} | {"s": "abcd"} | false | false | false | true | true
{"s": "b"} | {"s": "b"} | false | true | true | true | false
{"s": "b"} | {"s": "A"} | false | false | false | true | true
{"s": "b"} | {"s": "AB"} | false | false | false | true | true
{"s": "b"} | {"s": "ABC"} | false | false | false | true | true
{"s": "b"} | {"s": "ABc"} | false | false | false | true | true
{"s": "b"} | {"s": "ABcD"} | false | false | false | true | true
{"s": "b"} | {"s": "B"} | false | false | false | true | true
{"s": "A"} | {"s": ""} | false | false | false | true | true
{"s": "A"} | {"s": "a"} | true | true | false | false | false
{"s": "A"} | {"s": "ab"} | true | true | false | false | false
{"s": "A"} | {"s": "abc"} | true | true | false | false | false
{"s": "A"} | {"s": "abcd"} | true | true | false | false | false
{"s": "A"} | {"s": "b"} | true | true | false | false | false
{"s": "A"} | {"s": "A"} | false | true | true | true | false
{"s": "A"} | {"s": "AB"} | true | true | false | false | false
{"s": "A"} | {"s": "ABC"} | true | true | false | false | false
{"s": "A"} | {"s": "ABc"} | true | true | false | false | false
{"s": "A"} | {"s": "ABcD"} | true | true | false | false | false
{"s": "A"} | {"s": "B"} | true | true | false | false | false
{"s": "AB"} | {"s": ""} | false | false | false | true | true
{"s": "AB"} | {"s": "a"} | true | true | false | false | false
{"s": "AB"} | {"s": "ab"} | true | true | false | false | false
{"s": "AB"} | {"s": "abc"} | true | true | false | false | false
{"s": "AB"} | {"s": "abcd"} | true | true | false | false | false
{"s": "AB"} | {"s": "b"} | true | true | false | false | false
{"s": "AB"} | {"s": "A"} | false | false | false | true | true
{"s": "AB"} | {"s": "AB"} | false | true | true | true | false
{"s": "AB"} | {"s": "ABC"} | true | true | false | false | false
{"s": "AB"} | {"s": "ABc"} | true | true | false | false | false
{"s": "AB"} | {"s": "ABcD"} | true | true | false | false | false
{"s": "AB"} | {"s": "B"} | true | true | false | false | false
{"s": "ABC"} | {"s": ""} | false | false | false | true | true
{"s": "ABC"} | {"s": "a"} | true | true | false | false | false
{"s": "ABC"} | {"s": "ab"} | true | true | false | false | false
{"s": "ABC"} | {"s": "abc"} | true | true | false | false | false
{"s": "ABC"} | {"s": "abcd"} | true | true | false | false | false
{"s": "ABC"} | {"s": "b"} | true | true | false | false | false
{"s": "ABC"} | {"s": "A"} | false | false | false | true | true
{"s": "ABC"} | {"s": "AB"} | false | false | false | true | true
{"s": "ABC"} | {"s": "ABC"} | false | true | true | true | false
{"s": "ABC"} | {"s": "ABc"} | true | true | false | false | false
{"s": "ABC"} | {"s": "ABcD"} | true | true | false | false | false
{"s": "ABC"} | {"s": "B"} | true | true | false | false | false
{"s": "ABc"} | {"s": ""} | false | false | false | true | true
{"s": "ABc"} | {"s": "a"} | true | true | false | false | false
{"s": "ABc"} | {"s": "ab"} | true | true | false | false | false
{"s": "ABc"} | {"s": "abc"} | true | true | false | false | false
{"s": "ABc"} | {"s": "abcd"} | true | true | false | false | false
{"s": "ABc"} | {"s": "b"} | true | true | false | false | false
{"s": "ABc"} | {"s": "A"} | false | false | false | true | true
{"s": "ABc"} | {"s": "AB"} | false | false | false | true | true
{"s": "ABc"} | {"s": "ABC"} | false | false | false | true | true
{"s": "ABc"} | {"s": "ABc"} | false | true | true | true | false
{"s": "ABc"} | {"s": "ABcD"} | true | true | false | false | false
{"s": "ABc"} | {"s": "B"} | true | true | false | false | false
{"s": "ABcD"} | {"s": ""} | false | false | false | true | true
{"s": "ABcD"} | {"s": "a"} | true | true | false | false | false
{"s": "ABcD"} | {"s": "ab"} | true | true | false | false | false
{"s": "ABcD"} | {"s": "abc"} | true | true | false | false | false
{"s": "ABcD"} | {"s": "abcd"} | true | true | false | false | false
{"s": "ABcD"} | {"s": "b"} | true | true | false | false | false
{"s": "ABcD"} | {"s": "A"} | false | false | false | true | true
{"s": "ABcD"} | {"s": "AB"} | false | false | false | true | true
{"s": "ABcD"} | {"s": "ABC"} | false | false | false | true | true
{"s": "ABcD"} | {"s": "ABc"} | false | false | false | true | true
{"s": "ABcD"} | {"s": "ABcD"} | false | true | true | true | false
{"s": "ABcD"} | {"s": "B"} | true | true | false | false | false
{"s": "B"} | {"s": ""} | false | false | false | true | true
{"s": "B"} | {"s": "a"} | true | true | false | false | false
{"s": "B"} | {"s": "ab"} | true | true | false | false | false
{"s": "B"} | {"s": "abc"} | true | true | false | false | false
{"s": "B"} | {"s": "abcd"} | true | true | false | false | false
{"s": "B"} | {"s": "b"} | true | true | false | false | false
{"s": "B"} | {"s": "A"} | false | false | false | true | true
{"s": "B"} | {"s": "AB"} | false | false | false | true | true
{"s": "B"} | {"s": "ABC"} | false | false | false | true | true
{"s": "B"} | {"s": "ABc"} | false | false | false | true | true
{"s": "B"} | {"s": "ABcD"} | false | false | false | true | true
{"s": "B"} | {"s": "B"} | false | true | true | true | false
(144 rows)

View File

@ -387,3 +387,19 @@ SELECT jsonb_path_match('[true, true]', '$[*]', silent => false);
SELECT jsonb '[{"a": 1}, {"a": 2}]' @@ '$[*].a > 1';
SELECT jsonb '[{"a": 1}, {"a": 2}]' @@ '$[*].a > 2';
SELECT jsonb_path_match('[{"a": 1}, {"a": 2}]', '$[*].a > 1');
-- test string comparison (Unicode codepoint collation)
WITH str(j, num) AS
(
SELECT jsonb_build_object('s', s), num
FROM unnest('{"", "a", "ab", "abc", "abcd", "b", "A", "AB", "ABC", "ABc", "ABcD", "B"}'::text[]) WITH ORDINALITY AS a(s, num)
)
SELECT
s1.j, s2.j,
jsonb_path_query_first(s1.j, '$.s < $s', vars => s2.j) lt,
jsonb_path_query_first(s1.j, '$.s <= $s', vars => s2.j) le,
jsonb_path_query_first(s1.j, '$.s == $s', vars => s2.j) eq,
jsonb_path_query_first(s1.j, '$.s >= $s', vars => s2.j) ge,
jsonb_path_query_first(s1.j, '$.s > $s', vars => s2.j) gt
FROM str s1, str s2
ORDER BY s1.num, s2.num;